/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Provenance: extracted from patch 6bf0a5cb5034a7e684dcc3500e841785237ce2dd
// ("Adding upstream version 1:115.7.0.", Daniel Baumann, 2024-04-07), which
// created browser/components/newtab/lib/PersonalityProvider/RecipeExecutor.jsm.

"use strict";

// We load this into a worker using importScripts, and in tests using import.
// We use var to avoid name collision errors.
// eslint-disable-next-line no-var
var EXPORTED_SYMBOLS = ["RecipeExecutor"];

/**
 * RecipeExecutor is the core feature engineering pipeline for the in-browser
 * personalization work. These pipelines are called "recipes". A recipe is an
 * array of objects that define a "step" in the recipe. A step is simply an
 * object with a field "function" that specifies what is being done in the step
 * along with other fields that are semantically defined for that step.
 *
 * There are two types of recipes: "builder" recipes and "combiner" recipes.
 * Builder recipes mutate an object until it matches some set of criteria.
 * Combiner recipes take two objects, (a "left" and a "right"), and specify the
 * steps to merge the right object into the left object.
 *
 * A short nonsense example recipe is:
 *   [ {"function": "get_url_domain", "path_length": 1, "field": "url", "dest": "url_domain"},
 *     {"function": "nb_tag", "fields": ["title", "description"]},
 *     {"function": "conditionally_nmf_tag", "fields": ["title", "description"]} ]
 *
 * Recipes are sandboxed by the fact that the step functions must be explicitly
 * allowed. Functions allowed for builder recipes are specified in the
 * RecipeExecutor.ITEM_BUILDER_REGISTRY, while combiner functions are allowed
 * in RecipeExecutor.ITEM_COMBINER_REGISTRY .
 *
 * Every step function returns the (possibly mutated) item on success, or null
 * to abort the pipeline.
 */
const RecipeExecutor = class RecipeExecutor {
  /**
   * @param nbTaggers   iterable of taggers exposing tagTokens(tokens); used by
   *                    the "nb_tag" step
   * @param nmfTaggers  map of parent tag name to a tagger exposing
   *                    tagTokens(tokens); used by "conditionally_nmf_tag"
   * @param tokenize    function turning a string into an array of tokens
   */
  constructor(nbTaggers, nmfTaggers, tokenize) {
    // The registries hold unbound methods; executeRecipe and
    // executeCombinerRecipe invoke them with op.call(this, ...).
    this.ITEM_BUILDER_REGISTRY = {
      nb_tag: this.naiveBayesTag,
      conditionally_nmf_tag: this.conditionallyNmfTag,
      accept_item_by_field_value: this.acceptItemByFieldValue,
      tokenize_url: this.tokenizeUrl,
      get_url_domain: this.getUrlDomain,
      tokenize_field: this.tokenizeField,
      copy_value: this.copyValue,
      keep_top_k: this.keepTopK,
      scalar_multiply: this.scalarMultiply,
      elementwise_multiply: this.elementwiseMultiply,
      vector_multiply: this.vectorMultiply,
      scalar_add: this.scalarAdd,
      vector_add: this.vectorAdd,
      make_boolean: this.makeBoolean,
      allow_fields: this.allowFields,
      filter_by_value: this.filterByValue,
      l2_normalize: this.l2Normalize,
      prob_normalize: this.probNormalize,
      set_default: this.setDefault,
      lookup_value: this.lookupValue,
      copy_to_map: this.copyToMap,
      scalar_multiply_tag: this.scalarMultiplyTag,
      apply_softmax_tags: this.applySoftmaxTags,
    };
    this.ITEM_COMBINER_REGISTRY = {
      combiner_add: this.combinerAdd,
      combiner_max: this.combinerMax,
      combiner_collect_values: this.combinerCollectValues,
    };
    this.nbTaggers = nbTaggers;
    this.nmfTaggers = nmfTaggers;
    this.tokenize = tokenize;
  }

  /**
   * Determines the type of a field. Returns the `typeof` result for
   * non-objects (e.g. "string", "number", "undefined"), and for objects one of:
   *  null
   *  array
   *  map (strings to anything)
   */
  _typeOf(data) {
    let t = typeof data;
    if (t === "object") {
      if (data === null) {
        return "null";
      }
      if (Array.isArray(data)) {
        return "array";
      }
      return "map";
    }
    return t;
  }

  /**
   * Returns a scalar, either because it was a constant, or by
   * looking it up from the item. Allows for a default value if the lookup
   * fails.
   */
  _lookupScalar(item, k, dfault) {
    if (this._typeOf(k) === "number") {
      return k;
    } else if (
      this._typeOf(k) === "string" &&
      k in item &&
      this._typeOf(item[k]) === "number"
    ) {
      return item[k];
    }
    return dfault;
  }

  /**
   * Returns the fallback scalar configured on a step. The documented key is
   * "default"; the legacy spelling "dfault" is still honoured. When neither is
   * present the documented default of 0 is used.
   */
  _lookupDefault(config) {
    if ("default" in config) {
      return config.default;
    }
    if ("dfault" in config) {
      return config.dfault;
    }
    return 0;
  }

  /**
   * Parses a URL, returning null (rather than throwing) when the text is not
   * a valid absolute URL.
   */
  _parseUrl(text) {
    try {
      return new URL(text);
    } catch (e) {
      if (e instanceof TypeError) {
        return null;
      }
      throw e;
    }
  }

  /**
   * Percent-decodes text, returning the text unchanged when it contains a
   * malformed escape sequence (decodeURIComponent throws URIError for those).
   */
  _safeDecode(text) {
    try {
      return decodeURIComponent(text);
    } catch (e) {
      if (e instanceof URIError) {
        return text;
      }
      throw e;
    }
  }

  /**
   * Simply appends all the strings from a set of fields together. If the field
   * is a list, then the cells of the list are appended. Pieces are joined with
   * a single space; fields missing from the item are skipped.
   */
  _assembleText(item, fields) {
    let textArr = [];
    for (let field of fields) {
      if (field in item) {
        let type = this._typeOf(item[field]);
        if (type === "string") {
          textArr.push(item[field]);
        } else if (type === "array") {
          for (let ele of item[field]) {
            textArr.push(String(ele));
          }
        } else {
          textArr.push(String(item[field]));
        }
      }
    }
    return textArr.join(" ");
  }

  /**
   * Runs the naive bayes text taggers over a set of text fields. Stores the
   * results in new fields:
   *  nb_tags:          a map of text strings to probabilities
   *  nb_tags_extended: a map of text strings to the full tagger result
   *  nb_tokens:        the tokenized text that was tagged
   *
   * Only results with a non-null label that are marked confident are kept.
   *
   * Config:
   *  fields:           an array containing a list of fields to concatenate and tag
   */
  naiveBayesTag(item, config) {
    let text = this._assembleText(item, config.fields);
    let tokens = this.tokenize(text);
    let tags = {};
    let extended_tags = {};

    for (let nbTagger of this.nbTaggers) {
      let result = nbTagger.tagTokens(tokens);
      if (result.label !== null && result.confident) {
        extended_tags[result.label] = result;
        tags[result.label] = Math.exp(result.logProb);
      }
    }
    item.nb_tags = tags;
    item.nb_tags_extended = extended_tags;
    item.nb_tokens = tokens;
    return item;
  }

  /**
   * Selectively runs NMF text taggers depending on which tags were found
   * by the naive bayes taggers. Writes the results into new fields:
   *  nmf_tags_parent_weights:  map of parent tags to probabilities of those parent tags
   *  nmf_tags:                 map of strings to maps of strings to probabilities
   *  nmf_tags_parent           map of child tags to parent tags
   *
   * Aborts (returns null) unless the item already has nb_tags and nb_tokens.
   *
   * Config:
   *  Not configurable
   */
  conditionallyNmfTag(item, config) {
    let nestedNmfTags = {};
    let parentTags = {};
    let parentWeights = {};

    if (!("nb_tags" in item) || !("nb_tokens" in item)) {
      return null;
    }

    Object.keys(item.nb_tags).forEach(parentTag => {
      let nmfTagger = this.nmfTaggers[parentTag];
      if (nmfTagger !== undefined) {
        nestedNmfTags[parentTag] = {};
        parentWeights[parentTag] = item.nb_tags[parentTag];
        let nmfTags = nmfTagger.tagTokens(item.nb_tokens);
        Object.keys(nmfTags).forEach(nmfTag => {
          nestedNmfTags[parentTag][nmfTag] = nmfTags[nmfTag];
          parentTags[nmfTag] = parentTag;
        });
      }
    });

    item.nmf_tags = nestedNmfTags;
    item.nmf_tags_parent = parentTags;
    item.nmf_tags_parent_weights = parentWeights;

    return item;
  }

  /**
   * Checks a field's value against another value (either from another field
   * or a constant). If the test passes, then the item is emitted, otherwise
   * the pipeline is aborted.
   *
   * Config:
   *  field      Field to read the value to test. Left side of operator.
   *  op         one of ==, !=, <, <=, >, >=
   *  rhsValue   Constant value to compare against. Right side of operator.
   *  rhsField   Field to read value to compare against. Right side of operator.
   *
   * NOTE: rhsValue takes precedence over rhsField.
   */
  acceptItemByFieldValue(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let rhs = null;
    if ("rhsValue" in config) {
      rhs = config.rhsValue;
    } else if ("rhsField" in config && config.rhsField in item) {
      rhs = item[config.rhsField];
    }
    if (rhs === null) {
      return null;
    }

    if (
      // Loose equality is deliberate for the == and != operators.
      // eslint-disable-next-line eqeqeq
      (config.op === "==" && item[config.field] == rhs) ||
      // eslint-disable-next-line eqeqeq
      (config.op === "!=" && item[config.field] != rhs) ||
      (config.op === "<" && item[config.field] < rhs) ||
      (config.op === "<=" && item[config.field] <= rhs) ||
      (config.op === ">" && item[config.field] > rhs) ||
      (config.op === ">=" && item[config.field] >= rhs)
    ) {
      return item;
    }

    return null;
  }

  /**
   * Splits a URL into text-like tokens: hostname tokens first, then path
   * tokens, then the tokens of each query key and (non-empty) query value.
   * Aborts (returns null) if the field is missing or is not a parsable URL.
   *
   * Config:
   *  field   Field containing a URL
   *  dest    Field to write the tokens to as an array of strings
   *
   * NOTE: Any initial 'www' on the hostname is removed.
   */
  tokenizeUrl(item, config) {
    if (!(config.field in item)) {
      return null;
    }

    let url = this._parseUrl(item[config.field]);
    if (url === null) {
      return null;
    }
    let domain = url.hostname;
    if (domain.startsWith("www.")) {
      domain = domain.substring(4);
    }
    let toks = this.tokenize(domain);
    let pathToks = this.tokenize(
      this._safeDecode(url.pathname.replace(/\+/g, " "))
    );
    for (let tok of pathToks) {
      toks.push(tok);
    }
    // searchParams entries are already percent-decoded, so a literal "%" in a
    // decoded key or value would make a strict second decode throw; the safe
    // decode keeps the text as-is in that case.
    for (let pair of url.searchParams.entries()) {
      let k = this.tokenize(this._safeDecode(pair[0].replace(/\+/g, " ")));
      for (let tok of k) {
        toks.push(tok);
      }
      if (pair[1] !== null && pair[1] !== "") {
        let v = this.tokenize(this._safeDecode(pair[1].replace(/\+/g, " ")));
        for (let tok of v) {
          toks.push(tok);
        }
      }
    }
    item[config.dest] = toks;

    return item;
  }

  /**
   * Gets the hostname (minus any initial "www.") along with the left most
   * directories on the path. The result is lower-cased independently of the
   * host locale. Aborts (returns null) if the field is missing or is not a
   * parsable URL.
   *
   * Config:
   *  field          Field containing the URL
   *  dest           Field to write the resulting string to
   *  path_length    OPTIONAL (DEFAULT: 0) Number of leftmost subdirectories to include
   */
  getUrlDomain(item, config) {
    if (!(config.field in item)) {
      return null;
    }

    let url = this._parseUrl(item[config.field]);
    if (url === null) {
      return null;
    }
    let domain = url.hostname.toLowerCase();
    if (domain.startsWith("www.")) {
      domain = domain.substring(4);
    }
    item[config.dest] = domain;
    let pathLength = 0;
    if ("path_length" in config) {
      pathLength = config.path_length;
    }
    if (pathLength > 0) {
      // pathname starts with "/", so the first split piece is empty; keeping
      // pathLength + 1 pieces yields "/dir1/.../dirN".
      item[config.dest] += url.pathname
        .toLowerCase()
        .split("/")
        .slice(0, pathLength + 1)
        .join("/");
    }

    return item;
  }

  /**
   * Splits a field into tokens.
   * Config:
   *  field         Field containing a string to tokenize
   *  dest          Field to write the array of strings to
   */
  tokenizeField(item, config) {
    if (!(config.field in item)) {
      return null;
    }

    item[config.dest] = this.tokenize(item[config.field]);

    return item;
  }

  /**
   * Deep copy from one field to another. The copy is made with a JSON
   * round-trip.
   * Config:
   *  src           Field to read from
   *  dest          Field to write to
   */
  copyValue(item, config) {
    if (!(config.src in item)) {
      return null;
    }

    item[config.dest] = JSON.parse(JSON.stringify(item[config.src]));

    return item;
  }

  /**
   * Converts a field containing a map of strings to a map of strings
   * to numbers, to a map of strings to numbers containing at most k elements.
   * This operation is performed by first, promoting all the subkeys up one
   * level, and then taking the top (or bottom) k values.
   *
   * Config:
   *  field         Points to a map of strings to a map of strings to numbers
   *  k             Maximum number of items to keep (a number, or the name of
   *                a numeric field; 1048576 if it cannot be resolved)
   *  descending    OPTIONAL (DEFAULT: True) Sorts score in descending order
   *                  (i.e. keeps maximum)
   */
  keepTopK(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let k = this._lookupScalar(item, config.k, 1048576);
    let descending = !("descending" in config) || config.descending !== false;

    // we can't sort by the values in the map, so we have to convert this
    // to an array, and then sort.
    let sortable = [];
    Object.keys(item[config.field]).forEach(outerKey => {
      let innerType = this._typeOf(item[config.field][outerKey]);
      if (innerType === "map") {
        Object.keys(item[config.field][outerKey]).forEach(innerKey => {
          sortable.push({
            key: innerKey,
            value: item[config.field][outerKey][innerKey],
          });
        });
      } else {
        // Values that are not nested maps are kept under their own key.
        sortable.push({ key: outerKey, value: item[config.field][outerKey] });
      }
    });

    sortable.sort((a, b) => {
      if (descending) {
        return b.value - a.value;
      }
      return a.value - b.value;
    });

    // now take the top k
    let newMap = {};
    let i = 0;
    for (let pair of sortable) {
      if (i >= k) {
        break;
      }
      newMap[pair.key] = pair.value;
      i++;
    }
    item[config.field] = newMap;

    return item;
  }

  /**
   * Scalar multiplies a vector by some constant
   *
   * Config:
   *  field         Points to:
   *                   a map of strings to numbers
   *                   an array of numbers
   *                   a number
   *  k             Either a number, or a string. If it's a number then this
   *                  is the scalar value to multiply by. If it's a string,
   *                  the value in the pointed to field is used.
   *  default       OPTIONAL (DEFAULT: 0), If no numeric value can be resolved
   *                  from k, then use this value. The legacy spelling "dfault"
   *                  is also accepted.
   */
  scalarMultiply(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let k = this._lookupScalar(item, config.k, this._lookupDefault(config));

    let fieldType = this._typeOf(item[config.field]);
    if (fieldType === "number") {
      item[config.field] *= k;
    } else if (fieldType === "array") {
      for (let i = 0; i < item[config.field].length; i++) {
        item[config.field][i] *= k;
      }
    } else if (fieldType === "map") {
      Object.keys(item[config.field]).forEach(key => {
        item[config.field][key] *= k;
      });
    } else {
      return null;
    }

    return item;
  }

  /**
   * Elementwise multiplies either two maps or two arrays (or two numbers)
   * together, storing the result in left. If left and right are not of the
   * same type, or are arrays of different lengths, the pipeline is aborted.
   *
   * Maps are special case. For maps the left must be a nested map such as:
   * { k1: { k11: 1, k12: 2}, k2: { k21: 3, k22: 4 } } and right needs to be
   * simple map such as: { k1: 5, k2: 6} .  The operation is then to multiply
   * every value of every right key, to every value every subkey where the
   * parent keys match. Using the previous examples, the result would be:
   * { k1: { k11: 5, k12: 10 }, k2: { k21: 18, k22: 24 } } .
   * Left keys that are missing from right are multiplied by 0.
   *
   * Config:
   *  left
   *  right
   */
  elementwiseMultiply(item, config) {
    if (!(config.left in item) || !(config.right in item)) {
      return null;
    }
    let leftType = this._typeOf(item[config.left]);
    if (leftType !== this._typeOf(item[config.right])) {
      return null;
    }
    if (leftType === "array") {
      if (item[config.left].length !== item[config.right].length) {
        return null;
      }
      for (let i = 0; i < item[config.left].length; i++) {
        item[config.left][i] *= item[config.right][i];
      }
    } else if (leftType === "map") {
      Object.keys(item[config.left]).forEach(outerKey => {
        let r = 0.0;
        if (outerKey in item[config.right]) {
          r = item[config.right][outerKey];
        }
        Object.keys(item[config.left][outerKey]).forEach(innerKey => {
          item[config.left][outerKey][innerKey] *= r;
        });
      });
    } else if (leftType === "number") {
      item[config.left] *= item[config.right];
    } else {
      return null;
    }

    return item;
  }

  /**
   * Vector multiplies (i.e. dot products) two vectors and stores the result in
   * third field. Both vectors must either be maps, or arrays of numbers with
   * the same length. For maps, only keys present on both sides contribute.
   *
   * Config:
   *  left       A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  right      A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  dest       The field to store the dot product.
   */
  vectorMultiply(item, config) {
    if (!(config.left in item) || !(config.right in item)) {
      return null;
    }

    let leftType = this._typeOf(item[config.left]);
    if (leftType !== this._typeOf(item[config.right])) {
      return null;
    }

    let destVal = 0.0;
    if (leftType === "array") {
      if (item[config.left].length !== item[config.right].length) {
        return null;
      }
      for (let i = 0; i < item[config.left].length; i++) {
        destVal += item[config.left][i] * item[config.right][i];
      }
    } else if (leftType === "map") {
      Object.keys(item[config.left]).forEach(key => {
        if (key in item[config.right]) {
          destVal += item[config.left][key] * item[config.right][key];
        }
      });
    } else {
      return null;
    }

    item[config.dest] = destVal;
    return item;
  }

  /**
   * Adds a constant value to all elements in the field. Mathematically,
   * this is the same as taking a 1-vector, scalar multiplying it by k,
   * and then vector adding it to a field.
   *
   * Config:
   *  field     A field pointing to either a map of strings to numbers,
   *                an array of numbers, or a number
   *  k         Either a number, or a string. If it's a number then this
   *                is the scalar value to add. If it's a string,
   *                the value in the pointed to field is used.
   *  default   OPTIONAL (DEFAULT: 0), If no numeric value can be resolved
   *                from k, then use this value. The legacy spelling "dfault"
   *                is also accepted.
   */
  scalarAdd(item, config) {
    let k = this._lookupScalar(item, config.k, this._lookupDefault(config));
    if (!(config.field in item)) {
      return null;
    }

    let fieldType = this._typeOf(item[config.field]);
    if (fieldType === "array") {
      for (let i = 0; i < item[config.field].length; i++) {
        item[config.field][i] += k;
      }
    } else if (fieldType === "map") {
      Object.keys(item[config.field]).forEach(key => {
        item[config.field][key] += k;
      });
    } else if (fieldType === "number") {
      item[config.field] += k;
    } else {
      return null;
    }

    return item;
  }

  /**
   * Adds two vectors together and stores the result in left. If left is
   * missing, right is deep-copied into it instead.
   *
   * Config:
   *  left      A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   *  right     A field pointing to either a map of strings to numbers,
   *                or an array of numbers
   */
  vectorAdd(item, config) {
    if (!(config.left in item)) {
      return this.copyValue(item, { src: config.right, dest: config.left });
    }
    if (!(config.right in item)) {
      return null;
    }

    let leftType = this._typeOf(item[config.left]);
    if (leftType !== this._typeOf(item[config.right])) {
      return null;
    }
    if (leftType === "array") {
      if (item[config.left].length !== item[config.right].length) {
        return null;
      }
      for (let i = 0; i < item[config.left].length; i++) {
        item[config.left][i] += item[config.right][i];
      }
      return item;
    } else if (leftType === "map") {
      Object.keys(item[config.right]).forEach(key => {
        let v = 0;
        if (key in item[config.left]) {
          v = item[config.left][key];
        }
        item[config.left][key] = v + item[config.right][key];
      });
      return item;
    }

    return null;
  }

  /**
   * Converts a vector from real values to boolean integers. (i.e. either 1/0
   * or 1/-1).
   *
   * Config:
   *  field            Field containing either a map of strings to numbers,
   *                      an array of numbers, or a number to convert.
   *  threshold        OPTIONAL (DEFAULT: 0) Values above this will be replaced
   *                      with 1.0. Those at or below will be converted to 0.
   *  keep_negative    OPTIONAL (DEFAULT: False) If true, values at or below the
   *                      threshold will be converted to -1 instead of 0.
   */
  makeBoolean(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let threshold = this._lookupScalar(item, config.threshold, 0.0);
    let toBoolean = value => {
      if (value > threshold) {
        return 1.0;
      }
      return config.keep_negative ? -1.0 : 0.0;
    };

    let type = this._typeOf(item[config.field]);
    if (type === "array") {
      for (let i = 0; i < item[config.field].length; i++) {
        item[config.field][i] = toBoolean(item[config.field][i]);
      }
    } else if (type === "map") {
      Object.keys(item[config.field]).forEach(key => {
        item[config.field][key] = toBoolean(item[config.field][key]);
      });
    } else if (type === "number") {
      item[config.field] = toBoolean(item[config.field]);
    } else {
      return null;
    }

    return item;
  }

  /**
   * Removes all keys from the item except for the ones specified. Returns a
   * new object rather than mutating the item.
   *
   * Config:
   *  fields           An array of strings indicating the fields to keep
   */
  allowFields(item, config) {
    let newItem = {};
    for (let ele of config.fields) {
      if (ele in item) {
        newItem[ele] = item[ele];
      }
    }
    return newItem;
  }

  /**
   * Removes all keys whose value does not exceed some threshold.
   *
   * Config:
   *  field         Points to a map of strings to numbers
   *  threshold     Values must exceed this value, otherwise they are removed.
   *                  (DEFAULT: 0 when it cannot be resolved)
   */
  filterByValue(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let threshold = this._lookupScalar(item, config.threshold, 0.0);
    let filtered = {};
    Object.keys(item[config.field]).forEach(key => {
      let value = item[config.field][key];
      if (value > threshold) {
        filtered[key] = value;
      }
    });
    item[config.field] = filtered;

    return item;
  }

  /**
   * Rewrites a field so that its values are now L2 normed. A vector whose
   * norm is 0 is left untouched.
   *
   * Config:
   *  field         Points to a map of strings to numbers, or an array of numbers
   */
  l2Normalize(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let data = item[config.field];
    let type = this._typeOf(data);
    if (type === "array") {
      let norm = 0.0;
      for (let datum of data) {
        norm += datum * datum;
      }
      norm = Math.sqrt(norm);
      if (norm !== 0) {
        for (let i = 0; i < data.length; i++) {
          data[i] /= norm;
        }
      }
    } else if (type === "map") {
      let norm = 0.0;
      Object.keys(data).forEach(key => {
        norm += data[key] * data[key];
      });
      norm = Math.sqrt(norm);
      if (norm !== 0) {
        Object.keys(data).forEach(key => {
          data[key] /= norm;
        });
      }
    } else {
      return null;
    }

    item[config.field] = data;

    return item;
  }

  /**
   * Rewrites a field so that all of its values sum to 1.0 . A vector whose
   * values sum to 0 is left untouched.
   *
   * Config:
   *  field         Points to a map of strings to numbers, or an array of numbers
   */
  probNormalize(item, config) {
    if (!(config.field in item)) {
      return null;
    }
    let data = item[config.field];
    let type = this._typeOf(data);
    if (type === "array") {
      let norm = 0.0;
      for (let datum of data) {
        norm += datum;
      }
      if (norm !== 0) {
        for (let i = 0; i < data.length; i++) {
          data[i] /= norm;
        }
      }
    } else if (type === "map") {
      let norm = 0.0;
      Object.keys(item[config.field]).forEach(key => {
        norm += item[config.field][key];
      });
      if (norm !== 0) {
        Object.keys(item[config.field]).forEach(key => {
          item[config.field][key] /= norm;
        });
      }
    } else {
      return null;
    }

    return item;
  }

  /**
   * Stores a value, if it is not already present
   *
   * Config:
   *  field             field to write to if it is missing
   *  value             value to store in that field. If it is a string naming
   *                      a numeric field of the item, that field's number is
   *                      stored instead.
   */
  setDefault(item, config) {
    let val = this._lookupScalar(item, config.value, config.value);
    if (!(config.field in item)) {
      item[config.field] = val;
    }

    return item;
  }

  /**
   * Selectively promotes a value from an inner map up to the outer map. The
   * item is returned unchanged when the haystack or the needle is missing, or
   * when the haystack is not an object.
   *
   * Config:
   *  haystack            Points to a map of strings to values
   *  needle              Key inside the map we should promote up
   *  dest                Where we should write the value of haystack[needle]
   */
  lookupValue(item, config) {
    if (!(config.haystack in item)) {
      return item;
    }
    let haystack = item[config.haystack];
    // The `in` operator throws on primitives, so only search real objects.
    if (
      typeof haystack === "object" &&
      haystack !== null &&
      config.needle in haystack
    ) {
      item[config.dest] = haystack[config.needle];
    }

    return item;
  }

  /**
   * Demotes a field into a map. The destination map is created if missing.
   * The value is stored by reference, not copied.
   *
   * Config:
   *  src               Field to copy
   *  dest_map          Points to a map
   *  dest_key          Key inside dest_map to copy src to
   */
  copyToMap(item, config) {
    if (config.src in item) {
      if (!(config.dest_map in item)) {
        item[config.dest_map] = {};
      }
      item[config.dest_map][config.dest_key] = item[config.src];
    }

    return item;
  }

  /**
   * Multiplies every value of a nested tag map by a scalar, optionally taking
   * the logarithm of each value first.
   *
   * Config:
   *  field             Points to a map of strings to maps of strings to numbers
   *  k                 Scalar to multiply the values by (DEFAULT: 1 when it
   *                      cannot be resolved)
   *  log_scale         Boolean, if true, then the values will be transformed
   *                      by a logarithm prior to multiplications
   */
  scalarMultiplyTag(item, config) {
    // Added before taking the logarithm so that a value of 0 stays finite.
    let EPSILON = 0.000001;
    if (!(config.field in item)) {
      return null;
    }
    let k = this._lookupScalar(item, config.k, 1);
    let type = this._typeOf(item[config.field]);
    if (type === "map") {
      Object.keys(item[config.field]).forEach(parentKey => {
        Object.keys(item[config.field][parentKey]).forEach(key => {
          let v = item[config.field][parentKey][key];
          if (config.log_scale) {
            v = Math.log(v + EPSILON);
          }
          item[config.field][parentKey][key] = v * k;
        });
      });
    } else {
      return null;
    }

    return item;
  }

  /**
   * Independently applies softmax across all subtags. Aborts (returns null),
   * leaving the item untouched, if the field is not a map of maps of numbers.
   *
   * Config:
   *  field       Points to a map of strings with values being another map of strings
   */
  applySoftmaxTags(item, config) {
    let type = this._typeOf(item[config.field]);
    if (type !== "map") {
      return null;
    }

    // First pass: validate the structure and accumulate the per-tag
    // denominators, so that nothing is rewritten if validation fails.
    let abort = false;
    let softmaxSum = {};
    Object.keys(item[config.field]).forEach(tag => {
      if (this._typeOf(item[config.field][tag]) !== "map") {
        abort = true;
        return;
      }
      if (abort) {
        return;
      }
      softmaxSum[tag] = 0;
      Object.keys(item[config.field][tag]).forEach(subtag => {
        if (this._typeOf(item[config.field][tag][subtag]) !== "number") {
          abort = true;
          return;
        }
        let score = item[config.field][tag][subtag];
        softmaxSum[tag] += Math.exp(score);
      });
    });
    if (abort) {
      return null;
    }

    // Second pass: normalize each subtag score by its tag's denominator.
    Object.keys(item[config.field]).forEach(tag => {
      Object.keys(item[config.field][tag]).forEach(subtag => {
        item[config.field][tag][subtag] =
          Math.exp(item[config.field][tag][subtag]) / softmaxSum[tag];
      });
    });

    return item;
  }

  /**
   * Vector adds a field and stores the result in left. A field missing from
   * left starts out empty (or 0); a field missing from right leaves left
   * unchanged. Arrays of different lengths are combined index by index, with
   * extra right elements appended.
   *
   * Config:
   *  field              The field to vector add
   */
  combinerAdd(left, right, config) {
    if (!(config.field in right)) {
      return left;
    }
    let type = this._typeOf(right[config.field]);
    if (!(config.field in left)) {
      if (type === "map") {
        left[config.field] = {};
      } else if (type === "array") {
        left[config.field] = [];
      } else if (type === "number") {
        left[config.field] = 0;
      } else {
        return null;
      }
    }
    if (type !== this._typeOf(left[config.field])) {
      return null;
    }
    if (type === "map") {
      Object.keys(right[config.field]).forEach(key => {
        if (!(key in left[config.field])) {
          left[config.field][key] = 0;
        }
        left[config.field][key] += right[config.field][key];
      });
    } else if (type === "array") {
      for (let i = 0; i < right[config.field].length; i++) {
        if (i < left[config.field].length) {
          left[config.field][i] += right[config.field][i];
        } else {
          left[config.field].push(right[config.field][i]);
        }
      }
    } else if (type === "number") {
      left[config.field] += right[config.field];
    } else {
      return null;
    }

    return left;
  }

  /**
   * Stores the maximum value of the field in left. A field (or map key, or
   * array index) missing from left takes the value from right; a field
   * missing from right leaves left unchanged.
   *
   * Config:
   *  field              The field to take the elementwise maximum of
   */
  combinerMax(left, right, config) {
    if (!(config.field in right)) {
      return left;
    }
    let type = this._typeOf(right[config.field]);
    if (!(config.field in left)) {
      if (type === "map") {
        left[config.field] = {};
      } else if (type === "array") {
        left[config.field] = [];
      } else if (type === "number") {
        // Seed with right's value rather than 0, so that a negative maximum
        // is not lost (this matches how missing map keys are handled below).
        left[config.field] = right[config.field];
      } else {
        return null;
      }
    }
    if (type !== this._typeOf(left[config.field])) {
      return null;
    }
    if (type === "map") {
      Object.keys(right[config.field]).forEach(key => {
        if (
          !(key in left[config.field]) ||
          right[config.field][key] > left[config.field][key]
        ) {
          left[config.field][key] = right[config.field][key];
        }
      });
    } else if (type === "array") {
      for (let i = 0; i < right[config.field].length; i++) {
        if (i < left[config.field].length) {
          if (left[config.field][i] < right[config.field][i]) {
            left[config.field][i] = right[config.field][i];
          }
        } else {
          left[config.field].push(right[config.field][i]);
        }
      }
    } else if (type === "number") {
      if (left[config.field] < right[config.field]) {
        left[config.field] = right[config.field];
      }
    } else {
      return null;
    }

    return left;
  }

  /**
   * Associates a value in right with another value in right. This association
   * is then stored in a map in left.
   *
   *     For example: If a sequence of rights is:
   *     { 'tags': {}, 'url_domain': 'maseratiusa.com/maserati', 'time': 41 }
   *     { 'tags': {}, 'url_domain': 'mbusa.com/mercedes', 'time': 21 }
   *     { 'tags': {}, 'url_domain': 'maseratiusa.com/maserati', 'time': 34 }
   *
   *     Then assuming a 'sum' operation, left can build a map that would look like:
   *     {
   *         'maseratiusa.com/maserati': 75,
   *         'mbusa.com/mercedes': 21,
   *     }
   *
   * Returns null for an unknown operation. If right lacks the key or value
   * field, left is returned with the (possibly newly created) map unchanged.
   *
   * Fields:
   *  left_field              field in the left to store / update the map
   *  right_key_field         Field in the right to use as a key
   *  right_value_field       Field in the right to use as a value
   *  operation               One of "sum", "max", "overwrite", "count"
   */
  combinerCollectValues(left, right, config) {
    let op;
    if (config.operation === "sum") {
      op = (a, b) => a + b;
    } else if (config.operation === "max") {
      op = (a, b) => (a > b ? a : b);
    } else if (config.operation === "overwrite") {
      op = (a, b) => b;
    } else if (config.operation === "count") {
      op = (a, b) => a + 1;
    } else {
      return null;
    }
    if (!(config.left_field in left)) {
      left[config.left_field] = {};
    }
    if (
      !(config.right_key_field in right) ||
      !(config.right_value_field in right)
    ) {
      return left;
    }

    let key = right[config.right_key_field];
    let rightValue = right[config.right_value_field];
    let collected = left[config.left_field];
    if (key in collected) {
      collected[key] = op(collected[key], rightValue);
    } else if (config.operation === "max") {
      // The first value seen for a key is its maximum, even when negative.
      collected[key] = rightValue;
    } else {
      collected[key] = op(0.0, rightValue);
    }

    return left;
  }

  /**
   * Executes a builder recipe. Returns an object on success, or null on
   * failure (an unknown step function, or a step that aborted). A missing
   * recipe returns the item unchanged.
   */
  executeRecipe(item, recipe) {
    let newItem = item;
    if (recipe) {
      for (let step of recipe) {
        let op = this.ITEM_BUILDER_REGISTRY[step.function];
        if (op === undefined) {
          return null;
        }
        newItem = op.call(this, newItem, step);
        if (newItem === null) {
          break;
        }
      }
    }
    return newItem;
  }

  /**
   * Executes a combiner recipe, merging item2 into item1. Returns an object
   * on success, or null on failure (an unknown step function, or a step that
   * aborted). A missing recipe returns item1 unchanged.
   */
  executeCombinerRecipe(item1, item2, recipe) {
    let newItem1 = item1;
    if (recipe) {
      for (let step of recipe) {
        let op = this.ITEM_COMBINER_REGISTRY[step.function];
        if (op === undefined) {
          return null;
        }
        newItem1 = op.call(this, newItem1, item2, step);
        if (newItem1 === null) {
          break;
        }
      }
    }

    return newItem1;
  }
};