/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { FormAutofill } from "resource://autofill/FormAutofill.sys.mjs";
import { HeuristicsRegExp } from "resource://gre/modules/shared/HeuristicsRegExp.sys.mjs";

const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  CreditCard: "resource://gre/modules/CreditCard.sys.mjs",
  CreditCardRulesets: "resource://gre/modules/shared/CreditCardRuleset.sys.mjs",
  FieldScanner: "resource://gre/modules/shared/FieldScanner.sys.mjs",
  FormAutofillUtils: "resource://gre/modules/shared/FormAutofillUtils.sys.mjs",
  LabelUtils: "resource://gre/modules/shared/LabelUtils.sys.mjs",
});

/**
 * To help us classify sections, we want to know what fields can appear
 * multiple times in a row.
 * Such fields, like `address-line{X}`, should not break sections.
 */
const MULTI_FIELD_NAMES = [
  "address-level3",
  "address-level2",
  "address-level1",
  "tel",
  "postal-code",
  "email",
  "street-address",
];

/**
 * To help us classify sections that can appear only N times in a row.
 * For example, the only time multiple cc-number fields are valid is when
 * there are four of these fields in a row.
 * Otherwise, multiple cc-number fields should be in separate sections.
 */
const MULTI_N_FIELD_NAMES = {
  "cc-number": 4,
};

/**
 * A group of field details that are treated as one autofill section.
 * The section type is derived from the first field passed to the constructor.
 */
export class FormSection {
  static ADDRESS = "address";
  static CREDIT_CARD = "creditCard";

  #fieldDetails = [];

  #name = "";

  /**
   * @param {Array} fieldDetails
   *        The initial, non-empty list of field details of the section.
   * @throws {TypeError} When `fieldDetails` is empty.
   * @throws {Error} When the first field is neither an address field nor a
   *         credit card field.
   */
  constructor(fieldDetails) {
    if (!fieldDetails.length) {
      throw new TypeError("A section should contain at least one field");
    }

    fieldDetails.forEach(field => this.addField(field));

    const fieldName = fieldDetails[0].fieldName;
    if (lazy.FormAutofillUtils.isAddressField(fieldName)) {
      this.type = FormSection.ADDRESS;
    } else if (lazy.FormAutofillUtils.isCreditCardField(fieldName)) {
      this.type = FormSection.CREDIT_CARD;
    } else {
      throw new Error("Unknown field type to create a section.");
    }
  }

  get fieldDetails() {
    return this.#fieldDetails;
  }

  get name() {
    return this.#name;
  }

  /**
   * Append a field to the section. The section takes the name of the first
   * added field that carries a truthy `sectionName`.
   *
   * @param {object} fieldDetail
   */
  addField(fieldDetail) {
    this.#name ||= fieldDetail.sectionName;
    this.#fieldDetails.push(fieldDetail);
  }
}

/**
 * Returns the autocomplete information of fields according to heuristics.
 */
export const FormAutofillHeuristics = {
  RULES: HeuristicsRegExp.getRules(),
  LABEL_RULES: HeuristicsRegExp.getLabelRules(),

  // Both lists are replaced by lazy getters at the bottom of this file.
  CREDIT_CARD_FIELDNAMES: [],
  ADDRESS_FIELDNAMES: [],

  /**
   * Try to find a contiguous sub-array within an array.
   *
   * @param {Array} array
   * @param {Array} subArray
   *
   * @returns {boolean}
   *          Return whether subArray was found within the array or not.
   */
  _matchContiguousSubArray(array, subArray) {
    return array.some((elm, i) =>
      subArray.every((sElem, j) => sElem == array[i + j])
    );
  },

  /**
   * Try to find the field that looks like a month select.
   *
   * @param {DOMElement} element
   * @returns {boolean}
   *          Return true if we observe the trait of month select in
   *          the current element.
   */
  _isExpirationMonthLikely(element) {
    if (!HTMLSelectElement.isInstance(element)) {
      return false;
    }

    const options = [...element.options];

    // The number of month options shouldn't be less than 12 or larger than 13
    // including the default option.
    if (options.length < 12 || options.length > 13) {
      return false;
    }

    // The values 1..12 must appear in order either as option values or labels.
    const desiredValues = Array.from({ length: 12 }, (_, i) => i + 1);

    return (
      this._matchContiguousSubArray(
        options.map(e => +e.value),
        desiredValues
      ) ||
      this._matchContiguousSubArray(
        options.map(e => +e.label),
        desiredValues
      )
    );
  },

  /**
   * Try to find the field that looks like a year select.
   *
   * @param {DOMElement} element
   * @returns {boolean}
   *          Return true if we observe the trait of year select in
   *          the current element.
   */
  _isExpirationYearLikely(element) {
    if (!HTMLSelectElement.isInstance(element)) {
      return false;
    }

    const options = [...element.options];

    // A normal expiration year select should contain at least the current
    // year and the two following years, in order.
    const curYear = new Date().getFullYear();
    const desiredValues = Array.from({ length: 3 }, (_, i) => curYear + i);

    return (
      this._matchContiguousSubArray(
        options.map(e => +e.value),
        desiredValues
      ) ||
      this._matchContiguousSubArray(
        options.map(e => +e.label),
        desiredValues
      )
    );
  },

  /**
   * Try to match the telephone related fields to the grammar
   * list to see if there is any valid telephone set and correct their
   * field names.
   *
   * @param {FieldScanner} scanner
   *        The current parsing status for all elements
   * @param {object} _fieldDetail
   *        Unused; accepted so that all parsers share the same signature.
   * @returns {boolean}
   *          Return true if there is any field can be recognized in the parser,
   *          otherwise false.
   */
  _parsePhoneFields(scanner, _fieldDetail) {
    let matchingResult;
    const GRAMMARS = this.PHONE_FIELD_GRAMMARS;

    function isGrammarSeparator(index) {
      return !GRAMMARS[index][0];
    }

    const savedIndex = scanner.parsingIndex;
    for (let ruleFrom = 0; ruleFrom < GRAMMARS.length; ) {
      const detailStart = scanner.parsingIndex;
      let ruleTo = ruleFrom;
      for (let count = 0; ruleTo < GRAMMARS.length; ruleTo++, count++) {
        // Bail out when reaching the end of the current set of grammars
        // or there are no more elements to parse
        if (
          isGrammarSeparator(ruleTo) ||
          !scanner.elementExisting(detailStart + count)
        ) {
          break;
        }

        const [category, , length] = GRAMMARS[ruleTo];
        const detail = scanner.getFieldDetailByIndex(detailStart + count);

        // If the field is not what this grammar rule is interested in, skip processing.
        if (
          !detail ||
          detail.fieldName != category ||
          detail.reason == "autocomplete"
        ) {
          break;
        }

        // A rule limited to N characters only matches an element whose own
        // limit is between 1 and N. An <input> without a maxlength attribute
        // reports -1 (and other elements report undefined), both of which
        // mean "unlimited" and therefore must not satisfy a limited rule.
        const { maxLength } = detail.element;
        if (length && (!(maxLength > 0) || length < maxLength)) {
          break;
        }
      }

      // if we reach the grammar separator, that means all the previous rules are matched.
      // Set the matchingResult so we update field names accordingly.
      if (isGrammarSeparator(ruleTo)) {
        matchingResult = { ruleFrom, ruleTo };
        break;
      }

      // Fast forward to the next rule set.
      for (; ruleFrom < GRAMMARS.length; ) {
        if (isGrammarSeparator(ruleFrom++)) {
          break;
        }
      }
    }

    if (matchingResult) {
      const { ruleFrom, ruleTo } = matchingResult;
      for (let i = ruleFrom; i < ruleTo; i++) {
        scanner.updateFieldName(scanner.parsingIndex, GRAMMARS[i][1]);
        scanner.parsingIndex++;
      }
    }

    // If the previous parsed field is a "tel" field, run heuristic to see
    // if the current field is a "tel-extension" field
    const field = scanner.getFieldDetailByIndex(scanner.parsingIndex);
    if (field && field.reason != "autocomplete") {
      const prev = scanner.getFieldDetailByIndex(scanner.parsingIndex - 1);
      if (
        prev &&
        lazy.FormAutofillUtils.getCategoryFromFieldName(prev.fieldName) == "tel"
      ) {
        // The "g" flag is required because testRegex() relies on matchAll().
        const regExpTelExtension = new RegExp(
          "\\bext|ext\\b|extension|ramal", // pt-BR, pt-PT
          "iug"
        );
        if (this._matchRegexp(field.element, regExpTelExtension)) {
          scanner.updateFieldName(scanner.parsingIndex, "tel-extension");
          scanner.parsingIndex++;
        }
      }
    }

    return savedIndex != scanner.parsingIndex;
  },

  /**
   * Try to find the correct address-line[1-3] sequence and correct their field
   * names.
   *
   * @param {FieldScanner} scanner
   *        The current parsing status for all elements
   * @param {object} _fieldDetail
   *        Unused; accepted so that all parsers share the same signature.
   * @returns {boolean}
   *          Return true if there is any field can be recognized in the parser,
   *          otherwise false.
   */
  _parseStreetAddressFields(scanner, _fieldDetail) {
    const INTERESTED_FIELDS = [
      "street-address",
      "address-line1",
      "address-line2",
      "address-line3",
    ];

    // Collect the run of consecutive street address fields that starts at the
    // current parsing position.
    const fields = [];
    for (let idx = scanner.parsingIndex; !scanner.parsingFinished; idx++) {
      const detail = scanner.getFieldDetailByIndex(idx);
      if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
        break;
      }
      fields.push(detail);
    }

    if (!fields.length) {
      return false;
    }

    switch (fields.length) {
      case 1:
        if (
          fields[0].reason != "autocomplete" &&
          ["address-line2", "address-line3"].includes(fields[0].fieldName)
        ) {
          scanner.updateFieldName(scanner.parsingIndex, "address-line1");
        }
        break;
      case 2:
        if (fields[0].reason == "autocomplete") {
          if (
            fields[0].fieldName == "street-address" &&
            (fields[1].fieldName == "address-line2" ||
              fields[1].reason != "autocomplete")
          ) {
            scanner.updateFieldName(
              scanner.parsingIndex,
              "address-line1",
              true
            );
          }
        } else {
          scanner.updateFieldName(scanner.parsingIndex, "address-line1");
        }

        scanner.updateFieldName(scanner.parsingIndex + 1, "address-line2");
        break;
      case 3:
      default:
        // Only the first three fields of a longer run are renamed; the whole
        // run is still consumed below.
        scanner.updateFieldName(scanner.parsingIndex, "address-line1");
        scanner.updateFieldName(scanner.parsingIndex + 1, "address-line2");
        scanner.updateFieldName(scanner.parsingIndex + 2, "address-line3");
        break;
    }

    scanner.parsingIndex += fields.length;
    return true;
  },

  /**
   * Consume a run of address-level1/address-level2 fields. When the run is a
   * single address-level2 field and a neighbouring <select> has no field name
   * yet, that <select> is labelled as address-level1.
   *
   * @param {FieldScanner} scanner
   *        The current parsing status for all elements
   * @param {object} fieldDetail
   *        The field detail at the current parsing position.
   * @returns {boolean}
   *          Return true if there is any field can be recognized in the parser,
   *          otherwise false.
   */
  _parseAddressFields(scanner, fieldDetail) {
    const INTERESTED_FIELDS = ["address-level1", "address-level2"];

    if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
      return false;
    }

    const fields = [];
    for (let idx = scanner.parsingIndex; !scanner.parsingFinished; idx++) {
      const detail = scanner.getFieldDetailByIndex(idx);
      if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
        break;
      }
      fields.push(detail);
    }

    if (!fields.length) {
      return false;
    }

    // State & City(address-level2)
    if (fields.length == 1) {
      if (fields[0].fieldName == "address-level2") {
        const prev = scanner.getFieldDetailByIndex(scanner.parsingIndex - 1);
        if (
          prev &&
          !prev.fieldName &&
          HTMLSelectElement.isInstance(prev.element)
        ) {
          scanner.updateFieldName(scanner.parsingIndex - 1, "address-level1");
          scanner.parsingIndex += 1;
          return true;
        }

        const next = scanner.getFieldDetailByIndex(scanner.parsingIndex + 1);
        if (
          next &&
          !next.fieldName &&
          HTMLSelectElement.isInstance(next.element)
        ) {
          scanner.updateFieldName(scanner.parsingIndex + 1, "address-level1");
          scanner.parsingIndex += 2;
          return true;
        }
      }
    }

    scanner.parsingIndex += fields.length;
    return true;
  },

  /**
   * Try to look for expiration date fields and revise the field names if needed.
   *
   * @param {FieldScanner} scanner
   *        The current parsing status for all elements
   * @param {object} fieldDetail
   *        The field detail at the current parsing position.
   * @returns {boolean}
   *          Return true if there is any field can be recognized in the parser,
   *          otherwise false.
   */
  _parseCreditCardExpiryFields(scanner, fieldDetail) {
    const INTERESTED_FIELDS = ["cc-exp", "cc-exp-month", "cc-exp-year"];

    if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
      return false;
    }

    const fields = [];
    for (let idx = scanner.parsingIndex; ; idx++) {
      const detail = scanner.getFieldDetailByIndex(idx);
      if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
        break;
      }
      fields.push(detail);
    }

    // Don't process the fields if expiration month and expiration year are already
    // matched by regex in correct order.
    if (
      (fields.length == 1 && fields[0].fieldName == "cc-exp") ||
      (fields.length == 2 &&
        fields[0].fieldName == "cc-exp-month" &&
        fields[1].fieldName == "cc-exp-year")
    ) {
      scanner.parsingIndex += fields.length;
      return true;
    }

    // Walk backwards and collect the names of the credit card fields that
    // directly precede the current position.
    const prevCCFields = new Set();
    for (let idx = scanner.parsingIndex - 1; ; idx--) {
      const detail = scanner.getFieldDetailByIndex(idx);
      if (
        lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=
        "creditCard"
      ) {
        break;
      }
      prevCCFields.add(detail.fieldName);
    }

    // We update the "cc-exp-*" fields to the correct "cc-exp-*" fields order
    // when the following conditions are met:
    // 1. The previous elements are identified as credit card fields and
    //    cc-number or cc-name is in it
    // 2. There is no "cc-exp-*" fields in the previous credit card elements
    if (
      ["cc-number", "cc-name"].some(f => prevCCFields.has(f)) &&
      !["cc-exp", "cc-exp-month", "cc-exp-year"].some(f => prevCCFields.has(f))
    ) {
      if (fields.length == 1) {
        scanner.updateFieldName(scanner.parsingIndex, "cc-exp");
      } else if (fields.length == 2) {
        scanner.updateFieldName(scanner.parsingIndex, "cc-exp-month");
        scanner.updateFieldName(scanner.parsingIndex + 1, "cc-exp-year");
      }
      scanner.parsingIndex += fields.length;
      return true;
    }

    // Set field name to null as it failed to match any patterns.
    for (let idx = 0; idx < fields.length; idx++) {
      scanner.updateFieldName(scanner.parsingIndex + idx, null);
    }
    return false;
  },

  /**
   * Look for cc-*-name fields when *-name field is present
   *
   * @param {FieldScanner} scanner
   *        The current parsing status for all elements
   * @param {object} fieldDetail
   *        The field detail at the current parsing position.
   * @returns {boolean}
   *          Return true if there is any field can be recognized in the parser,
   *          otherwise false.
   */
  _parseCreditCardNameFields(scanner, fieldDetail) {
    const INTERESTED_FIELDS = [
      "name",
      "given-name",
      "additional-name",
      "family-name",
    ];

    if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
      return false;
    }

    const fields = [];
    for (let idx = scanner.parsingIndex; ; idx++) {
      const detail = scanner.getFieldDetailByIndex(idx);
      if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
        break;
      }
      fields.push(detail);
    }

    const prevCCFields = new Set();
    for (let idx = scanner.parsingIndex - 1; ; idx--) {
      const detail = scanner.getFieldDetailByIndex(idx);
      if (
        lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=
        "creditCard"
      ) {
        break;
      }
      prevCCFields.add(detail.fieldName);
    }

    // We update the "name" fields to "cc-name" fields when the following
    // conditions are met:
    // 1. The preceding fields are identified as credit card fields and
    //    contain the "cc-number" field.
    // 2. No "cc-name-*" field is found among the preceding credit card fields.
    // 3. The "cc-csc" field is not present among the preceding credit card fields.
    if (
      ["cc-number"].some(f => prevCCFields.has(f)) &&
      !["cc-name", "cc-given-name", "cc-family-name", "cc-csc"].some(f =>
        prevCCFields.has(f)
      )
    ) {
      // If there is only one field, assume the name field a `cc-name` field
      if (fields.length == 1) {
        scanner.updateFieldName(scanner.parsingIndex, `cc-name`);
        scanner.parsingIndex += 1;
      } else {
        // update *-name to cc-*-name
        for (const field of fields) {
          scanner.updateFieldName(
            scanner.parsingIndex,
            `cc-${field.fieldName}`
          );
          scanner.parsingIndex += 1;
        }
      }
      return true;
    }

    return false;
  },

  /**
   * This function should provide all field details of a form which are placed
   * in the belonging section. The details contain the autocomplete info
   * (e.g. fieldName, section, etc).
   *
   * @param {HTMLFormElement} form
   *        the elements in this form to be predicted the field info.
   * @returns {Array<FormSection>}
   *        all sections within its field details in the form.
   */
  getFormInfo(form) {
    const elements = Array.from(form.elements).filter(element =>
      lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(element)
    );

    const scanner = new lazy.FieldScanner(elements, element =>
      this.inferFieldInfo(element, elements)
    );

    while (!scanner.parsingFinished) {
      const savedIndex = scanner.parsingIndex;

      // First, we get the inferred field info
      const fieldDetail = scanner.getFieldDetailByIndex(scanner.parsingIndex);

      if (
        this._parsePhoneFields(scanner, fieldDetail) ||
        this._parseStreetAddressFields(scanner, fieldDetail) ||
        this._parseAddressFields(scanner, fieldDetail) ||
        this._parseCreditCardExpiryFields(scanner, fieldDetail) ||
        this._parseCreditCardNameFields(scanner, fieldDetail)
      ) {
        continue;
      }

      // If there is no field parsed, the parsing cursor can be moved
      // forward to the next one.
      if (savedIndex == scanner.parsingIndex) {
        scanner.parsingIndex++;
      }
    }

    lazy.LabelUtils.clearLabelMap();

    // Address and credit card fields are classified independently, then the
    // resulting sections are ordered by the position of their first field.
    const fields = scanner.fieldDetails;
    const sections = [
      ...this._classifySections(
        fields.filter(f => lazy.FormAutofillUtils.isAddressField(f.fieldName))
      ),
      ...this._classifySections(
        fields.filter(f =>
          lazy.FormAutofillUtils.isCreditCardField(f.fieldName)
        )
      ),
    ];

    return sections.sort(
      (a, b) =>
        fields.indexOf(a.fieldDetails[0]) - fields.indexOf(b.fieldDetails[0])
    );
  },

  /**
   * The result is an array contains the sections with its belonging field details.
   *
   * @param   {Array} fieldDetails field detail array to be classified
   * @returns {Array<FormSection>} The array with the sections.
   */
  _classifySections(fieldDetails) {
    let sections = [];
    for (let i = 0; i < fieldDetails.length; i++) {
      const cur = fieldDetails[i];
      const [currentSection] = sections.slice(-1);

      // The section this field might be placed into.
      let candidateSection = null;

      // Use name group from autocomplete attribute (ex, section-xxx) to look for the section
      // we might place this field into.
      // If the field doesn't have a section name, the candidate section is the previous section.
      if (!currentSection || !cur.sectionName) {
        candidateSection = currentSection;
      } else {
        // If the field has a section name, the candidate section is the nearest section that
        // either shares the same name or lacks a name.
        for (let idx = sections.length - 1; idx >= 0; idx--) {
          if (!sections[idx].name || sections[idx].name == cur.sectionName) {
            candidateSection = sections[idx];
            break;
          }
        }
      }

      if (candidateSection) {
        let createNewSection = true;

        // We might create a new section instead of placing the field in the candidate section if
        // the section already has a field with the same field name.
        // We also check visibility for both the fields with the same field name because we don't
        // want to create a new section for an invisible field.
        if (
          candidateSection.fieldDetails.find(
            f => f.fieldName == cur.fieldName && f.isVisible && cur.isVisible
          )
        ) {
          // For some field type, it is common to have multiple fields in one section, for example,
          // email. In that case, we will not create a new section even when the candidate section
          // already has a field with the same field name.
          const [lastFieldDetail] = candidateSection.fieldDetails.slice(-1);
          if (lastFieldDetail.fieldName == cur.fieldName) {
            if (MULTI_FIELD_NAMES.includes(cur.fieldName)) {
              createNewSection = false;
            } else if (cur.fieldName in MULTI_N_FIELD_NAMES) {
              // This is the heuristic to handle special cases where we can have multiple
              // fields in one section, but only if the field has appeared N times in a row.
              // For example, websites can use 4 consecutive 4-digit `cc-number` fields
              // instead of one 16-digit `cc-number` field.

              const N = MULTI_N_FIELD_NAMES[cur.fieldName];
              if (lastFieldDetail.part) {
                // If `part` is set, we have already identified this field can be
                // merged previously
                if (lastFieldDetail.part < N) {
                  createNewSection = false;
                  fieldDetails[i].part = lastFieldDetail.part + 1;
                }
              } else {
                // `lastFieldDetail` and `cur` are the first two fields of a
                // potential run, so exactly N - 2 more fields with the same
                // name must follow. The length check matters: `every()` is
                // vacuously true on a slice that was cut short by the end of
                // the list, which would merge a run shorter than N.
                const upcoming = fieldDetails.slice(i + 1, i + N - 1);
                if (
                  upcoming.length == N - 2 &&
                  upcoming.every(f => f.fieldName == cur.fieldName)
                ) {
                  lastFieldDetail.part = 1;
                  fieldDetails[i].part = 2;
                  createNewSection = false;
                }
              }
            }
          }
        } else {
          // The field doesn't exist in the candidate section, add it.
          createNewSection = false;
        }

        if (!createNewSection) {
          candidateSection.addField(fieldDetails[i]);
          continue;
        }
      }

      // Create a new section
      sections.push(new FormSection([fieldDetails[i]]));
    }

    return sections;
  },

  /**
   * Build the list of field names the heuristics may assign to an element.
   * Credit card or address names are left out when autocomplete is "off" on
   * the element or its form, unless the matching `FormAutofill` flag is set.
   * For <select> elements the list is further restricted to a fixed set.
   *
   * @param {HTMLElement} element
   * @returns {Array<string>}
   */
  _getPossibleFieldNames(element) {
    let fieldNames = [];
    const isAutoCompleteOff =
      element.autocomplete == "off" || element.form?.autocomplete == "off";

    if (!isAutoCompleteOff || FormAutofill.creditCardsAutocompleteOff) {
      fieldNames.push(...this.CREDIT_CARD_FIELDNAMES);
    }

    if (!isAutoCompleteOff || FormAutofill.addressesAutocompleteOff) {
      fieldNames.push(...this.ADDRESS_FIELDNAMES);
    }

    if (HTMLSelectElement.isInstance(element)) {
      const FIELDNAMES_FOR_SELECT_ELEMENT = [
        "address-level1",
        "address-level2",
        "country",
        "cc-exp-month",
        "cc-exp-year",
        "cc-exp",
        "cc-type",
      ];
      fieldNames = fieldNames.filter(name =>
        FIELDNAMES_FOR_SELECT_ELEMENT.includes(name)
      );
    }

    return fieldNames;
  },

  /**
   * Get inferred information about an input element using autocomplete info, fathom and regex-based heuristics.
   *
   * @param {HTMLElement} element - The input element to infer information about.
   * @param {Array<HTMLElement>} elements - See `getFathomField` for details
   * @returns {Array} - An array containing:
   *                    [0]the inferred field name
   *                    [1]autocomplete information if the element has autocomplete attribute, null otherwise.
   *                    [2]fathom confidence if fathom considers it a cc field, null otherwise.
   */
  inferFieldInfo(element, elements = []) {
    const autocompleteInfo = element.getAutocompleteInfo();

    // An input[autocomplete="on"] will not be early return here since it still
    // needs to find the field name.
    if (
      autocompleteInfo?.fieldName &&
      !["on", "off"].includes(autocompleteInfo.fieldName)
    ) {
      return [autocompleteInfo.fieldName, autocompleteInfo, null];
    }

    const fields = this._getPossibleFieldNames(element);

    // "email" type of input is accurate for heuristics to determine its Email
    // field or not. However, "tel" type is used for ZIP code for some web site
    // (e.g. HomeDepot, BestBuy), so "tel" type should be not used for "tel"
    // prediction.
    if (element.type == "email" && fields.includes("email")) {
      return ["email", null, null];
    }

    if (lazy.FormAutofillUtils.isFathomCreditCardsEnabled()) {
      // We don't care fields that are not supported by fathom
      const fathomFields = fields.filter(r =>
        lazy.CreditCardRulesets.types.includes(r)
      );
      const [matchedFieldName, confidence] = this.getFathomField(
        element,
        fathomFields,
        elements
      );
      // At this point, use fathom's recommendation if it has one
      if (matchedFieldName) {
        return [matchedFieldName, null, confidence];
      }

      // Continue to run regex-based heuristics even when fathom doesn't recognize
      // the field. Since the regex-based heuristic has good search coverage but
      // has a worse precision. We use it in conjunction with fathom to maximize
      // our search coverage. For example, when a <input> is not considered cc-name
      // by fathom but is considered cc-name by regex-based heuristic, if the form
      // also contains a cc-number identified by fathom, we will treat the form as a
      // valid cc form; hence both cc-number & cc-name are identified.
    }

    if (HTMLSelectElement.isInstance(element)) {
      if (this._isExpirationMonthLikely(element)) {
        return ["cc-exp-month", null, null];
      } else if (this._isExpirationYearLikely(element)) {
        return ["cc-exp-year", null, null];
      }

      // Check every select for options that
      // match credit card network names in value or label.
      const options = Array.from(element.querySelectorAll("option"));
      if (
        options.some(
          option =>
            lazy.CreditCard.getNetworkFromName(option.value) ||
            lazy.CreditCard.getNetworkFromName(option.text)
        )
      ) {
        return ["cc-type", null, null];
      }

      // At least two options match the country name, otherwise some state name might
      // also match a country name, ex, Georgia. We check the last two
      // options rather than the first, as selects often start with a non-country display option.
      const countryDisplayNames = Array.from(FormAutofill.countries.values());
      if (
        options.length >= 2 &&
        options
          .slice(-2)
          .every(
            option =>
              countryDisplayNames.includes(option.value) ||
              countryDisplayNames.includes(option.text)
          )
      ) {
        return ["country", null, null];
      }
    }

    // Find a matched field name using regexp-based heuristics
    const matchedFieldName = this._findMatchedFieldName(element, fields);
    return [matchedFieldName, null, null];
  },

  /**
   * Using Fathom, say what kind of CC field an element is most likely to be.
   * This function doesn't only run fathom on the passed elements. It also
   * runs fathom for all elements in the FieldScanner for optimization purpose.
   *
   * @param {HTMLElement} element
   * @param {Array} fields
   * @param {Array<HTMLElement>} elements - All other eligible elements in the same form. This is mainly used as an
   *                                        optimization approach to run fathom model on all eligible elements
   *                                        once instead of one by one
   * @returns {Array} A tuple of [field name, probability] describing the
   *   highest-confidence classification
   */
  getFathomField(element, fields, elements = []) {
    if (!fields.length) {
      return [null, null];
    }

    // The confidences are cached per element. A cache miss throws the whole
    // cache away and recomputes it for the batch containing `element`.
    if (!this._fathomConfidences?.get(element)) {
      this._fathomConfidences = new Map();

      // This should not throw unless we run into an OOM situation, at which
      // point we have worse problems and this failing is not a big deal.
      const batch = elements.includes(element) ? elements : [element];
      const confidences = this.getFormAutofillConfidences(batch);

      for (let i = 0; i < batch.length; i++) {
        this._fathomConfidences.set(batch[i], confidences[i]);
      }
    }

    const elementConfidences = this._fathomConfidences.get(element);
    if (!elementConfidences) {
      return [null, null];
    }

    // Only types scoring strictly above the configured threshold qualify.
    let highestField = null;
    let highestConfidence = lazy.FormAutofillUtils.ccFathomConfidenceThreshold;
    for (let [key, value] of Object.entries(elementConfidences)) {
      if (!fields.includes(key)) {
        // ignore field that we don't care
        continue;
      }

      if (value > highestConfidence) {
        highestConfidence = value;
        highestField = key;
      }
    }

    if (!highestField) {
      return [null, null];
    }

    // Used by test ONLY! This ensure testcases always get the same confidence
    if (lazy.FormAutofillUtils.ccFathomTestConfidence > 0) {
      highestConfidence = lazy.FormAutofillUtils.ccFathomTestConfidence;
    }

    return [highestField, highestConfidence];
  },

  /**
   * @param {Array} elements Array of elements that we want to get result from fathom cc rules
   * @returns {Array<object>} One object per element, in the same order, holding
   *          the fathom confidence keyed by field-type.
   */
  getFormAutofillConfidences(elements) {
    if (
      lazy.FormAutofillUtils.ccHeuristicsMode ==
      lazy.FormAutofillUtils.CC_FATHOM_NATIVE
    ) {
      const confidences = ChromeUtils.getFormAutofillConfidences(elements);
      return confidences.map(c => {
        let result = {};
        for (let [fieldName, confidence] of Object.entries(c)) {
          let type =
            lazy.FormAutofillUtils.formAutofillConfidencesKeyToCCFieldType(
              fieldName
            );
          result[type] = confidence;
        }
        return result;
      });
    }

    return elements.map(element => {
      /**
       * Return how confident our ML model is that `element` is a field of the
       * given type.
       *
       * @param {string} fieldName The Fathom type to check against. This is
       *   conveniently the same as the autocomplete attribute value that means
       *   the same thing.
       * @returns {number} Confidence in range [0, 1]
       */
      function confidence(fieldName) {
        const ruleset = lazy.CreditCardRulesets[fieldName];
        const fnodes = ruleset.against(element).get(fieldName);

        // fnodes is either 0 or 1 item long, since we ran the ruleset
        // against a single element:
        return fnodes.length ? fnodes[0].scoreFor(fieldName) : 0;
      }

      // Bang the element against the ruleset for every type of field:
      const confidences = {};
      for (const fieldName of lazy.CreditCardRulesets.types) {
        confidences[fieldName] = confidence(fieldName);
      }

      return confidences;
    });
  },

  /**
   * Extract all the signature strings of an element.
   *
   * @param {HTMLElement} element
   * @returns {Array<string>} The element's id, name and trimmed placeholder,
   *          in that order. The placeholder entry is undefined when the
   *          element has no placeholder property.
   */
  _getElementStrings(element) {
    return [element.id, element.name, element.placeholder?.trim()];
  },

  /**
   * Extract all the label strings associated with an element.
   *
   * @param {HTMLElement} element
   * @returns {Iterable<string>} An iterable that lazily yields the strings
   *          extracted from each label element found for `element`, followed
   *          by the element's aria-label attribute when it is non-empty.
   */
  _getElementLabelStrings(element) {
    return {
      *[Symbol.iterator]() {
        const labels = lazy.LabelUtils.findLabelElements(element);
        for (let label of labels) {
          yield* lazy.LabelUtils.extractLabelStrings(label);
        }

        const ariaLabels = element.getAttribute("aria-label");
        if (ariaLabels) {
          yield ariaLabels;
        }
      },
    };
  },

  // In order to support webkit we need to avoid usage of negative lookbehind due to low support
  // First safari version with support is 16.4 (Release Date: 27th March 2023)
  // https://caniuse.com/js-regexp-lookbehind
  // We can mimic the behaviour of negative lookbehinds by using a named capture group
  // (?<!not)word -> (?<neg>notword)|word
  // TODO: Bug 1829583
  /**
   * Test a string against a regular expression, ignoring captures that are
   * equal to the text captured by the named group `neg`.
   *
   * @param {RegExp} regex
   *        Must carry the "g" flag, as `String.prototype.matchAll` requires it.
   * @param {string} [string]
   * @returns {boolean} True when any match contains a non-empty capture that
   *          differs from its `neg` group; false for a null/undefined string.
   */
  testRegex(regex, string) {
    const matches = string?.matchAll(regex);
    if (!matches) {
      return false;
    }

    for (const match of matches) {
      const negative = match.groups?.neg;
      if (match.some(m => m && m !== negative)) {
        return true;
      }
    }
    return false;
  },

  /**
   * Find the first matching field name from a given list of field names
   * that matches an HTML element.
   *
   * The function first tries to match the element against a set of
   * pre-defined regular expression rules. If no match is found, it
   * then checks for label-specific rules, if they exist.
   *
   * Note: For label rules, the keyword is often more general
   * (e.g., "^\\W*address"), hence they are only searched within labels
   * to reduce the occurrence of false positives.
   *
   * @param {HTMLElement} element The element to match.
   * @param {Array<string>} fieldNames An array of field names to compare against.
   * @returns {string|null|undefined} The name of the matched field; null when
   *          `fieldNames` is empty, undefined when no rule matched.
   */
  _findMatchedFieldName(element, fieldNames) {
    if (!fieldNames.length) {
      return null;
    }

    // Attempt to match the element against the default set of rules
    let matchedFieldName = fieldNames.find(fieldName =>
      this._matchRegexp(element, this.RULES[fieldName])
    );

    // If no match is found, and if a label rule exists for the field,
    // attempt to match against the label rules
    if (!matchedFieldName) {
      matchedFieldName = fieldNames.find(fieldName => {
        const regexp = this.LABEL_RULES[fieldName];
        return this._matchRegexp(element, regexp, { attribute: false });
      });
    }
    return matchedFieldName;
  },

  /**
   * Determine whether the regexp can match any of element strings.
   *
   * @param {HTMLElement} element The HTML element to match.
   * @param {RegExp} regexp       The regular expression to match against.
   * @param {object} [options]    Optional parameters for matching.
   * @param {boolean} [options.attribute=true]
   *                              Whether to match against the element's attributes.
   * @param {boolean} [options.label=true]
   *                              Whether to match against the element's labels.
   * @returns {boolean} True if a match is found, otherwise false.
   */
  _matchRegexp(element, regexp, { attribute = true, label = true } = {}) {
    if (!regexp) {
      return false;
    }

    if (attribute) {
      const elemStrings = this._getElementStrings(element);
      if (elemStrings.find(s => this.testRegex(regexp, s?.toLowerCase()))) {
        return true;
      }
    }

    if (label) {
      const elementLabelStrings = this._getElementLabelStrings(element);
      for (const s of elementLabelStrings) {
        if (this.testRegex(regexp, s?.toLowerCase())) {
          return true;
        }
      }
    }

    return false;
  },

  /**
   * Phone field grammars - first matched grammar will be parsed. Grammars are
   * separated by { REGEX_SEPARATOR, FIELD_NONE, 0 } (written `[null, null, 0]`
   * in the enabled entries below). Suffix and extension are
   * parsed separately unless they are necessary parts of the match.
   * The following notation is used to describe the patterns:
   * <cc> - country code field.
   * <ac> - area code field.
   * <phone> - phone or prefix.
   * <suffix> - suffix.
   * <ext> - extension.
   * :N means field is limited to N characters, otherwise it is unlimited.
   * (pattern <field>)? means pattern is optional and matched separately.
   *
   * Each enabled entry is [expected field name, new field name, max length].
   *
   * This grammar list from Chromium will be enabled partially once we need to
   * support more cases of Telephone fields.
   */
  PHONE_FIELD_GRAMMARS: [
    // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
    // (Ext: <ext>)?)?
    // {REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
    // {REGEX_PHONE, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PHONE, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
    ["tel", "tel-country-code", 3],
    ["tel", "tel-area-code", 3],
    ["tel", "tel-local-prefix", 3],
    ["tel", "tel-local-suffix", 4],
    [null, null, 0],

    // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 3},
    // {REGEX_PHONE, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Area code: <ac>:3 Prefix: <prefix>:3 Suffix: <suffix>:4 (Ext: <ext>)?
    // {REGEX_AREA, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX, FIELD_PHONE, 3},
    // {REGEX_SUFFIX, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX, FIELD_PHONE, 0},
    // {REGEX_SUFFIX, FIELD_SUFFIX, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
    ["tel", "tel-area-code", 0],
    ["tel", "tel-local-prefix", 3],
    ["tel", "tel-local-suffix", 4],
    [null, null, 0],

    // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> - <phone> (Ext: <ext>)?
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 3},
    // {REGEX_PHONE, FIELD_PHONE, 10},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Ext: <phone>
    // {REGEX_EXTENSION, FIELD_EXTENSION, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <phone> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},
  ],
};

ChromeUtils.defineLazyGetter(
  FormAutofillHeuristics,
  "CREDIT_CARD_FIELDNAMES",
  () =>
    Object.keys(FormAutofillHeuristics.RULES).filter(name =>
      lazy.FormAutofillUtils.isCreditCardField(name)
    )
);

ChromeUtils.defineLazyGetter(FormAutofillHeuristics, "ADDRESS_FIELDNAMES", () =>
  Object.keys(FormAutofillHeuristics.RULES).filter(name =>
    lazy.FormAutofillUtils.isAddressField(name)
  )
);

export default FormAutofillHeuristics;