diff options
Diffstat (limited to '')
-rw-r--r-- | toolkit/components/formautofill/FormAutofillHeuristics.jsm | 1417 |
1 files changed, 1417 insertions, 0 deletions
diff --git a/toolkit/components/formautofill/FormAutofillHeuristics.jsm b/toolkit/components/formautofill/FormAutofillHeuristics.jsm new file mode 100644 index 0000000000..b3b2295964 --- /dev/null +++ b/toolkit/components/formautofill/FormAutofillHeuristics.jsm @@ -0,0 +1,1417 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Form Autofill field heuristics. + */ + +"use strict"; + +const EXPORTED_SYMBOLS = ["FormAutofillHeuristics", "FieldScanner"]; +let FormAutofillHeuristics; + +const { XPCOMUtils } = ChromeUtils.importESModule( + "resource://gre/modules/XPCOMUtils.sys.mjs" +); +const { FormAutofill } = ChromeUtils.import( + "resource://autofill/FormAutofill.jsm" +); + +const lazy = {}; + +ChromeUtils.defineESModuleGetters(lazy, { + CreditCard: "resource://gre/modules/CreditCard.sys.mjs", +}); + +XPCOMUtils.defineLazyModuleGetters(lazy, { + creditCardRulesets: "resource://autofill/CreditCardRuleset.jsm", + FormAutofillUtils: "resource://autofill/FormAutofillUtils.jsm", + LabelUtils: "resource://autofill/FormAutofillUtils.jsm", +}); + +XPCOMUtils.defineLazyGetter(lazy, "log", () => + FormAutofill.defineLogGetter(lazy, EXPORTED_SYMBOLS[0]) +); + +const PREF_HEURISTICS_ENABLED = "extensions.formautofill.heuristics.enabled"; +const PREF_SECTION_ENABLED = "extensions.formautofill.section.enabled"; +const DEFAULT_SECTION_NAME = "-moz-section-default"; + +/** + * To help us classify sections, we want to know what fields can appear + * multiple times in a row. + * Such fields, like `address-line{X}`, should not break sections. + */ +const MULTI_FIELD_NAMES = [ + "address-level3", + "address-level2", + "address-level1", + "tel", + "postal-code", + "email", + "street-address", +]; + +/** + * To help us classify sections that can appear only N times in a row. + * For example, the only time multiple cc-number fields are valid is when + * there are four of these fields in a row. + * Otherwise, multiple cc-number fields should be in separate sections. + */ +const MULTI_N_FIELD_NAMES = { + "cc-number": 4, +}; + +/** + * A scanner for traversing all elements in a form and retrieving the field + * detail with FormAutofillHeuristics.getInfo function. It also provides a + * cursor (parsingIndex) to indicate which element is waiting for parsing. + */ +class FieldScanner { + /** + * Create a FieldScanner based on form elements with the existing + * fieldDetails. + * + * @param {Array.DOMElement} elements + * The elements from a form for each parser. + */ + constructor(elements, { allowDuplicates = false, sectionEnabled = true }) { + this._elementsWeakRef = Cu.getWeakReference(elements); + this.fieldDetails = []; + this._parsingIndex = 0; + this._sections = []; + this._allowDuplicates = allowDuplicates; + this._sectionEnabled = sectionEnabled; + } + + get _elements() { + return this._elementsWeakRef.get(); + } + + /** + * This cursor means the index of the element which is waiting for parsing. + * + * @returns {number} + * The index of the element which is waiting for parsing. + */ + get parsingIndex() { + return this._parsingIndex; + } + + /** + * Move the parsingIndex to the next elements. Any elements behind this index + * means the parsing tasks are finished. + * + * @param {number} index + * The latest index of elements waiting for parsing. + */ + set parsingIndex(index) { + if (index > this._elements.length) { + throw new Error("The parsing index is out of range."); + } + this._parsingIndex = index; + } + + /** + * Retrieve the field detail by the index. If the field detail is not ready, + * the elements will be traversed until matching the index. + * + * @param {number} index + * The index of the element that you want to retrieve. + * @returns {object} + * The field detail at the specific index. + */ + getFieldDetailByIndex(index) { + if (index >= this._elements.length) { + throw new Error( + `The index ${index} is out of range.(${this._elements.length})` + ); + } + + if (index < this.fieldDetails.length) { + return this.fieldDetails[index]; + } + + for (let i = this.fieldDetails.length; i < index + 1; i++) { + this.pushDetail(); + } + + return this.fieldDetails[index]; + } + + get parsingFinished() { + return this.parsingIndex >= this._elements.length; + } + + _pushToSection(name, fieldDetail) { + for (let section of this._sections) { + if (section.name == name) { + section.fieldDetails.push(fieldDetail); + return; + } + } + this._sections.push({ + name, + fieldDetails: [fieldDetail], + }); + } + /** + * Merges the next N fields if the currentType is in the list of MULTI_N_FIELD_NAMES + * + * @param {number} mergeNextNFields How many of the next N fields to merge into the current section + * @param {string} currentType Type of the current field detail + * @param {Array<object>} fieldDetails List of current field details + * @param {number} i Index to keep track of the fieldDetails list + * @param {boolean} createNewSection Determines if a new section should be created + * @returns {Array<(number|boolean)>} mergeNextNFields and creatNewSection for use in _classifySections + * @memberof FieldScanner + */ + _mergeNextNFields( + mergeNextNFields, + currentType, + fieldDetails, + i, + createNewSection + ) { + if (mergeNextNFields) { + mergeNextNFields--; + } else { + // We use -2 here because we have already seen two consecutive fields, + // the previous one and the current one. + // This ensures we don't accidentally add a field we've already seen. + let nextN = MULTI_N_FIELD_NAMES[currentType] - 2; + let array = fieldDetails.slice(i + 1, i + 1 + nextN); + if ( + array.length == nextN && + array.every(detail => detail.fieldName == currentType) + ) { + mergeNextNFields = nextN; + } else { + createNewSection = true; + } + } + return { mergeNextNFields, createNewSection }; + } + + _classifySections() { + let fieldDetails = this._sections[0].fieldDetails; + this._sections = []; + let seenTypes = new Set(); + let previousType; + let sectionCount = 0; + let mergeNextNFields = 0; + + for (let i = 0; i < fieldDetails.length; i++) { + let currentType = fieldDetails[i].fieldName; + if (!currentType) { + continue; + } + + let createNewSection = false; + if (seenTypes.has(currentType)) { + if (previousType != currentType) { + // If we have seen this field before and it is different from + // the previous one, always create a new section. + createNewSection = true; + } else if (MULTI_FIELD_NAMES.includes(currentType)) { + // For fields that can appear multiple times in a row + // within one section, don't create a new section + } else if (currentType in MULTI_N_FIELD_NAMES) { + // This is the heuristic to handle special cases where we can have multiple + // fields in one section, but only if the field has appeared N times in a row. + // For example, websites can use 4 consecutive 4-digit `cc-number` fields + // instead of one 16-digit `cc-number` field. + ({ mergeNextNFields, createNewSection } = this._mergeNextNFields( + mergeNextNFields, + currentType, + fieldDetails, + i, + createNewSection + )); + } else { + // Fields that should not appear multiple times in one section. + createNewSection = true; + } + } + + if (createNewSection) { + mergeNextNFields = 0; + seenTypes.clear(); + sectionCount++; + } + + previousType = currentType; + seenTypes.add(currentType); + this._pushToSection( + DEFAULT_SECTION_NAME + "-" + sectionCount, + fieldDetails[i] + ); + } + } + + /** + * The result is an array contains the sections with its belonging field + * details. If `this._sections` contains one section only with the default + * section name (DEFAULT_SECTION_NAME), `this._classifySections` should be + * able to identify all sections in the heuristic way. + * + * @returns {Array<object>} + * The array with the sections, and the belonging fieldDetails are in + * each section. For example, it may return something like this: + * [{ + * type: FormAutofillUtils.SECTION_TYPES.ADDRESS, // section type + * fieldDetails: [{ // a record for each field + * fieldName: "email", + * section: "", + * addressType: "", + * contactType: "", + * elementWeakRef: the element + * }, ...] + * }, + * { + * type: FormAutofillUtils.SECTION_TYPES.CREDIT_CARD, + * fieldDetails: [{ + * fieldName: "cc-exp-month", + * section: "", + * addressType: "", + * contactType: "", + * elementWeakRef: the element + * }, ...] + * }] + */ + getSectionFieldDetails() { + // When the section feature is disabled, `getSectionFieldDetails` should + // provide a single address and credit card section result. + if (!this._sectionEnabled) { + return this._getFinalDetails(this.fieldDetails); + } + if (!this._sections.length) { + return []; + } + if ( + this._sections.length == 1 && + this._sections[0].name == DEFAULT_SECTION_NAME + ) { + this._classifySections(); + } + + return this._sections.reduce((sections, current) => { + sections.push(...this._getFinalDetails(current.fieldDetails)); + return sections; + }, []); + } + + /** + * This function will prepare an autocomplete info object with getInfo + * function and push the detail to fieldDetails property. + * Any field will be pushed into `this._sections` based on the section name + * in `autocomplete` attribute. + * + * Any element without the related detail will be used for adding the detail + * to the end of field details. + */ + pushDetail() { + let elementIndex = this.fieldDetails.length; + if (elementIndex >= this._elements.length) { + throw new Error("Try to push the non-existing element info."); + } + let element = this._elements[elementIndex]; + let info = FormAutofillHeuristics.getInfo(element, this); + let fieldInfo = { + section: info?.section ?? "", + addressType: info?.addressType ?? "", + contactType: info?.contactType ?? "", + fieldName: info?.fieldName ?? "", + confidence: info?.confidence, + elementWeakRef: Cu.getWeakReference(element), + }; + + if (info?._reason) { + fieldInfo._reason = info._reason; + } + + this.fieldDetails.push(fieldInfo); + this._pushToSection(this._getSectionName(fieldInfo), fieldInfo); + } + + _getSectionName(info) { + let names = []; + if (info.section) { + names.push(info.section); + } + if (info.addressType) { + names.push(info.addressType); + } + return names.length ? names.join(" ") : DEFAULT_SECTION_NAME; + } + + /** + * When a field detail should be changed its fieldName after parsing, use + * this function to update the fieldName which is at a specific index. + * + * @param {number} index + * The index indicates a field detail to be updated. + * @param {string} fieldName + * The new fieldName + */ + updateFieldName(index, fieldName) { + if (index >= this.fieldDetails.length) { + throw new Error("Try to update the non-existing field detail."); + } + this.fieldDetails[index].fieldName = fieldName; + } + + _isSameField(field1, field2) { + return ( + field1.section == field2.section && + field1.addressType == field2.addressType && + field1.fieldName == field2.fieldName && + !field1.transform && + !field2.transform + ); + } + + /** + * When a site has four credit card number fields and + * these fields have a max length of four + * then we transform the credit card number into + * four subsections in order to fill correctly. + * + * @param {Array<object>} creditCardFieldDetails + * The credit card field details to be transformed for multiple cc-number fields filling + * @memberof FieldScanner + */ + _transformCCNumberForMultipleFields(creditCardFieldDetails) { + let ccNumberFields = creditCardFieldDetails.filter( + field => + field.fieldName == "cc-number" && + field.elementWeakRef.get().maxLength == 4 + ); + if (ccNumberFields.length == 4) { + ccNumberFields[0].transform = fullCCNumber => fullCCNumber.slice(0, 4); + ccNumberFields[1].transform = fullCCNumber => fullCCNumber.slice(4, 8); + ccNumberFields[2].transform = fullCCNumber => fullCCNumber.slice(8, 12); + ccNumberFields[3].transform = fullCCNumber => fullCCNumber.slice(12, 16); + } + } + + /** + * Provide the final field details without invalid field name, and the + * duplicated fields will be removed as well. For the debugging purpose, + * the final `fieldDetails` will include the duplicated fields if + * `_allowDuplicates` is true. + * + * Each item should contain one type of fields only, and the two valid types + * are Address and CreditCard. + * + * @param {Array<object>} fieldDetails + * The field details for trimming. + * @returns {Array<object>} + * The array with the field details without invalid field name and + * duplicated fields. + */ + _getFinalDetails(fieldDetails) { + let addressFieldDetails = []; + let creditCardFieldDetails = []; + for (let fieldDetail of fieldDetails) { + let fieldName = fieldDetail.fieldName; + if (lazy.FormAutofillUtils.isAddressField(fieldName)) { + addressFieldDetails.push(fieldDetail); + } else if (lazy.FormAutofillUtils.isCreditCardField(fieldName)) { + creditCardFieldDetails.push(fieldDetail); + } else { + lazy.log.debug( + "Not collecting a field with a unknown fieldName", + fieldDetail + ); + } + } + this._transformCCNumberForMultipleFields(creditCardFieldDetails); + return [ + { + type: lazy.FormAutofillUtils.SECTION_TYPES.ADDRESS, + fieldDetails: addressFieldDetails, + }, + { + type: lazy.FormAutofillUtils.SECTION_TYPES.CREDIT_CARD, + fieldDetails: creditCardFieldDetails, + }, + ] + .map(section => { + if (this._allowDuplicates) { + return section; + } + // Deduplicate each set of fieldDetails + let details = section.fieldDetails; + section.fieldDetails = details.filter((detail, index) => { + let previousFields = details.slice(0, index); + return !previousFields.find(f => this._isSameField(detail, f)); + }); + return section; + }) + .filter(section => !!section.fieldDetails.length); + } + + elementExisting(index) { + return index < this._elements.length; + } + + /** + * Using Fathom, say what kind of CC field an element is most likely to be. + * This function deoesn't only run fathom on the passed elements. It also + * runs fathom for all elements in the FieldScanner for optimization purpose. + * + * @param {HTMLElement} element + * @param {Array} fields + * @returns {Array} A tuple of [field name, probability] describing the + * highest-confidence classification + */ + getFathomField(element, fields) { + if (!fields.length) { + return [null, null]; + } + + if (!this._fathomConfidences?.get(element)) { + this._fathomConfidences = new Map(); + + let elements = []; + if (this._elements?.includes(element)) { + elements = this._elements; + } else { + elements = [element]; + } + + // This should not throw unless we run into an OOM situation, at which + // point we have worse problems and this failing is not a big deal. + let confidences = FieldScanner.getFormAutofillConfidences(elements); + for (let i = 0; i < elements.length; i++) { + this._fathomConfidences.set(elements[i], confidences[i]); + } + } + + let elementConfidences = this._fathomConfidences.get(element); + if (!elementConfidences) { + return [null, null]; + } + + let highestField = null; + let highestConfidence = lazy.FormAutofillUtils.ccFathomConfidenceThreshold; // Start with a threshold of 0.5 + for (let [key, value] of Object.entries(elementConfidences)) { + if (!fields.includes(key)) { + // ignore field that we don't care + continue; + } + + if (value > highestConfidence) { + highestConfidence = value; + highestField = key; + } + } + + if (!highestField) { + return [null, null]; + } + + // Used by test ONLY! This ensure testcases always get the same confidence + if (lazy.FormAutofillUtils.ccFathomTestConfidence > 0) { + highestConfidence = lazy.FormAutofillUtils.ccFathomTestConfidence; + } + + return [highestField, highestConfidence]; + } + + /** + * @param {Array} elements Array of elements that we want to get result from fathom cc rules + * @returns {object} Fathom confidence keyed by field-type. + */ + static getFormAutofillConfidences(elements) { + if ( + lazy.FormAutofillUtils.ccHeuristicsMode == + lazy.FormAutofillUtils.CC_FATHOM_NATIVE + ) { + let confidences = ChromeUtils.getFormAutofillConfidences(elements); + return confidences.map(c => { + let result = {}; + for (let [fieldName, confidence] of Object.entries(c)) { + let type = lazy.FormAutofillUtils.formAutofillConfidencesKeyToCCFieldType( + fieldName + ); + result[type] = confidence; + } + return result; + }); + } + + return elements.map(element => { + /** + * Return how confident our ML model is that `element` is a field of the + * given type. + * + * @param {string} fieldName The Fathom type to check against. This is + * conveniently the same as the autocomplete attribute value that means + * the same thing. + * @returns {number} Confidence in range [0, 1] + */ + function confidence(fieldName) { + const ruleset = lazy.creditCardRulesets[fieldName]; + const fnodes = ruleset.against(element).get(fieldName); + + // fnodes is either 0 or 1 item long, since we ran the ruleset + // against a single element: + return fnodes.length ? fnodes[0].scoreFor(fieldName) : 0; + } + + // Bang the element against the ruleset for every type of field: + let confidences = {}; + lazy.creditCardRulesets.types.map(fieldName => { + confidences[fieldName] = confidence(fieldName); + }); + + return confidences; + }); + } +} + +/** + * Returns the autocomplete information of fields according to heuristics. + */ +FormAutofillHeuristics = { + RULES: null, + + CREDIT_CARD_FIELDNAMES: [], + ADDRESS_FIELDNAMES: [], + /** + * Try to find a contiguous sub-array within an array. + * + * @param {Array} array + * @param {Array} subArray + * + * @returns {boolean} + * Return whether subArray was found within the array or not. + */ + _matchContiguousSubArray(array, subArray) { + return array.some((elm, i) => + subArray.every((sElem, j) => sElem == array[i + j]) + ); + }, + + /** + * Try to find the field that is look like a month select. + * + * @param {DOMElement} element + * @returns {boolean} + * Return true if we observe the trait of month select in + * the current element. + */ + _isExpirationMonthLikely(element) { + if (!HTMLSelectElement.isInstance(element)) { + return false; + } + + const options = [...element.options]; + const desiredValues = Array(12) + .fill(1) + .map((v, i) => v + i); + + // The number of month options shouldn't be less than 12 or larger than 13 + // including the default option. + if (options.length < 12 || options.length > 13) { + return false; + } + + return ( + this._matchContiguousSubArray( + options.map(e => +e.value), + desiredValues + ) || + this._matchContiguousSubArray( + options.map(e => +e.label), + desiredValues + ) + ); + }, + + /** + * Try to find the field that is look like a year select. + * + * @param {DOMElement} element + * @returns {boolean} + * Return true if we observe the trait of year select in + * the current element. + */ + _isExpirationYearLikely(element) { + if (!HTMLSelectElement.isInstance(element)) { + return false; + } + + const options = [...element.options]; + // A normal expiration year select should contain at least the last three years + // in the list. + const curYear = new Date().getFullYear(); + const desiredValues = Array(3) + .fill(0) + .map((v, i) => v + curYear + i); + + return ( + this._matchContiguousSubArray( + options.map(e => +e.value), + desiredValues + ) || + this._matchContiguousSubArray( + options.map(e => +e.label), + desiredValues + ) + ); + }, + + /** + * Try to match the telephone related fields to the grammar + * list to see if there is any valid telephone set and correct their + * field names. + * + * @param {FieldScanner} fieldScanner + * The current parsing status for all elements + * @returns {boolean} + * Return true if there is any field can be recognized in the parser, + * otherwise false. + */ + _parsePhoneFields(fieldScanner) { + let matchingResult; + + const GRAMMARS = this.PHONE_FIELD_GRAMMARS; + for (let i = 0; i < GRAMMARS.length; i++) { + let detailStart = fieldScanner.parsingIndex; + let ruleStart = i; + for ( + ; + i < GRAMMARS.length && + GRAMMARS[i][0] && + fieldScanner.elementExisting(detailStart); + i++, detailStart++ + ) { + let detail = fieldScanner.getFieldDetailByIndex(detailStart); + if ( + !detail || + GRAMMARS[i][0] != detail.fieldName || + (detail._reason && detail._reason == "autocomplete") + ) { + break; + } + let element = detail.elementWeakRef.get(); + if (!element) { + break; + } + if ( + GRAMMARS[i][2] && + (!element.maxLength || GRAMMARS[i][2] < element.maxLength) + ) { + break; + } + } + if (i >= GRAMMARS.length) { + break; + } + + if (!GRAMMARS[i][0]) { + matchingResult = { + ruleFrom: ruleStart, + ruleTo: i, + }; + break; + } + + // Fast rewinding to the next rule. + for (; i < GRAMMARS.length; i++) { + if (!GRAMMARS[i][0]) { + break; + } + } + } + + let parsedField = false; + if (matchingResult) { + let { ruleFrom, ruleTo } = matchingResult; + let detailStart = fieldScanner.parsingIndex; + for (let i = ruleFrom; i < ruleTo; i++) { + fieldScanner.updateFieldName(detailStart, GRAMMARS[i][1]); + fieldScanner.parsingIndex++; + detailStart++; + parsedField = true; + } + } + + if (fieldScanner.parsingFinished) { + return parsedField; + } + + let nextField = fieldScanner.getFieldDetailByIndex( + fieldScanner.parsingIndex + ); + if ( + nextField && + nextField._reason != "autocomplete" && + fieldScanner.parsingIndex > 0 + ) { + const regExpTelExtension = new RegExp( + "\\bext|ext\\b|extension|ramal", // pt-BR, pt-PT + "iu" + ); + const previousField = fieldScanner.getFieldDetailByIndex( + fieldScanner.parsingIndex - 1 + ); + const previousFieldType = lazy.FormAutofillUtils.getCategoryFromFieldName( + previousField.fieldName + ); + if ( + previousField && + previousFieldType == "tel" && + this._matchRegexp(nextField.elementWeakRef.get(), regExpTelExtension) + ) { + fieldScanner.updateFieldName( + fieldScanner.parsingIndex, + "tel-extension" + ); + fieldScanner.parsingIndex++; + parsedField = true; + } + } + + return parsedField; + }, + + /** + * Try to find the correct address-line[1-3] sequence and correct their field + * names. + * + * @param {FieldScanner} fieldScanner + * The current parsing status for all elements + * @returns {boolean} + * Return true if there is any field can be recognized in the parser, + * otherwise false. + */ + _parseAddressFields(fieldScanner) { + let parsedFields = false; + const addressLines = ["address-line1", "address-line2", "address-line3"]; + + // TODO: These address-line* regexps are for the lines with numbers, and + // they are the subset of the regexps in `heuristicsRegexp.js`. We have to + // find a better way to make them consistent. + const addressLineRegexps = { + "address-line1": new RegExp( + "address[_-]?line(1|one)|address1|addr1" + + "|addrline1|address_1" + // Extra rules by Firefox + "|indirizzo1" + // it-IT + "|住所1" + // ja-JP + "|地址1" + // zh-CN + "|주소.?1", // ko-KR + "iu" + ), + "address-line2": new RegExp( + "address[_-]?line(2|two)|address2|addr2" + + "|addrline2|address_2" + // Extra rules by Firefox + "|indirizzo2" + // it-IT + "|住所2" + // ja-JP + "|地址2" + // zh-CN + "|주소.?2", // ko-KR + "iu" + ), + "address-line3": new RegExp( + "address[_-]?line(3|three)|address3|addr3" + + "|addrline3|address_3" + // Extra rules by Firefox + "|indirizzo3" + // it-IT + "|住所3" + // ja-JP + "|地址3" + // zh-CN + "|주소.?3", // ko-KR + "iu" + ), + }; + while (!fieldScanner.parsingFinished) { + let detail = fieldScanner.getFieldDetailByIndex( + fieldScanner.parsingIndex + ); + if ( + !detail || + !addressLines.includes(detail.fieldName) || + detail._reason == "autocomplete" + ) { + // When the field is not related to any address-line[1-3] fields or + // determined by autocomplete attr, it means the parsing process can be + // terminated. + break; + } + const elem = detail.elementWeakRef.get(); + for (let regexp of Object.keys(addressLineRegexps)) { + if (this._matchRegexp(elem, addressLineRegexps[regexp])) { + fieldScanner.updateFieldName(fieldScanner.parsingIndex, regexp); + parsedFields = true; + } + } + fieldScanner.parsingIndex++; + } + + return parsedFields; + }, + + // The old heuristics can be removed when we fully adopt fathom, so disable the + // esline complexity check for now + /* eslint-disable complexity */ + /** + * Try to look for expiration date fields and revise the field names if needed. + * + * @param {FieldScanner} fieldScanner + * The current parsing status for all elements + * @returns {boolean} + * Return true if there is any field can be recognized in the parser, + * otherwise false. + */ + _parseCreditCardFields(fieldScanner) { + if (fieldScanner.parsingFinished) { + return false; + } + + const savedIndex = fieldScanner.parsingIndex; + const detail = fieldScanner.getFieldDetailByIndex( + fieldScanner.parsingIndex + ); + + // Respect to autocomplete attr + if (!detail || (detail._reason && detail._reason == "autocomplete")) { + return false; + } + + const monthAndYearFieldNames = ["cc-exp-month", "cc-exp-year"]; + // Skip the uninteresting fields + if ( + !["cc-exp", "cc-type", ...monthAndYearFieldNames].includes( + detail.fieldName + ) + ) { + return false; + } + + // The heuristic below should be covered by fathom rules, so we can skip doing + // it. + if ( + lazy.FormAutofillUtils.isFathomCreditCardsEnabled() && + lazy.creditCardRulesets.types.includes(detail.fieldName) + ) { + fieldScanner.parsingIndex++; + return true; + } + + const element = detail.elementWeakRef.get(); + + // If we didn't auto-discover type field, check every select for options that + // match credit card network names in value or label. + if (HTMLSelectElement.isInstance(element)) { + for (let option of element.querySelectorAll("option")) { + if ( + lazy.CreditCard.getNetworkFromName(option.value) || + lazy.CreditCard.getNetworkFromName(option.text) + ) { + fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-type"); + fieldScanner.parsingIndex++; + return true; + } + } + } + + // If the input type is a month picker, then assume it's cc-exp. + if (element.type == "month") { + fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp"); + fieldScanner.parsingIndex++; + + return true; + } + + // Don't process the fields if expiration month and expiration year are already + // matched by regex in correct order. + if ( + fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex++) + .fieldName == "cc-exp-month" && + !fieldScanner.parsingFinished && + fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex++) + .fieldName == "cc-exp-year" + ) { + return true; + } + fieldScanner.parsingIndex = savedIndex; + + // Determine the field name by checking if the fields are month select and year select + // likely. + if (this._isExpirationMonthLikely(element)) { + fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-month"); + fieldScanner.parsingIndex++; + if (!fieldScanner.parsingFinished) { + const nextDetail = fieldScanner.getFieldDetailByIndex( + fieldScanner.parsingIndex + ); + const nextElement = nextDetail.elementWeakRef.get(); + if (this._isExpirationYearLikely(nextElement)) { + fieldScanner.updateFieldName( + fieldScanner.parsingIndex, + "cc-exp-year" + ); + fieldScanner.parsingIndex++; + return true; + } + } + } + fieldScanner.parsingIndex = savedIndex; + + // Verify that the following consecutive two fields can match cc-exp-month and cc-exp-year + // respectively. + if (this._findMatchedFieldName(element, ["cc-exp-month"])) { + fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-month"); + fieldScanner.parsingIndex++; + if (!fieldScanner.parsingFinished) { + const nextDetail = fieldScanner.getFieldDetailByIndex( + fieldScanner.parsingIndex + ); + const nextElement = nextDetail.elementWeakRef.get(); + if (this._findMatchedFieldName(nextElement, ["cc-exp-year"])) { + fieldScanner.updateFieldName( + fieldScanner.parsingIndex, + "cc-exp-year" + ); + fieldScanner.parsingIndex++; + return true; + } + } + } + fieldScanner.parsingIndex = savedIndex; + + // Look for MM and/or YY(YY). + if (this._matchRegexp(element, /^mm$/gi)) { + fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-month"); + fieldScanner.parsingIndex++; + if (!fieldScanner.parsingFinished) { + const nextDetail = fieldScanner.getFieldDetailByIndex( + fieldScanner.parsingIndex + ); + const nextElement = nextDetail.elementWeakRef.get(); + if (this._matchRegexp(nextElement, /^(yy|yyyy)$/)) { + fieldScanner.updateFieldName( + fieldScanner.parsingIndex, + "cc-exp-year" + ); + fieldScanner.parsingIndex++; + + return true; + } + } + } + fieldScanner.parsingIndex = savedIndex; + + // Look for a cc-exp with 2-digit or 4-digit year. + if ( + this._matchRegexp( + element, + /(?:exp.*date[^y\\n\\r]*|mm\\s*[-/]?\\s*)yy(?:[^y]|$)/gi + ) || + this._matchRegexp( + element, + /(?:exp.*date[^y\\n\\r]*|mm\\s*[-/]?\\s*)yyyy(?:[^y]|$)/gi + ) + ) { + fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp"); + fieldScanner.parsingIndex++; + return true; + } + fieldScanner.parsingIndex = savedIndex; + + // Match general cc-exp regexp at last. + if (this._findMatchedFieldName(element, ["cc-exp"])) { + fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp"); + fieldScanner.parsingIndex++; + return true; + } + fieldScanner.parsingIndex = savedIndex; + + // Set current field name to null as it failed to match any patterns. + fieldScanner.updateFieldName(fieldScanner.parsingIndex, null); + fieldScanner.parsingIndex++; + return true; + }, + + /** + * This function should provide all field details of a form which are placed + * in the belonging section. The details contain the autocomplete info + * (e.g. fieldName, section, etc). + * + * `allowDuplicates` is used for the xpcshell-test purpose currently because + * the heuristics should be verified that some duplicated elements still can + * be predicted correctly. + * + * @param {HTMLFormElement} form + * the elements in this form to be predicted the field info. + * @param {boolean} allowDuplicates + * true to remain any duplicated field details otherwise to remove the + * duplicated ones. + * @returns {Array<Array<object>>} + * all sections within its field details in the form. + */ + getFormInfo(form, allowDuplicates = false) { + const eligibleFields = Array.from(form.elements).filter(elem => + lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(elem) + ); + + if (eligibleFields.length <= 0) { + return []; + } + + let fieldScanner = new FieldScanner(eligibleFields, { + allowDuplicates, + sectionEnabled: this._sectionEnabled, + }); + while (!fieldScanner.parsingFinished) { + let parsedPhoneFields = this._parsePhoneFields(fieldScanner); + let parsedAddressFields = this._parseAddressFields(fieldScanner); + let parsedExpirationDateFields = this._parseCreditCardFields( + fieldScanner + ); + + // If there is no field parsed, the parsing cursor can be moved + // forward to the next one. + if ( + !parsedPhoneFields && + !parsedAddressFields && + !parsedExpirationDateFields + ) { + fieldScanner.parsingIndex++; + } + } + + lazy.LabelUtils.clearLabelMap(); + + return fieldScanner.getSectionFieldDetails(); + }, + + _getPossibleFieldNames(element) { + let fieldNames = []; + let isAutoCompleteOff = + element.autocomplete == "off" || element.form?.autocomplete == "off"; + if ( + FormAutofill.isAutofillCreditCardsAvailable && + (!isAutoCompleteOff || FormAutofill.creditCardsAutocompleteOff) + ) { + fieldNames.push(...this.CREDIT_CARD_FIELDNAMES); + } + if ( + FormAutofill.isAutofillAddressesAvailable && + (!isAutoCompleteOff || FormAutofill.addressesAutocompleteOff) + ) { + fieldNames.push(...this.ADDRESS_FIELDNAMES); + } + + if (HTMLSelectElement.isInstance(element)) { + const FIELDNAMES_FOR_SELECT_ELEMENT = [ + "address-level1", + "address-level2", + "country", + "cc-exp-month", + "cc-exp-year", + "cc-exp", + "cc-type", + ]; + fieldNames = fieldNames.filter(name => + FIELDNAMES_FOR_SELECT_ELEMENT.includes(name) + ); + } + + return fieldNames; + }, + + getInfo(element, scanner) { + function infoRecordWithFieldName(fieldName, confidence = null) { + return { + fieldName, + section: "", + addressType: "", + contactType: "", + confidence, + }; + } + + let info = element.getAutocompleteInfo(); + // An input[autocomplete="on"] will not be early return here since it stll + // needs to find the field name. + if ( + info && + info.fieldName && + info.fieldName != "on" && + info.fieldName != "off" + ) { + info._reason = "autocomplete"; + return info; + } + + if (!this._prefEnabled) { + return null; + } + + let fields = this._getPossibleFieldNames(element); + + // "email" type of input is accurate for heuristics to determine its Email + // field or not. However, "tel" type is used for ZIP code for some web site + // (e.g. HomeDepot, BestBuy), so "tel" type should be not used for "tel" + // prediction. + if (element.type == "email" && fields.includes("email")) { + return infoRecordWithFieldName("email"); + } + + if (lazy.FormAutofillUtils.isFathomCreditCardsEnabled()) { + // We don't care fields that are not supported by fathom + let fathomFields = fields.filter(r => + lazy.creditCardRulesets.types.includes(r) + ); + let [matchedFieldName, confidence] = scanner.getFathomField( + element, + fathomFields + ); + // At this point, use fathom's recommendation if it has one + if (matchedFieldName) { + return infoRecordWithFieldName(matchedFieldName, confidence); + } + + // Continue to run regex-based heuristics even when fathom doesn't recognize + // the field. Since the regex-based heuristic has good search coverage but + // has a worse precision. We use it in conjunction with fathom to maximize + // our search coverage. For example, when a <input> is not considered cc-name + // by fathom but is considered cc-name by regex-based heuristic, if the form + // also contains a cc-number identified by fathom, we will treat the form as a + // valid cc form; hence both cc-number & cc-name are identified. + } + + if (fields.length) { + let matchedFieldName = this._findMatchedFieldName(element, fields); + if (matchedFieldName) { + return infoRecordWithFieldName(matchedFieldName); + } + } + + return null; + }, + + /** + * @typedef ElementStrings + * @type {object} + * @yields {string} id - element id. + * @yields {string} name - element name. + * @yields {Array<string>} labels - extracted labels. + */ + + /** + * Extract all the signature strings of an element. + * + * @param {HTMLElement} element + * @returns {ElementStrings} + */ + _getElementStrings(element) { + return { + *[Symbol.iterator]() { + yield element.id; + yield element.name; + yield element.placeholder?.trim(); + + const labels = lazy.LabelUtils.findLabelElements(element); + for (let label of labels) { + yield* lazy.LabelUtils.extractLabelStrings(label); + } + }, + }; + }, + + /** + * Find the first matched field name of the element wih given regex list. + * + * @param {HTMLElement} element + * @param {Array<string>} regexps + * The regex key names that correspond to pattern in the rule list. It will + * be matched against the element string converted to lower case. + * @returns {?string} The first matched field name + */ + _findMatchedFieldName(element, regexps) { + const getElementStrings = this._getElementStrings(element); + for (let regexp of regexps) { + for (let string of getElementStrings) { + if (this.RULES[regexp].test(string?.toLowerCase())) { + return regexp; + } + } + } + + return null; + }, + + /** + * Determine whether the regexp can match any of element strings. + * + * @param {HTMLElement} element + * @param {RegExp} regexp + * + * @returns {boolean} + */ + _matchRegexp(element, regexp) { + const elemStrings = this._getElementStrings(element); + for (const str of elemStrings) { + if (regexp.test(str)) { + return true; + } + } + return false; + }, + + /** + * Phone field grammars - first matched grammar will be parsed. Grammars are + * separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are + * parsed separately unless they are necessary parts of the match. + * The following notation is used to describe the patterns: + * <cc> - country code field. + * <ac> - area code field. + * <phone> - phone or prefix. + * <suffix> - suffix. + * <ext> - extension. + * :N means field is limited to N characters, otherwise it is unlimited. + * (pattern <field>)? means pattern is optional and matched separately. + * + * This grammar list from Chromium will be enabled partially once we need to + * support more cases of Telephone fields. + */ + PHONE_FIELD_GRAMMARS: [ + // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix> + + // (Ext: <ext>)?)? + // {REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0}, + // {REGEX_AREA, FIELD_AREA_CODE, 0}, + // {REGEX_PHONE, FIELD_PHONE, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)? + // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3}, + // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3}, + // {REGEX_PHONE, FIELD_SUFFIX, 4}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)? + // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0}, + // {REGEX_PHONE, FIELD_AREA_CODE, 3}, + // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3}, + // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)? + ["tel", "tel-country-code", 3], + ["tel", "tel-area-code", 3], + ["tel", "tel-local-prefix", 3], + ["tel", "tel-local-suffix", 4], + [null, null, 0], + + // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)? + // {REGEX_AREA, FIELD_AREA_CODE, 0}, + // {REGEX_PHONE, FIELD_PHONE, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)? + // {REGEX_PHONE, FIELD_AREA_CODE, 0}, + // {REGEX_PHONE, FIELD_PHONE, 3}, + // {REGEX_PHONE, FIELD_SUFFIX, 4}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? + // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0}, + // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0}, + // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? + // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0}, + // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0}, + // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)? + // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0}, + // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0}, + // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0}, + // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Area code: <ac>:3 Prefix: <prefix>:3 Suffix: <suffix>:4 (Ext: <ext>)? + // {REGEX_AREA, FIELD_AREA_CODE, 3}, + // {REGEX_PREFIX, FIELD_PHONE, 3}, + // {REGEX_SUFFIX, FIELD_SUFFIX, 4}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)? + // {REGEX_PHONE, FIELD_AREA_CODE, 0}, + // {REGEX_PREFIX, FIELD_PHONE, 0}, + // {REGEX_SUFFIX, FIELD_SUFFIX, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)? + ["tel", "tel-area-code", 0], + ["tel", "tel-local-prefix", 3], + ["tel", "tel-local-suffix", 4], + [null, null, 0], + + // Phone: <cc> - <ac> - <phone> (Ext: <ext>)? + // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0}, + // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0}, + // {REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <ac> - <phone> (Ext: <ext>)? + // {REGEX_AREA, FIELD_AREA_CODE, 0}, + // {REGEX_PHONE, FIELD_PHONE, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)? + // {REGEX_PHONE, FIELD_COUNTRY_CODE, 3}, + // {REGEX_PHONE, FIELD_PHONE, 10}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Ext: <ext> + // {REGEX_EXTENSION, FIELD_EXTENSION, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + + // Phone: <phone> (Ext: <ext>)? + // {REGEX_PHONE, FIELD_PHONE, 0}, + // {REGEX_SEPARATOR, FIELD_NONE, 0}, + ], +}; + +XPCOMUtils.defineLazyGetter(FormAutofillHeuristics, "RULES", () => { + let sandbox = {}; + const HEURISTICS_REGEXP = "resource://autofill/content/heuristicsRegexp.js"; + Services.scriptloader.loadSubScript(HEURISTICS_REGEXP, sandbox); + return sandbox.HeuristicsRegExp.RULES; +}); + +XPCOMUtils.defineLazyGetter( + FormAutofillHeuristics, + "CREDIT_CARD_FIELDNAMES", + () => + Object.keys(FormAutofillHeuristics.RULES).filter(name => + lazy.FormAutofillUtils.isCreditCardField(name) + ) +); + +XPCOMUtils.defineLazyGetter(FormAutofillHeuristics, "ADDRESS_FIELDNAMES", () => + Object.keys(FormAutofillHeuristics.RULES).filter(name => + lazy.FormAutofillUtils.isAddressField(name) + ) +); + +XPCOMUtils.defineLazyPreferenceGetter( + FormAutofillHeuristics, + "_prefEnabled", + PREF_HEURISTICS_ENABLED +); + +XPCOMUtils.defineLazyPreferenceGetter( + FormAutofillHeuristics, + "_sectionEnabled", + PREF_SECTION_ENABLED +); |