/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
import { FormAutofill } from "resource://autofill/FormAutofill.sys.mjs";
import { HeuristicsRegExp } from "resource://gre/modules/shared/HeuristicsRegExp.sys.mjs";
const lazy = {};
ChromeUtils.defineESModuleGetters(lazy, {
CreditCard: "resource://gre/modules/CreditCard.sys.mjs",
CreditCardRulesets: "resource://gre/modules/shared/CreditCardRuleset.sys.mjs",
FieldDetail: "resource://gre/modules/shared/FieldScanner.sys.mjs",
FieldScanner: "resource://gre/modules/shared/FieldScanner.sys.mjs",
FormAutofillUtils: "resource://gre/modules/shared/FormAutofillUtils.sys.mjs",
LabelUtils: "resource://gre/modules/shared/LabelUtils.sys.mjs",
MLAutofill: "resource://autofill/MLAutofill.sys.mjs",
});
/**
* To help us classify sections that can appear only N times in a row.
* For example, the only time multiple cc-number fields are valid is when
* there are four of these fields in a row.
* Otherwise, multiple cc-number fields should be in separate sections.
*/
const MULTI_N_FIELD_NAMES = { "cc-number": 4 };
// Numeric type tags. NOTE(review): neither constant is referenced in the part
// of the file visible here; presumably they distinguish credit card sections
// from address sections further down - confirm against their usages.
const CC_TYPE = 1;
const ADDR_TYPE = 2;
/**
* Returns the autocomplete information of fields according to heuristics.
*/
export const FormAutofillHeuristics = {
RULES: HeuristicsRegExp.getRules(),
LABEL_RULES: HeuristicsRegExp.getLabelRules(),
CREDIT_CARD_FIELDNAMES: [],
ADDRESS_FIELDNAMES: [],
/**
* Try to find a contiguous sub-array within an array.
*
* @param {Array} array
* @param {Array} subArray
*
* @returns {boolean}
* Return whether subArray was found within the array or not.
*/
_matchContiguousSubArray(array, subArray) {
return array.some((elm, i) =>
subArray.every((sElem, j) => sElem == array[i + j])
);
},
/**
* Try to find a field that looks like a month select.
*
* @param {DOMElement} element
* @returns {boolean}
* Return true if we observe the trait of month select in
* the current element.
*/
_isExpirationMonthLikely(element) {
if (!HTMLSelectElement.isInstance(element)) {
return false;
}
const options = [...element.options];
const desiredValues = Array(12)
.fill(1)
.map((v, i) => v + i);
// The number of month options shouldn't be less than 12 or larger than 13
// including the default option.
if (options.length < 12 || options.length > 13) {
return false;
}
return (
this._matchContiguousSubArray(
options.map(e => +e.value),
desiredValues
) ||
this._matchContiguousSubArray(
options.map(e => +e.label),
desiredValues
)
);
},
/**
* Try to find a field that looks like a year select.
*
* @param {DOMElement} element
* @returns {boolean}
* Return true if we observe the trait of year select in
* the current element.
*/
_isExpirationYearLikely(element) {
if (!HTMLSelectElement.isInstance(element)) {
return false;
}
const options = [...element.options];
// A normal expiration year select should contain at least the last three years
// in the list.
const curYear = new Date().getFullYear();
const desiredValues = Array(3)
.fill(0)
.map((v, i) => v + curYear + i);
return (
this._matchContiguousSubArray(
options.map(e => +e.value),
desiredValues
) ||
this._matchContiguousSubArray(
options.map(e => +e.label),
desiredValues
)
);
},
/**
* Return a set of additional attributes related to a field.
*
* @param {Element} element
* Form element to examine.
* @param {list} fieldNames
* String or list of field names for the element.
* @returns {map}
* Returns a map of extra attributes.
*/
parseAdditionalAttributes(element, fieldNames) {
let attributes = { isLookup: false };
const INTERESTED_FIELDS = [
"street-address",
"address-line1",
"address-line2",
"address-line3",
"postal-code",
];
if (typeof fieldNames == "string") {
fieldNames = [fieldNames];
}
if (fieldNames?.some(fieldName => INTERESTED_FIELDS.includes(fieldName))) {
const regExpLookup = HeuristicsRegExp.getExtraRules("lookup");
if (this._matchRegexp(element, regExpLookup)) {
attributes.isLookup = true;
}
}
return attributes;
},
/**
* This function handles the case when two adjacent fields are incorrectly
* identified with the same field name. Currently, only given-name and
* family-name are handled as possible errors.
*
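* @param {FieldScanner} scanner
* The current parsing status for all elements
* @param {object} fieldDetail
* Detail (fieldName, element) of the field being examined; it is compared
* with the field just before the scanner's parsing index.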
* @returns {boolean}
* Return true if any field is recognized and updated, otherwise false.
*/
_parseNameFieldsContent(scanner, fieldDetail) {
const TARGET_FIELDS = ["given-name", "family-name"];
if (!TARGET_FIELDS.includes(fieldDetail.fieldName)) {
return false;
}
let idx = scanner.parsingIndex;
const detailBefore = scanner.getFieldDetailByIndex(idx - 1);
if (fieldDetail.fieldName == detailBefore?.fieldName) {
let otherFieldName =
fieldDetail.fieldName == TARGET_FIELDS[0]
? TARGET_FIELDS[1]
: TARGET_FIELDS[0];
// If the second field matches both field names, or both fields match
// both field names, then we change the second field, since the author
// was more likely to miscopy the second field from the first. However,
// if the earlier field only matches, then we change the first field.
if (
this._findMatchedFieldNames(fieldDetail.element, [otherFieldName])
.length
) {
scanner.updateFieldName(idx, otherFieldName);
} else if (
this._findMatchedFieldNames(detailBefore.element, [otherFieldName])
.length
) {
scanner.updateFieldName(idx - 1, otherFieldName);
}
scanner.parsingIndex++;
return true;
}
return false;
},
/**
* In some languages, such as French (nom) and German (Name), "name" can mean either family name or
* full name in a form, depending on the context. We want to be sure that if "name" is
* detected in the context of "family-name" or "given-name", it is updated accordingly.
*
* Look for "given-name", "family-name", and "name" fields. If exactly two of those fields are detected
* in a row and one of them is "name", then replace "name" with "family-name" if it is accompanied by
* "given-name", or with "given-name" if it is accompanied by "family-name".
*
* @param {FieldScanner} scanner
* The current parsing status for all elements
* @returns {boolean}
* Return true if any field is recognized and updated, otherwise false.
*/
_parseNameFields(scanner, fieldDetail) {
const TARGET_FIELDS = ["name", "given-name", "family-name"];
if (!TARGET_FIELDS.includes(fieldDetail.fieldName)) {
return false;
}
const fields = [];
let nameIndex = -1;
for (let idx = scanner.parsingIndex; ; idx++) {
const detail = scanner.getFieldDetailByIndex(idx);
if (!TARGET_FIELDS.includes(detail?.fieldName)) {
break;
}
if (detail.fieldName === "name") {
nameIndex = idx;
}
fields.push(detail);
}
if (nameIndex != -1 && fields.length == 2) {
//if name is detected and the other of the two fields detected is 'given-name'
//then update name to 'name' to 'family-name'
if (
fields[0].fieldName == "given-name" ||
fields[1].fieldName == "given-name"
) {
scanner.updateFieldName(nameIndex, "family-name");
//if name is detected and the other of the two fields detected is 'family-name'
//then update name to 'name' to 'given-name'
} else if (
fields[0].fieldName == "family-name" ||
fields[1].fieldName == "family-name"
) {
scanner.updateFieldName(nameIndex, "given-name");
} else {
return false;
}
scanner.parsingIndex += fields.length;
return true;
}
return false;
},
/**
* Try to match the telephone related fields to the grammar
* list to see if there is any valid telephone set and correct their
* field names.
*
* @param {FieldScanner} scanner
* The current parsing status for all elements
* @returns {boolean}
* Return true if there is any field can be recognized in the parser,
* otherwise false.
*/
_parsePhoneFields(scanner, _fieldDetail) {
// Set to { ruleFrom, ruleTo } once a complete rule set matches the fields that
// start at the scanner's current parsing index.
let matchingResult;
// Each grammar row is read below as [category, newFieldName, maxLength]. A row
// whose first entry is falsy acts as the separator that ends a rule set.
const GRAMMARS = this.PHONE_FIELD_GRAMMARS;
function isGrammarSeparator(index) {
return !GRAMMARS[index][0];
}
// Remembered so the return value can tell whether any field was consumed.
const savedIndex = scanner.parsingIndex;
// ruleFrom is only advanced by the fast-forward loop at the bottom of the body,
// hence the empty update clause.
for (let ruleFrom = 0; ruleFrom < GRAMMARS.length; ) {
const detailStart = scanner.parsingIndex;
let ruleTo = ruleFrom;
// Walk the rule set and the fields in lockstep: the rule at ruleFrom + count
// is checked against the field at detailStart + count.
for (let count = 0; ruleTo < GRAMMARS.length; ruleTo++, count++) {
// Bail out when reaching the end of the current set of grammars
// or there are no more elements to parse
if (
isGrammarSeparator(ruleTo) ||
!scanner.elementExisting(detailStart + count)
) {
break;
}
const [category, , length] = GRAMMARS[ruleTo];
const detail = scanner.getFieldDetailByIndex(detailStart + count);
// If the field is not what this grammar rule is interested in, skip processing.
// Fields whose name came from the autocomplete attribute never match.
if (
!detail ||
detail.fieldName != category ||
detail.reason == "autocomplete"
) {
break;
}
const element = detail.element;
// A rule that specifies a length only matches an element whose maxLength is
// truthy and not larger than that length.
// NOTE(review): a maxLength of -1 (what inputs without a maxlength attribute
// presumably report) is truthy and passes this check - confirm it is intended.
if (length && (!element.maxLength || length < element.maxLength)) {
break;
}
}
// if we reach the grammar separator, that means all the previous rules are matched.
// Set the matchingResult so we update field names accordingly.
// NOTE(review): this reads GRAMMARS[ruleTo] even if the loop above ended with
// ruleTo == GRAMMARS.length, which would throw unless the table always ends
// with a separator row - confirm against PHONE_FIELD_GRAMMARS.
if (isGrammarSeparator(ruleTo)) {
matchingResult = { ruleFrom, ruleTo };
break;
}
// Fast forward to the next rule set.
// The post-increment leaves ruleFrom on the row right after the separator.
for (; ruleFrom < GRAMMARS.length; ) {
if (isGrammarSeparator(ruleFrom++)) {
break;
}
}
}
// Rename the matched fields one by one using the second entry of each matched
// grammar row, advancing the parsing index past every renamed field.
if (matchingResult) {
const { ruleFrom, ruleTo } = matchingResult;
for (let i = ruleFrom; i < ruleTo; i++) {
scanner.updateFieldName(scanner.parsingIndex, GRAMMARS[i][1]);
scanner.parsingIndex++;
}
}
// If the previous parsed field is a "tel" field, run heuristic to see
// if the current field is a "tel-extension" field
// (this check runs whether or not a grammar matched above).
const field = scanner.getFieldDetailByIndex(scanner.parsingIndex);
if (field && field.reason != "autocomplete") {
const prev = scanner.getFieldDetailByIndex(scanner.parsingIndex - 1);
if (
prev &&
lazy.FormAutofillUtils.getCategoryFromFieldName(prev.fieldName) == "tel"
) {
const regExpTelExtension = new RegExp(
"\\bext|ext\\b|extension|ramal", // pt-BR, pt-PT
"iug"
);
if (this._matchRegexp(field.element, regExpTelExtension)) {
scanner.updateFieldName(scanner.parsingIndex, "tel-extension");
scanner.parsingIndex++;
}
}
}
// True only when the parsing index moved, i.e. at least one field was consumed.
return savedIndex != scanner.parsingIndex;
},
/**
* If this is a house number field and there is no address-line1 or
* street-address field, change the house number field to street-address.
*
* @param {FieldScanner} scanner
* The current parsing status for all elements
* @returns {boolean}
* Always returns false, even when the field is renamed, so that the
* remaining address handling still gets performed.
*/
_parseHouseNumberFields(scanner, fieldDetail) {
if (fieldDetail?.fieldName == "address-housenumber") {
const savedIndex = scanner.parsingIndex;
for (let idx = 0; !scanner.parsingFinished; idx++) {
const detail = scanner.getFieldDetailByIndex(idx);
if (!detail) {
break;
}
if (["address-line1", "street-address"].includes(detail?.fieldName)) {
return false;
}
}
// Return false so additional address handling still gets performed.
scanner.updateFieldName(savedIndex, "street-address");
}
return false;
},
/**
* Try to find the correct address-line[1-3] sequence and correct their field
* names.
*
* @param {FieldScanner} scanner
* The current parsing status for all elements
* @returns {boolean}
* Return true if there is any field can be recognized in the parser,
* otherwise false.
*/
_parseStreetAddressFields(scanner, _fieldDetail) {
const INTERESTED_FIELDS = [
"street-address",
"address-line1",
"address-line2",
"address-line3",
];
// Store the index of fields that are recognized as 'address-housenumber'
let houseNumberFields = [];
// The number of address-related lookup fields found.
let lookupFieldsCount = 0;
// We need to build a list of the address fields. A list of the indicies
// is also needed as the fields with a given name can change positions
// during the update.
const fields = [];
const fieldIndicies = [];
for (let idx = scanner.parsingIndex; !scanner.parsingFinished; idx++) {
const detail = scanner.getFieldDetailByIndex(idx);
// Skip over any house number fields. There should only be zero or one,
// but we'll skip over them all anyway.
if (
[detail?.fieldName, detail?.alternativeFieldName].includes(
"address-housenumber"
)
) {
houseNumberFields.push(idx);
continue;
}
if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
break;
}
if (detail?.isLookup) {
lookupFieldsCount++;
continue; // Skip address lookup fields
}
fields.push(detail);
fieldIndicies.push(idx);
}
if (!fields.length) {
return false;
}
switch (fields.length) {
case 1:
if (
fields[0].reason != "autocomplete" &&
["address-line2", "address-line3"].includes(fields[0].fieldName)
) {
// If an earlier address field was already found, ignore any
// address-related fields from the OTHER_ADDRESS_FIELDS
// list since those can appear in-between the address-level1
// and additional address info fields. If no address field
// exists, update the field to be address-line1.
const OTHER_ADDRESS_FIELDS = [
"address-level1",
"address-level2",
"address-level3",
"postal-code",
"organization",
];
let canUpdate = true;
for (let idx = scanner.parsingIndex - 1; idx >= 0; idx--) {
const detail = scanner.getFieldDetailByIndex(idx);
if (
detail?.fieldName == "street-address" ||
detail?.fieldName == "address-line1" ||
detail?.fieldName == "address-housenumber"
) {
canUpdate = false;
break;
}
if (!OTHER_ADDRESS_FIELDS.includes(detail?.fieldName)) {
break;
}
}
if (canUpdate) {
scanner.updateFieldName(fieldIndicies[0], "address-line1");
}
}
break;
case 2:
if (fields[0].reason == "autocomplete") {
if (
fields[0].fieldName == "street-address" &&
(fields[1].fieldName == "address-line2" ||
fields[1].reason != "autocomplete")
) {
scanner.updateFieldName(fieldIndicies[0], "address-line1", true);
}
} else {
scanner.updateFieldName(fieldIndicies[0], "address-line1");
}
scanner.updateFieldName(fieldIndicies[1], "address-line2");
break;
case 3:
default:
scanner.updateFieldName(fieldIndicies[0], "address-line1");
scanner.updateFieldName(fieldIndicies[1], "address-line2");
scanner.updateFieldName(fieldIndicies[2], "address-line3");
break;
}
// 'address-housenumber' might be recognized alongside another field type
// (see `alternativeFieldName`). In this case, we should update the field
// name before advancing the parsing index.
for (const idx of houseNumberFields) {
scanner.updateFieldName(idx, "address-housenumber");
}
scanner.parsingIndex +=
fields.length + houseNumberFields.length + lookupFieldsCount;
return true;
},
_parseAddressFields(scanner, fieldDetail) {
let fieldFound = false;
// If there is an address-level3 field but no address-level2 field,
// modify to be address-level2.
if (
fieldDetail.fieldName == "address-level3" &&
scanner.getFieldIndexByName("address-level2") == -1
) {
scanner.updateFieldName(scanner.parsingIndex, "address-level2");
fieldFound = true;
}
// State & City(address-level2)
if (
fieldDetail.fieldName == "address-level2" &&
scanner.getFieldIndexByName("address-level1") == -1
) {
const prev = scanner.getFieldDetailByIndex(scanner.parsingIndex - 1);
if (prev && !prev.fieldName && prev.localName == "select") {
scanner.updateFieldName(scanner.parsingIndex - 1, "address-level1");
scanner.parsingIndex += 1;
return true;
}
const next = scanner.getFieldDetailByIndex(scanner.parsingIndex + 1);
if (next && !next.fieldName && next.localName == "select") {
scanner.updateFieldName(scanner.parsingIndex + 1, "address-level1");
scanner.parsingIndex += 2;
return true;
}
fieldFound = true;
}
if (fieldFound) {
scanner.parsingIndex++;
return true;
}
return false;
},
/**
* Try to look for expiration date fields and revise the field names if needed.
*
* @param {FieldScanner} scanner
* The current parsing status for all elements
* @returns {boolean}
* Return true if there is any field can be recognized in the parser,
* otherwise false.
*/
_parseCreditCardExpiryFields(scanner, fieldDetail) {
const INTERESTED_FIELDS = ["cc-exp", "cc-exp-month", "cc-exp-year"];
if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
return false;
}
const fields = [];
for (let idx = scanner.parsingIndex; ; idx++) {
const detail = scanner.getFieldDetailByIndex(idx);
if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
break;
}
fields.push(detail);
}
// Don't process the fields if expiration month and expiration year are already
// matched by regex in correct order.
if (
(fields.length == 1 && fields[0].fieldName == "cc-exp") ||
(fields.length == 2 &&
fields[0].fieldName == "cc-exp-month" &&
fields[1].fieldName == "cc-exp-year")
) {
scanner.parsingIndex += fields.length;
return true;
}
const prevCCFields = new Set();
for (let idx = scanner.parsingIndex - 1; ; idx--) {
const detail = scanner.getFieldDetailByIndex(idx);
if (
lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=
"creditCard"
) {
break;
}
prevCCFields.add(detail.fieldName);
}
// We update the "cc-exp-*" fields to correct "cc-ex-*" fields order when
// the following conditions are met:
// 1. The previous elements are identified as credit card fields and
// cc-number is in it
// 2. There is no "cc-exp-*" fields in the previous credit card elements
if (
["cc-number", "cc-name"].some(f => prevCCFields.has(f)) &&
!["cc-exp", "cc-exp-month", "cc-exp-year"].some(f => prevCCFields.has(f))
) {
if (fields.length == 1) {
scanner.updateFieldName(scanner.parsingIndex, "cc-exp");
} else if (fields.length == 2) {
scanner.updateFieldName(scanner.parsingIndex, "cc-exp-month");
scanner.updateFieldName(scanner.parsingIndex + 1, "cc-exp-year");
}
scanner.parsingIndex += fields.length;
return true;
}
// Set field name to null as it failed to match any patterns.
for (let idx = 0; idx < fields.length; idx++) {
scanner.updateFieldName(scanner.parsingIndex + idx, null);
}
return false;
},
_parseCreditCardNumberFields(scanner, fieldDetail) {
const INTERESTED_FIELDS = ["cc-number"];
if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
return false;
}
const fieldDetails = [];
for (let idx = scanner.parsingIndex; ; idx++) {
const detail = scanner.getFieldDetailByIndex(idx);
if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
break;
}
fieldDetails.push(detail);
}
// This rule only applies when all the fields are visible
if (fieldDetails.some(field => !field.isVisible)) {
scanner.parsingIndex += fieldDetails.length;
return true;
}
// This is the heuristic to handle special cases where we can have multiple
// fields in one section, but only if the field has appeared N times in a row.
// For example, websites can use 4 consecutive 4-digit `cc-number` fields
// instead of one 16-digit `cc-number` field.
const N = MULTI_N_FIELD_NAMES["cc-number"];
if (fieldDetails.length == N) {
fieldDetails.forEach((fd, index) => {
// part starts with 1
fd.part = index + 1;
});
scanner.parsingIndex += fieldDetails.length;
return true;
}
return false;
},
/**
* Look for cc-*-name fields when *-name field is present
*
* @param {FieldScanner} scanner
* The current parsing status for all elements
* @returns {boolean}
* Return true if there is any field can be recognized in the parser,
* otherwise false.
*/
_parseCreditCardNameFields(scanner, fieldDetail) {
const INTERESTED_FIELDS = [
"name",
"given-name",
"additional-name",
"family-name",
];
if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {
return false;
}
const fields = [];
for (let idx = scanner.parsingIndex; ; idx++) {
const detail = scanner.getFieldDetailByIndex(idx);
if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {
break;
}
fields.push(detail);
}
const prevCCFields = new Set();
for (let idx = scanner.parsingIndex - 1; ; idx--) {
const detail = scanner.getFieldDetailByIndex(idx);
if (
lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=
"creditCard"
) {
break;
}
prevCCFields.add(detail.fieldName);
}
const subsequentCCFields = new Set();
for (let idx = scanner.parsingIndex + fields.length; ; idx++) {
const detail = scanner.getFieldDetailByIndex(idx);
if (
// For updates we only check subsequent fields that are not of type address or do not have an
// alternative field name that is of type address, to avoid falsely updating address
// form name fields to cc-*-name.
lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=
"creditCard" ||
(detail?.alternativeFieldName !== undefined &&
lazy.FormAutofillUtils.getCategoryFromFieldName(
detail?.alternativeFieldName
) != "creditCard")
) {
break;
}
subsequentCCFields.add(detail.fieldName);
}
const isLastField =
scanner.getFieldDetailByIndex(scanner.parsingIndex + 1) === null;
// We update the "name" fields to "cc-name" fields when the following
// conditions are met:
// 1. The preceding fields are identified as credit card fields and
// contain the "cc-number" field.
// 2. No "cc-name-*" field is found among the preceding credit card fields.
// 3. The "cc-csc" field is either not present among the preceding credit card fields,
// or the current field is the last field in the form. This condition is in place
// because "cc-csc" is often the last field in a credit card form, and we want to
// avoid mistakenly updating fields in subsequent address forms.
if (
(["cc-number"].some(f => prevCCFields.has(f)) &&
!["cc-name", "cc-given-name", "cc-family-name"].some(f =>
prevCCFields.has(f)
) &&
(isLastField || !prevCCFields.has("cc-csc"))) || // 4. Or we update when current name field is followed by
// creditcard form fields that contain cc-number
// and no cc-*-name field is detected
(["cc-number"].some(f => subsequentCCFields.has(f)) &&
!["cc-name", "cc-given-name", "cc-family-name"].some(f =>
subsequentCCFields.has(f)
))
) {
// If there is only one field, assume the name field a `cc-name` field
if (fields.length == 1) {
scanner.updateFieldName(scanner.parsingIndex, `cc-name`);
scanner.parsingIndex += 1;
} else {
// update *-name to cc-*-name
for (const field of fields) {
scanner.updateFieldName(
scanner.parsingIndex,
`cc-${field.fieldName}`
);
scanner.parsingIndex += 1;
}
}
return true;
}
return false;
},
/**
* If the given field is of a different type than the previous
* field, use the alternate field name instead.
*/
_checkForAlternateField(scanner, fieldDetail) {
if (fieldDetail.alternativeFieldName) {
const previousField = scanner.getFieldDetailByIndex(
scanner.parsingIndex - 1
);
if (previousField) {
const preIsCC = lazy.FormAutofillUtils.isCreditCardField(
previousField.fieldName
);
const curIsCC = lazy.FormAutofillUtils.isCreditCardField(
fieldDetail.fieldName
);
// If the current type is different from the previous element's type, use
// the alternative fieldname instead.
if (preIsCC != curIsCC) {
fieldDetail.fieldName = fieldDetail.alternativeFieldName;
fieldDetail.reason = "update-heuristic-alternate";
}
}
}
},
/**
* This function should provide all field details of a form which are placed
* in the belonging section. The details contain the autocomplete info
* (e.g. fieldName, section, etc).
*
* @param {formLike} formLike
* the elements in this form to be predicted the field info.
* @param {boolean} ignoreInvisibleInput
* True to NOT run heuristics on invisible fields.
* @returns {Array}
* all sections within its field details in the form.
*/
getFormInfo(formLike, ignoreInvisibleInput) {
const elements = Array.from(formLike.elements).filter(element =>
lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(element)
);
let closestHeaders;
let closestButtons;
if (FormAutofill.isMLExperimentEnabled && elements.length) {
closestHeaders = lazy.MLAutofill.closestHeaderAbove(elements);
closestButtons = lazy.MLAutofill.closestButtonBelow(elements);
}
const fieldDetails = [];
for (let idx = 0; idx < elements.length; idx++) {
const element = elements[idx];
// Ignore invisible , we still keep invisible