/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * Fathom ML model for identifying sign-up forms
 *
 * This is developed out-of-tree at https://github.com/mozilla-services/fathom-login-forms,
 * where there is also over a GB of training, validation, and
 * testing data. To make changes, do your edits there (whether adding new
 * training pages, adding new rules, or both), retrain and evaluate as
 * documented at https://mozilla.github.io/fathom/training.html, paste the
 * coefficients emitted by the trainer into the ruleset, and finally copy the
 * ruleset's "CODE TO COPY INTO PRODUCTION" section to this file's "CODE FROM
 * TRAINING REPOSITORY" section.
 */

import {
  dom,
  out,
  rule,
  ruleset,
  score,
  type,
  element,
  utils,
} from "resource://gre/modules/third_party/fathom/fathom.mjs";

const { isVisible, attributesMatch, setDefault } = utils;

// When true, the ruleset collects its candidate forms with
// dom("form").when(isVisible) instead of element("form").
const DEVELOPMENT = false;

/**
 * --- START OF CODE FROM TRAINING REPOSITORY ---
 */

// One weight per named scoring rule below; every key must match the `name`
// given to the corresponding rule in createRuleset().
const coefficients = {
  form: new Map([
    ["formAttributesMatchRegisterRegex", 0.4614015519618988],
    ["formAttributesMatchLoginRegex", -2.608457326889038],
    ["formAttributesMatchSubscriptionRegex", -3.253319501876831],
    ["formAttributesMatchLoginAndRegisterRegex", 3.6423728466033936],
    ["formHasAcNewPassword", 2.214113473892212],
    ["formHasAcCurrentPassword", -0.43707895278930664],
    ["formHasEmailField", 1.760241150856018],
    ["formHasUsernameField", 1.1527059078216553],
    ["formHasPasswordField", 1.6670876741409302],
    ["formHasFirstOrLastNameField", 0.9517516493797302],
    ["formHasRegisterButton", 1.574048638343811],
    ["formHasLoginButton", -1.1688978672027588],
    ["formHasSubscribeButton", -0.26299405097961426],
    ["formHasContinueButton", 2.3797709941864014],
    ["formHasTermsAndConditionsHyperlink", 1.764896035194397],
    ["formHasPasswordForgottenHyperlink", -0.32138824462890625],
    ["formHasAlreadySignedUpHyperlink", 3.160510301589966],
    ["closestElementIsEmailLabelLike", 1.0336143970489502],
    ["formHasRememberMeCheckbox", -1.2176686525344849],
    ["formHasSubcriptionCheckbox", 0.6100747585296631],
    ["docTitleMatchesRegisterRegex", 0.680654764175415],
    ["docTitleMatchesEditProfileRegex", -4.104133605957031],
    ["closestHeaderMatchesRegisterRegex", 1.3462989330291748],
    ["closestHeaderMatchesLoginRegex", -0.1804502159357071],
    ["closestHeaderMatchesSubscriptionRegex", -1.3057124614715576],
  ]),
};

const biases = [["form", -4.402400970458984]];

// None of the regexes below carries the `g` or `y` flag, so `.test()` is
// stateless and the same literal can safely be reused for every form.
const loginRegex =
  /login|log-in|log_in|log in|signon|sign-on|sign_on|sign on|signin|sign-in|sign_in|sign in|einloggen|anmelden|logon|log-on|log_on|log on|Войти|ورود|登录|Přihlásit se|Přihlaste|Авторизоваться|Авторизация|entrar|ログイン|로그인|inloggen|Συνδέσου|accedi|ログオン|Giriş Yap|登入|connecter|connectez-vous|Connexion|Вход|inicia/i;
const registerRegex =
  /regist|sign up|signup|sign-up|sign_up|join|new|登録|neu|erstellen|設定|신규|Créer|Nouveau|baru|nouă|nieuw|create[a-zA-Z\s]+account|create[a-zA-Z\s]+profile|activate[a-zA-Z\s]+account|Zugang anlegen|Angaben prüfen|Konto erstellen|ثبت نام|登録|注册|cadastr|Зарегистрироваться|Регистрация|Bellige alynmak|تسجيل|ΕΓΓΡΑΦΗΣ|Εγγραφή|Créer mon compte|Créer un compte|Mendaftar|가입하기|inschrijving|Zarejestruj się|Deschideți un cont|Создать аккаунт|ร่วม|Üye Ol|ساخت حساب کاربری|Schrijf je|S'inscrire/i;
const emailRegex = /mail/i;
const usernameRegex = /user|member/i;
const nameRegex = /first|last|middle/i;
const subscriptionRegex =
  /subscri|trial|offer|information|angebote|probe|ニュースレター|abonn|promotion|news/i;
const termsAndConditionsRegex =
  /terms|condition|rules|policy|privacy|nutzungsbedingungen|AGB|richtlinien|datenschutz|términos|condiciones/i;
const pwForgottenRegex =
  /forgot|reset|set password|vergessen|vergeten|oublié|dimenticata|Esqueceu|esqueci|Забыли|忘记|找回|Zapomenuté|lost|忘れた|忘れられた|忘れの方|재설정|찾기|help|فراموشی| را فراموش کرده اید|Восстановить|Unuttu|perdus|重新設定|recover|remind|request|restore|trouble|olvidada/i;
const continueRegex =
  /continue|go on|weiter|fortfahren|ga verder|next|continuar/i;
const rememberMeRegex =
  /remember|stay|speichern|merken|bleiben|auto_login|auto-login|auto login|ricordami|manter|mantenha|savelogin|keep me logged in|keep me signed in|save email address|save id|stay signed in|次回からログオンIDの入力を省略する|メールアドレスを保存する|を保存|아이디저장|아이디 저장|로그인 상태 유지|lembrar|mantenha-me conectado|Запомни меня|запомнить меня|Запомните меня|Не спрашивать в следующий раз|下次自动登录|记住我|recordar|angemeldet bleiben/i;
const alreadySignedUpRegex = /already|bereits|schon|ya tienes cuenta/i;
const editProfile = /edit/i;

/**
 * Build the Fathom ruleset that scores `form` elements.
 *
 * @param {Array} coeffs
 *        [ruleName, weight] pairs, forwarded unchanged to fathom's ruleset().
 * @param {Array} biases
 *        [typeName, bias] pairs, forwarded unchanged to fathom's ruleset().
 * @returns {object} The value returned by fathom's ruleset().
 */
function createRuleset(coeffs, biases) {
  // Per-run memoization of DOM queries, keyed by element and then by
  // selector. Initialized eagerly so the helpers below also work if they are
  // ever reached before the first clearCache() call; clearCache() replaces
  // both maps.
  let descendantsCache = new WeakMap();
  let surroundingNodesCache = new WeakMap();

  /**
   * Check document characteristics
   */
  function docTitleMatchesRegisterRegex(fnode) {
    const docTitle = fnode.element.ownerDocument.title;
    return checkValueAgainstRegex(docTitle, registerRegex);
  }
  function docTitleMatchesEditProfileRegex(fnode) {
    const docTitle = fnode.element.ownerDocument.title;
    return checkValueAgainstRegex(docTitle, editProfile);
  }

  /**
   * Check header
   */
  function closestHeaderMatchesLoginRegex(fnode) {
    return closestHeaderMatchesPredicate(fnode.element, header =>
      checkValueAgainstRegex(header.innerText, loginRegex)
    );
  }
  function closestHeaderMatchesRegisterRegex(fnode) {
    return closestHeaderMatchesPredicate(fnode.element, header =>
      checkValueAgainstRegex(header.innerText, registerRegex)
    );
  }
  function closestHeaderMatchesSubscriptionRegex(fnode) {
    return closestHeaderMatchesPredicate(fnode.element, header =>
      checkValueAgainstRegex(header.innerText, subscriptionRegex)
    );
  }

  /**
   * Check checkboxes
   */
  function formHasRememberMeCheckbox(fnode) {
    return elementHasRegexMatchingCheckbox(fnode.element, rememberMeRegex);
  }
  // The spelling of this name is kept as-is: it is also the coefficient key.
  function formHasSubcriptionCheckbox(fnode) {
    return elementHasRegexMatchingCheckbox(fnode.element, subscriptionRegex);
  }

  /**
   * Check input fields
   */
  function formHasFirstOrLastNameField(fnode) {
    const acValues = ["name", "given-name", "family-name"];
    return elementHasPredicateMatchingInput(
      fnode.element,
      elem =>
        acValues.includes(elem.autocomplete) ||
        inputFieldMatchesPredicate(elem, attr =>
          checkValueAgainstRegex(attr, nameRegex)
        )
    );
  }
  function formHasEmailField(fnode) {
    return elementHasPredicateMatchingInput(
      fnode.element,
      elem =>
        elem.autocomplete === "email" ||
        elem.type === "email" ||
        inputFieldMatchesPredicate(elem, attr =>
          checkValueAgainstRegex(attr, emailRegex)
        )
    );
  }
  function formHasUsernameField(fnode) {
    return elementHasPredicateMatchingInput(
      fnode.element,
      elem =>
        elem.autocomplete === "username" ||
        inputFieldMatchesPredicate(elem, attr =>
          checkValueAgainstRegex(attr, usernameRegex)
        )
    );
  }
  function formHasPasswordField(fnode) {
    const acValues = ["current-password", "new-password"];
    return elementHasPredicateMatchingInput(
      fnode.element,
      elem => acValues.includes(elem.autocomplete) || elem.type === "password"
    );
  }

  /**
   * Check autocomplete values
   */
  function formHasAcCurrentPassword(fnode) {
    return inputFieldMatchesSelector(
      fnode.element,
      "autocomplete=current-password"
    );
  }
  function formHasAcNewPassword(fnode) {
    return inputFieldMatchesSelector(
      fnode.element,
      "autocomplete=new-password"
    );
  }

  /**
   * Check hyperlinks within form
   */
  function formHasTermsAndConditionsHyperlink(fnode) {
    return elementHasPredicateMatchingHyperlink(
      fnode.element,
      termsAndConditionsRegex
    );
  }
  function formHasPasswordForgottenHyperlink(fnode) {
    return elementHasPredicateMatchingHyperlink(
      fnode.element,
      pwForgottenRegex
    );
  }
  function formHasAlreadySignedUpHyperlink(fnode) {
    return elementHasPredicateMatchingHyperlink(
      fnode.element,
      alreadySignedUpRegex
    );
  }

  /**
   * Check labels
   */
  function closestElementIsEmailLabelLike(fnode) {
    return elementHasPredicateMatchingInput(fnode.element, elem =>
      previousSiblingLabelMatchesRegex(elem, emailRegex)
    );
  }

  /**
   * Check buttons
   */
  function formHasRegisterButton(fnode) {
    return elementHasPredicateMatchingButton(
      fnode.element,
      button =>
        checkValueAgainstRegex(button.innerText, registerRegex) ||
        buttonMatchesPredicate(button, attr =>
          checkValueAgainstRegex(attr, registerRegex)
        )
    );
  }
  function formHasLoginButton(fnode) {
    return elementHasPredicateMatchingButton(
      fnode.element,
      button =>
        checkValueAgainstRegex(button.innerText, loginRegex) ||
        buttonMatchesPredicate(button, attr =>
          checkValueAgainstRegex(attr, loginRegex)
        )
    );
  }
  function formHasContinueButton(fnode) {
    return elementHasPredicateMatchingButton(
      fnode.element,
      button =>
        checkValueAgainstRegex(button.innerText, continueRegex) ||
        buttonMatchesPredicate(button, attr =>
          checkValueAgainstRegex(attr, continueRegex)
        )
    );
  }
  function formHasSubscribeButton(fnode) {
    return elementHasPredicateMatchingButton(
      fnode.element,
      button =>
        checkValueAgainstRegex(button.innerText, subscriptionRegex) ||
        buttonMatchesPredicate(button, attr =>
          checkValueAgainstRegex(attr, subscriptionRegex)
        )
    );
  }

  /**
   * Check form attributes
   */
  function formAttributesMatchRegisterRegex(fnode) {
    return formMatchesPredicate(fnode.element, attr =>
      checkValueAgainstRegex(attr, registerRegex)
    );
  }
  function formAttributesMatchLoginRegex(fnode) {
    return formMatchesPredicate(fnode.element, attr =>
      checkValueAgainstRegex(attr, loginRegex)
    );
  }
  function formAttributesMatchSubscriptionRegex(fnode) {
    return formMatchesPredicate(fnode.element, attr =>
      checkValueAgainstRegex(attr, subscriptionRegex)
    );
  }
  // Scores when a single attribute value matches both the register and the
  // login regex.
  function formAttributesMatchLoginAndRegisterRegex(fnode) {
    return formMatchesPredicate(fnode.element, attr =>
      checkValueAgainstAllRegex(attr, [registerRegex, loginRegex])
    );
  }

  /**
   * HELPER FUNCTIONS
   */

  /**
   * Run `predicate` over the element's "id", "name" and "className" values
   * plus any `additional` attribute names, via fathom's attributesMatch.
   *
   * NOTE(review): "className" is a DOM property name rather than an HTML
   * attribute name; whether it ever matches depends on how attributesMatch
   * resolves names. Confirm before changing it - the coefficients were
   * trained against this exact list.
   */
  function elementMatchesPredicate(element, predicate, additional = []) {
    return attributesMatch(
      element,
      predicate,
      ["id", "name", "className"].concat(additional)
    );
  }
  function formMatchesPredicate(element, predicate) {
    return elementMatchesPredicate(element, predicate, ["action"]);
  }
  function inputFieldMatchesPredicate(element, predicate) {
    return elementMatchesPredicate(element, predicate, ["placeholder"]);
  }
  // `selector` is the inside of an attribute selector, e.g.
  // "autocomplete=new-password".
  function inputFieldMatchesSelector(element, selector) {
    return atLeastOne(getElementDescendants(element, `input[${selector}]`));
  }
  function buttonMatchesPredicate(element, predicate) {
    // "id" is already part of the base list in elementMatchesPredicate, so
    // only the button-specific attributes are added here.
    return elementMatchesPredicate(element, predicate, ["value", "title"]);
  }

  // True if any descendant of `element` matching `selector` satisfies
  // `predicate`.
  function elementHasPredicateMatchingDescendant(element, selector, predicate) {
    const matchingElements = getElementDescendants(element, selector);
    return matchingElements.some(predicate);
  }
  // Real heading tags are tried first, then header-like containers.
  function elementHasPredicateMatchingHeader(element, predicate) {
    return (
      elementHasPredicateMatchingDescendant(
        element,
        "h1,h2,h3,h4,h5,h6",
        predicate
      ) ||
      elementHasPredicateMatchingDescendant(
        element,
        "div[class*=heading],div[class*=header],div[class*=title],header",
        predicate
      )
    );
  }
  function elementHasPredicateMatchingButton(element, predicate) {
    return elementHasPredicateMatchingDescendant(
      element,
      "button,input[type=submit],input[type=button]",
      predicate
    );
  }
  function elementHasPredicateMatchingInput(element, predicate) {
    return elementHasPredicateMatchingDescendant(element, "input", predicate);
  }
  // A link matches through the label before it, its own text, its
  // id/name/className/href values, or the label after it.
  function elementHasPredicateMatchingHyperlink(element, regexExp) {
    return elementHasPredicateMatchingDescendant(
      element,
      "a",
      link =>
        previousSiblingLabelMatchesRegex(link, regexExp) ||
        checkValueAgainstRegex(link.innerText, regexExp) ||
        elementMatchesPredicate(
          link,
          attr => checkValueAgainstRegex(attr, regexExp),
          ["href"]
        ) ||
        nextSiblingLabelMatchesRegex(link, regexExp)
    );
  }
  function elementHasRegexMatchingCheckbox(element, regexExp) {
    return elementHasPredicateMatchingDescendant(
      element,
      "input[type=checkbox], div[class*=checkbox]",
      box =>
        elementMatchesPredicate(box, attr =>
          checkValueAgainstRegex(attr, regexExp)
        ) || nextSiblingLabelMatchesRegex(box, regexExp)
    );
  }

  // Tests the directly following sibling if it is a <label>; otherwise falls
  // back to the closest <label> that follows `element` in the document.
  function nextSiblingLabelMatchesRegex(element, regexExp) {
    const nextElem = element.nextElementSibling;
    if (nextElem && nextElem.tagName === "LABEL") {
      return checkValueAgainstRegex(nextElem.innerText, regexExp);
    }
    const closestElem = closestElementFollowing(element, "label");
    return closestElem
      ? checkValueAgainstRegex(closestElem.innerText, regexExp)
      : false;
  }
  // Mirror image of nextSiblingLabelMatchesRegex for the preceding direction.
  function previousSiblingLabelMatchesRegex(element, regexExp) {
    const previousElem = element.previousElementSibling;
    if (previousElem && previousElem.tagName === "LABEL") {
      return checkValueAgainstRegex(previousElem.innerText, regexExp);
    }
    const closestElem = closestElementPreceding(element, "label");
    return closestElem
      ? checkValueAgainstRegex(closestElem.innerText, regexExp)
      : false;
  }

  // Memoized element.querySelectorAll(selector), returned as an array.
  function getElementDescendants(element, selector) {
    const selectorToDescendants = setDefault(
      descendantsCache,
      element,
      () => new Map()
    );
    return setDefault(selectorToDescendants, selector, () =>
      Array.from(element.querySelectorAll(selector))
    );
  }

  // Passed to .note() on the first rule below; drops everything memoized so
  // far by replacing both caches.
  function clearCache() {
    descendantsCache = new WeakMap();
    surroundingNodesCache = new WeakMap();
  }

  // Headers inside `element` are checked before headers above it.
  function closestHeaderMatchesPredicate(element, predicate) {
    return (
      elementHasPredicateMatchingHeader(element, predicate) ||
      closestHeaderAboveMatchesPredicate(element, predicate)
    );
  }
  function closestHeaderAboveMatchesPredicate(element, predicate) {
    let closestHeader = closestElementPreceding(element, "h1,h2,h3,h4,h5,h6");
    if (closestHeader !== null) {
      if (predicate(closestHeader)) {
        return true;
      }
    }
    // No matching heading tag: try the closest header-like container instead.
    closestHeader = closestElementPreceding(
      element,
      "div[class*=heading],div[class*=header],div[class*=title],header"
    );
    return closestHeader ? predicate(closestHeader) : false;
  }

  function closestElementPreceding(element, selector) {
    return getSurroundingNodes(element, selector).precedingNode;
  }
  function closestElementFollowing(element, selector) {
    return getSurroundingNodes(element, selector).followingNode;
  }

  /**
   * Find, among all nodes of the owner document matching `selector`, the ones
   * immediately before and after `element`. Memoized per element and
   * selector.
   *
   * @returns {{precedingNode: ?Element, followingNode: ?Element}}
   *          Either entry is null when there is no such node.
   */
  function getSurroundingNodes(element, selector) {
    const selectorToSurroundingNodes = setDefault(
      surroundingNodesCache,
      element,
      () => new Map()
    );
    return setDefault(selectorToSurroundingNodes, selector, () => {
      const elements = getElementDescendants(element.ownerDocument, selector);
      const followingIndex = closestFollowingNodeIndex(elements, element);
      const precedingIndex = followingIndex - 1;
      const preceding = precedingIndex < 0 ? null : elements[precedingIndex];
      const following =
        followingIndex === elements.length ? null : elements[followingIndex];
      return { precedingNode: preceding, followingNode: following };
    });
  }

  /**
   * Binary search for the index of the first entry of `elements` that is not
   * reported as preceding `element` by compareDocumentPosition. Returns
   * `elements.length` when every entry precedes it. Relies on `elements`
   * being in document order, which is the order querySelectorAll yields.
   */
  function closestFollowingNodeIndex(elements, element) {
    let low = 0;
    let high = elements.length;
    while (low < high) {
      const i = (low + high) >>> 1;
      if (
        element.compareDocumentPosition(elements[i]) &
        Node.DOCUMENT_POSITION_PRECEDING
      ) {
        low = i + 1;
      } else {
        high = i;
      }
    }
    return low;
  }

  function checkValueAgainstAllRegex(value, regexExp = []) {
    return regexExp.every(reg => checkValueAgainstRegex(value, reg));
  }

  // Falsy values (null, undefined, "") never match.
  function checkValueAgainstRegex(value, regexExp) {
    return value ? regexExp.test(value) : false;
  }

  function atLeastOne(iter) {
    return iter.length >= 1;
  }

  /**
   * CREATION OF RULESET
   */
  const rules = ruleset(
    [
      rule(
        DEVELOPMENT ? dom("form").when(isVisible) : element("form"),
        type("form").note(clearCache)
      ),
      // Check form attributes
      rule(type("form"), score(formAttributesMatchRegisterRegex), {
        name: "formAttributesMatchRegisterRegex",
      }),
      rule(type("form"), score(formAttributesMatchLoginRegex), {
        name: "formAttributesMatchLoginRegex",
      }),
      rule(type("form"), score(formAttributesMatchSubscriptionRegex), {
        name: "formAttributesMatchSubscriptionRegex",
      }),
      rule(type("form"), score(formAttributesMatchLoginAndRegisterRegex), {
        name: "formAttributesMatchLoginAndRegisterRegex",
      }),
      // Check autocomplete attributes
      rule(type("form"), score(formHasAcCurrentPassword), {
        name: "formHasAcCurrentPassword",
      }),
      rule(type("form"), score(formHasAcNewPassword), {
        name: "formHasAcNewPassword",
      }),
      // Check input fields
      rule(type("form"), score(formHasEmailField), {
        name: "formHasEmailField",
      }),
      rule(type("form"), score(formHasUsernameField), {
        name: "formHasUsernameField",
      }),
      rule(type("form"), score(formHasPasswordField), {
        name: "formHasPasswordField",
      }),
      rule(type("form"), score(formHasFirstOrLastNameField), {
        name: "formHasFirstOrLastNameField",
      }),
      // Check buttons
      rule(type("form"), score(formHasRegisterButton), {
        name: "formHasRegisterButton",
      }),
      rule(type("form"), score(formHasLoginButton), {
        name: "formHasLoginButton",
      }),
      rule(type("form"), score(formHasContinueButton), {
        name: "formHasContinueButton",
      }),
      rule(type("form"), score(formHasSubscribeButton), {
        name: "formHasSubscribeButton",
      }),
      // Check hyperlinks
      rule(type("form"), score(formHasTermsAndConditionsHyperlink), {
        name: "formHasTermsAndConditionsHyperlink",
      }),
      rule(type("form"), score(formHasPasswordForgottenHyperlink), {
        name: "formHasPasswordForgottenHyperlink",
      }),
      rule(type("form"), score(formHasAlreadySignedUpHyperlink), {
        name: "formHasAlreadySignedUpHyperlink",
      }),
      // Check labels
      rule(type("form"), score(closestElementIsEmailLabelLike), {
        name: "closestElementIsEmailLabelLike",
      }),
      // Check checkboxes
      rule(type("form"), score(formHasRememberMeCheckbox), {
        name: "formHasRememberMeCheckbox",
      }),
      rule(type("form"), score(formHasSubcriptionCheckbox), {
        name: "formHasSubcriptionCheckbox",
      }),
      // Check header
      rule(type("form"), score(closestHeaderMatchesRegisterRegex), {
        name: "closestHeaderMatchesRegisterRegex",
      }),
      rule(type("form"), score(closestHeaderMatchesLoginRegex), {
        name: "closestHeaderMatchesLoginRegex",
      }),
      rule(type("form"), score(closestHeaderMatchesSubscriptionRegex), {
        name: "closestHeaderMatchesSubscriptionRegex",
      }),
      // Check doc title
      rule(type("form"), score(docTitleMatchesRegisterRegex), {
        name: "docTitleMatchesRegisterRegex",
      }),
      rule(type("form"), score(docTitleMatchesEditProfileRegex), {
        name: "docTitleMatchesEditProfileRegex",
      }),
      rule(type("form"), out("form")),
    ],
    coeffs,
    biases
  );
  return rules;
}

/**
 * --- END OF CODE FROM TRAINING REPOSITORY ---
 */

export const SignUpFormRuleset = {
  type: "form",
  rules: createRuleset([...coefficients.form], biases),
};