diff options
Diffstat (limited to 'toolkit/components/satchel/SignUpFormRuleset.sys.mjs')
-rw-r--r-- | toolkit/components/satchel/SignUpFormRuleset.sys.mjs | 589 |
1 files changed, 589 insertions, 0 deletions
diff --git a/toolkit/components/satchel/SignUpFormRuleset.sys.mjs b/toolkit/components/satchel/SignUpFormRuleset.sys.mjs new file mode 100644 index 0000000000..7a42880fdb --- /dev/null +++ b/toolkit/components/satchel/SignUpFormRuleset.sys.mjs @@ -0,0 +1,589 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * Fathom ML model for identifying sign up <forms> + * + * This is developed out-of-tree at https://github.com/mozilla-services/fathom-login-forms, + * where there is also over a GB of training, validation, and + * testing data. To make changes, do your edits there (whether adding new + * training pages, adding new rules, or both), retrain and evaluate as + * documented at https://mozilla.github.io/fathom/training.html, paste the + * coefficients emitted by the trainer into the ruleset, and finally copy the + * ruleset's "CODE TO COPY INTO PRODUCTION" section to this file's "CODE FROM + * TRAINING REPOSITORY" section. + */ + +import { + dom, + out, + rule, + ruleset, + score, + type, + element, + utils, +} from "resource://gre/modules/third_party/fathom/fathom.mjs"; + +let { isVisible, attributesMatch, setDefault } = utils; + +const DEVELOPMENT = false; + +/** + * --- START OF CODE FROM TRAINING REPOSITORY --- + */ +const coefficients = { + form: new Map([ + ["formAttributesMatchRegisterRegex", 0.4614015519618988], + ["formAttributesMatchLoginRegex", -2.608457326889038], + ["formAttributesMatchSubscriptionRegex", -3.253319501876831], + ["formAttributesMatchLoginAndRegisterRegex", 3.6423728466033936], + ["formHasAcNewPassword", 2.214113473892212], + ["formHasAcCurrentPassword", -0.43707895278930664], + ["formHasEmailField", 1.760241150856018], + ["formHasUsernameField", 1.1527059078216553], + ["formHasPasswordField", 1.6670876741409302], + ["formHasFirstOrLastNameField", 0.9517516493797302], + ["formHasRegisterButton", 1.574048638343811], + ["formHasLoginButton", -1.1688978672027588], + ["formHasSubscribeButton", -0.26299405097961426], + ["formHasContinueButton", 2.3797709941864014], + ["formHasTermsAndConditionsHyperlink", 1.764896035194397], + ["formHasPasswordForgottenHyperlink", -0.32138824462890625], + ["formHasAlreadySignedUpHyperlink", 3.160510301589966], + ["closestElementIsEmailLabelLike", 1.0336143970489502], + ["formHasRememberMeCheckbox", -1.2176686525344849], + ["formHasSubcriptionCheckbox", 0.6100747585296631], + ["docTitleMatchesRegisterRegex", 0.680654764175415], + ["docTitleMatchesEditProfileRegex", -4.104133605957031], + ["closestHeaderMatchesRegisterRegex", 1.3462989330291748], + ["closestHeaderMatchesLoginRegex", -0.1804502159357071], + ["closestHeaderMatchesSubscriptionRegex", -1.3057124614715576], + ]), +}; + +const biases = [["form", -4.402400970458984]]; + +const loginRegex = + /login|log-in|log_in|log in|signon|sign-on|sign_on|sign on|signin|sign-in|sign_in|sign in|einloggen|anmelden|logon|log-on|log_on|log on|Войти|ورود|登录|Přihlásit se|Přihlaste|Авторизоваться|Авторизация|entrar|ログイン|로그인|inloggen|Συνδέσου|accedi|ログオン|Giriş Yap|登入|connecter|connectez-vous|Connexion|Вход|inicia/i; +const registerRegex = + /regist|sign up|signup|sign-up|sign_up|join|new|登録|neu|erstellen|設定|신규|Créer|Nouveau|baru|nouă|nieuw|create[a-zA-Z\s]+account|create[a-zA-Z\s]+profile|activate[a-zA-Z\s]+account|Zugang anlegen|Angaben prüfen|Konto erstellen|ثبت نام|登録|注册|cadastr|Зарегистрироваться|Регистрация|Bellige alynmak|تسجيل|ΕΓΓΡΑΦΗΣ|Εγγραφή|Créer mon compte|Créer un compte|Mendaftar|가입하기|inschrijving|Zarejestruj się|Deschideți un cont|Создать аккаунт|ร่วม|Üye Ol|ساخت حساب کاربری|Schrijf je|S'inscrire/i; +const emailRegex = /mail/i; +const usernameRegex = /user|member/i; +const nameRegex = /first|last|middle/i; +const subscriptionRegex = + /subscri|trial|offer|information|angebote|probe|ニュースレター|abonn|promotion|news/i; +const termsAndConditionsRegex = + /terms|condition|rules|policy|privacy|nutzungsbedingungen|AGB|richtlinien|datenschutz|términos|condiciones/i; +const pwForgottenRegex = + /forgot|reset|set password|vergessen|vergeten|oublié|dimenticata|Esqueceu|esqueci|Забыли|忘记|找回|Zapomenuté|lost|忘れた|忘れられた|忘れの方|재설정|찾기|help|فراموشی| را فراموش کرده اید|Восстановить|Unuttu|perdus|重新設定|recover|remind|request|restore|trouble|olvidada/i; +const continueRegex = + /continue|go on|weiter|fortfahren|ga verder|next|continuar/i; +const rememberMeRegex = + /remember|stay|speichern|merken|bleiben|auto_login|auto-login|auto login|ricordami|manter|mantenha|savelogin|keep me logged in|keep me signed in|save email address|save id|stay signed in|次回からログオンIDの入力を省略する|メールアドレスを保存する|を保存|아이디저장|아이디 저장|로그인 상태 유지|lembrar|mantenha-me conectado|Запомни меня|запомнить меня|Запомните меня|Не спрашивать в следующий раз|下次自动登录|记住我|recordar|angemeldet bleiben/i; +const alreadySignedUpRegex = /already|bereits|schon|ya tienes cuenta/i; +const editProfile = /edit/i; + +function createRuleset(coeffs, biases) { + let descendantsCache; + let surroundingNodesCache; + + /** + * Check document characteristics + */ + function docTitleMatchesRegisterRegex(fnode) { + const docTitle = fnode.element.ownerDocument.title; + return checkValueAgainstRegex(docTitle, registerRegex); + } + function docTitleMatchesEditProfileRegex(fnode) { + const docTitle = fnode.element.ownerDocument.title; + return checkValueAgainstRegex(docTitle, editProfile); + } + + /** + * Check header + */ + function closestHeaderMatchesLoginRegex(fnode) { + return closestHeaderMatchesPredicate(fnode.element, header => + checkValueAgainstRegex(header.innerText, loginRegex) + ); + } + function closestHeaderMatchesRegisterRegex(fnode) { + return closestHeaderMatchesPredicate(fnode.element, header => + checkValueAgainstRegex(header.innerText, registerRegex) + ); + } + function closestHeaderMatchesSubscriptionRegex(fnode) { + return closestHeaderMatchesPredicate(fnode.element, header => + checkValueAgainstRegex(header.innerText, subscriptionRegex) + ); + } + + /** + * Check checkboxes + */ + function formHasRememberMeCheckbox(fnode) { + return elementHasRegexMatchingCheckbox(fnode.element, rememberMeRegex); + } + function formHasSubcriptionCheckbox(fnode) { + return elementHasRegexMatchingCheckbox(fnode.element, subscriptionRegex); + } + + /** + * Check input fields + */ + function formHasFirstOrLastNameField(fnode) { + const acValues = ["name", "given-name", "family-name"]; + return elementHasPredicateMatchingInput( + fnode.element, + elem => + atLeastOne(acValues.filter(ac => elem.autocomplete == ac)) || + inputFieldMatchesPredicate(elem, attr => + checkValueAgainstRegex(attr, nameRegex) + ) + ); + } + function formHasEmailField(fnode) { + return elementHasPredicateMatchingInput( + fnode.element, + elem => + elem.autocomplete == "email" || + elem.type == "email" || + inputFieldMatchesPredicate(elem, attr => + checkValueAgainstRegex(attr, emailRegex) + ) + ); + } + function formHasUsernameField(fnode) { + return elementHasPredicateMatchingInput( + fnode.element, + elem => + elem.autocomplete == "username" || + inputFieldMatchesPredicate(elem, attr => + checkValueAgainstRegex(attr, usernameRegex) + ) + ); + } + function formHasPasswordField(fnode) { + const acValues = ["current-password", "new-password"]; + return elementHasPredicateMatchingInput( + fnode.element, + elem => + atLeastOne(acValues.filter(ac => elem.autocomplete == ac)) || + elem.type == "password" + ); + } + + /** + * Check autocomplete values + */ + function formHasAcCurrentPassword(fnode) { + return inputFieldMatchesSelector( + fnode.element, + "autocomplete=current-password" + ); + } + function formHasAcNewPassword(fnode) { + return inputFieldMatchesSelector( + fnode.element, + "autocomplete=new-password" + ); + } + + /** + * Check hyperlinks within form + */ + function formHasTermsAndConditionsHyperlink(fnode) { + return elementHasPredicateMatchingHyperlink( + fnode.element, + termsAndConditionsRegex + ); + } + function formHasPasswordForgottenHyperlink(fnode) { + return elementHasPredicateMatchingHyperlink( + fnode.element, + pwForgottenRegex + ); + } + function formHasAlreadySignedUpHyperlink(fnode) { + return elementHasPredicateMatchingHyperlink( + fnode.element, + alreadySignedUpRegex + ); + } + + /** + * Check labels + */ + function closestElementIsEmailLabelLike(fnode) { + return elementHasPredicateMatchingInput(fnode.element, elem => + previousSiblingLabelMatchesRegex(elem, emailRegex) + ); + } + + /** + * Check buttons + */ + function formHasRegisterButton(fnode) { + return elementHasPredicateMatchingButton( + fnode.element, + button => + checkValueAgainstRegex(button.innerText, registerRegex) || + buttonMatchesPredicate(button, attr => + checkValueAgainstRegex(attr, registerRegex) + ) + ); + } + function formHasLoginButton(fnode) { + return elementHasPredicateMatchingButton( + fnode.element, + button => + checkValueAgainstRegex(button.innerText, loginRegex) || + buttonMatchesPredicate(button, attr => + checkValueAgainstRegex(attr, loginRegex) + ) + ); + } + function formHasContinueButton(fnode) { + return elementHasPredicateMatchingButton( + fnode.element, + button => + checkValueAgainstRegex(button.innerText, continueRegex) || + buttonMatchesPredicate(button, attr => + checkValueAgainstRegex(attr, continueRegex) + ) + ); + } + function formHasSubscribeButton(fnode) { + return elementHasPredicateMatchingButton( + fnode.element, + button => + checkValueAgainstRegex(button.innerText, subscriptionRegex) || + buttonMatchesPredicate(button, attr => + checkValueAgainstRegex(attr, subscriptionRegex) + ) + ); + } + + /** + * Check form attributes + */ + function formAttributesMatchRegisterRegex(fnode) { + return formMatchesPredicate(fnode.element, attr => + checkValueAgainstRegex(attr, registerRegex) + ); + } + function formAttributesMatchLoginRegex(fnode) { + return formMatchesPredicate(fnode.element, attr => + checkValueAgainstRegex(attr, loginRegex) + ); + } + function formAttributesMatchSubscriptionRegex(fnode) { + return formMatchesPredicate(fnode.element, attr => + checkValueAgainstRegex(attr, subscriptionRegex) + ); + } + function formAttributesMatchLoginAndRegisterRegex(fnode) { + return formMatchesPredicate(fnode.element, attr => + checkValueAgainstAllRegex(attr, [registerRegex, loginRegex]) + ); + } + + /** + * HELPER FUNCTIONS + */ + function elementMatchesPredicate(element, predicate, additional = []) { + return attributesMatch( + element, + predicate, + ["id", "name", "className"].concat(additional) + ); + } + function formMatchesPredicate(element, predicate) { + return elementMatchesPredicate(element, predicate, ["action"]); + } + function inputFieldMatchesPredicate(element, predicate) { + return elementMatchesPredicate(element, predicate, ["placeholder"]); + } + function inputFieldMatchesSelector(element, selector) { + return atLeastOne(getElementDescendants(element, `input[${selector}]`)); + } + function buttonMatchesPredicate(element, predicate) { + return elementMatchesPredicate(element, predicate, [ + "value", + "id", + "title", + ]); + } + function elementHasPredicateMatchingDescendant(element, selector, predicate) { + const matchingElements = getElementDescendants(element, selector); + return matchingElements.some(predicate); + } + function elementHasPredicateMatchingHeader(element, predicate) { + return ( + elementHasPredicateMatchingDescendant( + element, + "h1,h2,h3,h4,h5,h6", + predicate + ) || + elementHasPredicateMatchingDescendant( + element, + "div[class*=heading],div[class*=header],div[class*=title],header", + predicate + ) + ); + } + function elementHasPredicateMatchingButton(element, predicate) { + return elementHasPredicateMatchingDescendant( + element, + "button,input[type=submit],input[type=button]", + predicate + ); + } + function elementHasPredicateMatchingInput(element, predicate) { + return elementHasPredicateMatchingDescendant(element, "input", predicate); + } + function elementHasPredicateMatchingHyperlink(element, regexExp) { + return elementHasPredicateMatchingDescendant( + element, + "a", + link => + previousSiblingLabelMatchesRegex(link, regexExp) || + checkValueAgainstRegex(link.innerText, regexExp) || + elementMatchesPredicate( + link, + attr => checkValueAgainstRegex(attr, regexExp), + ["href"] + ) || + nextSiblingLabelMatchesRegex(link, regexExp) + ); + } + function elementHasRegexMatchingCheckbox(element, regexExp) { + return elementHasPredicateMatchingDescendant( + element, + "input[type=checkbox], div[class*=checkbox]", + box => + elementMatchesPredicate(box, attr => + checkValueAgainstRegex(attr, regexExp) + ) || nextSiblingLabelMatchesRegex(box, regexExp) + ); + } + + function nextSiblingLabelMatchesRegex(element, regexExp) { + let nextElem = element.nextElementSibling; + if (nextElem && nextElem.tagName == "LABEL") { + return checkValueAgainstRegex(nextElem.innerText, regexExp); + } + let closestElem = closestElementFollowing(element, "label"); + return closestElem + ? checkValueAgainstRegex(closestElem.innerText, regexExp) + : false; + } + + function previousSiblingLabelMatchesRegex(element, regexExp) { + let previousElem = element.previousElementSibling; + if (previousElem && previousElem.tagName == "LABEL") { + return checkValueAgainstRegex(previousElem.innerText, regexExp); + } + let closestElem = closestElementPreceding(element, "label"); + return closestElem + ? checkValueAgainstRegex(closestElem.innerText, regexExp) + : false; + } + function getElementDescendants(element, selector) { + const selectorToDescendants = setDefault( + descendantsCache, + element, + () => new Map() + ); + + return setDefault(selectorToDescendants, selector, () => + Array.from(element.querySelectorAll(selector)) + ); + } + + function clearCache() { + descendantsCache = new WeakMap(); + surroundingNodesCache = new WeakMap(); + } + function closestHeaderMatchesPredicate(element, predicate) { + return ( + elementHasPredicateMatchingHeader(element, predicate) || + closestHeaderAboveMatchesPredicate(element, predicate) + ); + } + function closestHeaderAboveMatchesPredicate(element, predicate) { + let closestHeader = closestElementPreceding(element, "h1,h2,h3,h4,h5,h6"); + + if (closestHeader !== null) { + if (predicate(closestHeader)) { + return true; + } + } + closestHeader = closestElementPreceding( + element, + "div[class*=heading],div[class*=header],div[class*=title],header" + ); + return closestHeader ? predicate(closestHeader) : false; + } + function closestElementPreceding(element, selector) { + return getSurroundingNodes(element, selector).precedingNode; + } + function closestElementFollowing(element, selector) { + return getSurroundingNodes(element, selector).followingNode; + } + function getSurroundingNodes(element, selector) { + const selectorToSurroundingNodes = setDefault( + surroundingNodesCache, + element, + () => new Map() + ); + + return setDefault(selectorToSurroundingNodes, selector, () => { + let elements = getElementDescendants(element.ownerDocument, selector); + let followingIndex = closestFollowingNodeIndex(elements, element); + let precedingIndex = followingIndex - 1; + let preceding = precedingIndex < 0 ? null : elements[precedingIndex]; + let following = + followingIndex == elements.length ? null : elements[followingIndex]; + return { precedingNode: preceding, followingNode: following }; + }); + } + function closestFollowingNodeIndex(elements, element) { + let low = 0; + let high = elements.length; + while (low < high) { + let i = (low + high) >>> 1; + if ( + element.compareDocumentPosition(elements[i]) & + Node.DOCUMENT_POSITION_PRECEDING + ) { + low = i + 1; + } else { + high = i; + } + } + return low; + } + + function checkValueAgainstAllRegex(value, regexExp = []) { + return regexExp.every(reg => checkValueAgainstRegex(value, reg)); + } + + function checkValueAgainstRegex(value, regexExp) { + return value ? regexExp.test(value) : false; + } + function atLeastOne(iter) { + return iter.length >= 1; + } + + /** + * CREATION OF RULESET + */ + const rules = ruleset( + [ + rule( + DEVELOPMENT ? dom("form").when(isVisible) : element("form"), + type("form").note(clearCache) + ), + // Check form attributes + rule(type("form"), score(formAttributesMatchRegisterRegex), { + name: "formAttributesMatchRegisterRegex", + }), + rule(type("form"), score(formAttributesMatchLoginRegex), { + name: "formAttributesMatchLoginRegex", + }), + rule(type("form"), score(formAttributesMatchSubscriptionRegex), { + name: "formAttributesMatchSubscriptionRegex", + }), + rule(type("form"), score(formAttributesMatchLoginAndRegisterRegex), { + name: "formAttributesMatchLoginAndRegisterRegex", + }), + // Check autocomplete attributes + rule(type("form"), score(formHasAcCurrentPassword), { + name: "formHasAcCurrentPassword", + }), + rule(type("form"), score(formHasAcNewPassword), { + name: "formHasAcNewPassword", + }), + // Check input fields + rule(type("form"), score(formHasEmailField), { + name: "formHasEmailField", + }), + rule(type("form"), score(formHasUsernameField), { + name: "formHasUsernameField", + }), + rule(type("form"), score(formHasPasswordField), { + name: "formHasPasswordField", + }), + rule(type("form"), score(formHasFirstOrLastNameField), { + name: "formHasFirstOrLastNameField", + }), + // Check buttons + rule(type("form"), score(formHasRegisterButton), { + name: "formHasRegisterButton", + }), + rule(type("form"), score(formHasLoginButton), { + name: "formHasLoginButton", + }), + rule(type("form"), score(formHasContinueButton), { + name: "formHasContinueButton", + }), + rule(type("form"), score(formHasSubscribeButton), { + name: "formHasSubscribeButton", + }), + // Check hyperlinks + rule(type("form"), score(formHasTermsAndConditionsHyperlink), { + name: "formHasTermsAndConditionsHyperlink", + }), + rule(type("form"), score(formHasPasswordForgottenHyperlink), { + name: "formHasPasswordForgottenHyperlink", + }), + rule(type("form"), score(formHasAlreadySignedUpHyperlink), { + name: "formHasAlreadySignedUpHyperlink", + }), + // Check labels + rule(type("form"), score(closestElementIsEmailLabelLike), { + name: "closestElementIsEmailLabelLike", + }), + // Check checkboxes + rule(type("form"), score(formHasRememberMeCheckbox), { + name: "formHasRememberMeCheckbox", + }), + rule(type("form"), score(formHasSubcriptionCheckbox), { + name: "formHasSubcriptionCheckbox", + }), + // Check header + rule(type("form"), score(closestHeaderMatchesRegisterRegex), { + name: "closestHeaderMatchesRegisterRegex", + }), + rule(type("form"), score(closestHeaderMatchesLoginRegex), { + name: "closestHeaderMatchesLoginRegex", + }), + rule(type("form"), score(closestHeaderMatchesSubscriptionRegex), { + name: "closestHeaderMatchesSubscriptionRegex", + }), + // Check doc title + rule(type("form"), score(docTitleMatchesRegisterRegex), { + name: "docTitleMatchesRegisterRegex", + }), + rule(type("form"), score(docTitleMatchesEditProfileRegex), { + name: "docTitleMatchesEditProfileRegex", + }), + rule(type("form"), out("form")), + ], + coeffs, + biases + ); + return rules; +} + +/** + * --- END OF CODE FROM TRAINING REPOSITORY --- + */ + +export const SignUpFormRuleset = { + type: "form", + rules: createRuleset([...coefficients.form], biases), +}; |