diff options
Diffstat (limited to 'tools/lint/fluent-lint')
-rw-r--r-- | tools/lint/fluent-lint/__init__.py | 470 | ||||
-rw-r--r-- | tools/lint/fluent-lint/exclusions.yml | 198 |
2 files changed, 668 insertions, 0 deletions
diff --git a/tools/lint/fluent-lint/__init__.py b/tools/lint/fluent-lint/__init__.py new file mode 100644 index 0000000000..3d0373ea01 --- /dev/null +++ b/tools/lint/fluent-lint/__init__.py @@ -0,0 +1,470 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +import bisect +import os +import re +from html.parser import HTMLParser + +import mozpack.path as mozpath +import yaml +from fluent.syntax import ast, parse, visitor +from mozlint import result +from mozlint.pathutils import expand_exclusions + + +class TextElementHTMLParser(HTMLParser): + """HTML Parser for TextElement. + + TextElements may contain embedded html tags, which can include + quotes in attributes. We only want to check the actual text. + """ + + def __init__(self): + super().__init__() + self.extracted_text = [] + + def handle_data(self, data): + self.extracted_text.append(data) + + +class Linter(visitor.Visitor): + """Fluent linter implementation. + + This subclasses the Fluent AST visitor. Methods are called corresponding + to each type of node in the Fluent AST. It is possible to control + whether a node is recursed into by calling the generic_visit method on + the superclass. + + See the documentation here: + https://www.projectfluent.org/python-fluent/fluent.syntax/stable/usage.html + """ + + def __init__( + self, path, config, exclusions, contents, offsets_and_lines, brand_names=[] + ): + super().__init__() + self.path = path + self.config = config + self.exclusions = exclusions + self.contents = contents + self.offsets_and_lines = offsets_and_lines + + self.results = [] + self.identifier_re = re.compile(r"[a-z0-9-]+") + self.apostrophe_re = re.compile(r"\w'") + self.incorrect_apostrophe_re = re.compile(r"\w\u2018\w") + self.single_quote_re = re.compile(r"'(.+)'") + self.double_quote_re = re.compile(r"\".+\"") + self.ellipsis_re = re.compile(r"\.\.\.") + + self.brand_names = brand_names + self.minimum_id_length = 9 + + self.state = { + # The resource comment should be at the top of the page after the license. + "node_can_be_resource_comment": True, + # Group comments must be followed by a message. Two group comments are not + # allowed in a row. + "can_have_group_comment": True, + # Comment bound to the current message + "comment": "", + # The current group comment + "group_comment": "", + # Variables in the current message + "variables": [], + } + + # Set this to true to debug print the root node's json. This is useful for + # writing new lint rules, or debugging existing ones. + self.debug_print_json = False + + def generic_visit(self, node): + node_name = type(node).__name__ + self.state["node_can_be_resource_comment"] = self.state[ + "node_can_be_resource_comment" + ] and ( + # This is the root node. + node_name == "Resource" + # Empty space is allowed. + or node_name == "Span" + # Comments are allowed + or node_name == "Comment" + ) + + if self.debug_print_json: + import json + + print(json.dumps(node.to_json(), indent=2)) + # Only debug print the root node. + self.debug_print_json = False + + super(Linter, self).generic_visit(node) + + def visit_Attribute(self, node): + # Only visit values for Attribute nodes, the identifier comes from dom. + super().generic_visit(node.value) + + def visit_FunctionReference(self, node): + # We don't recurse into function references, the identifiers there are + # allowed to be free form. + pass + + def visit_Message(self, node): + # There must be at least one message or term between group comments. + self.state["can_have_group_comment"] = True + self.last_message_id = node.id.name + + super().generic_visit(node) + + # Check if variables are referenced in comments + if self.state["variables"]: + comments = self.state["comment"] + self.state["group_comment"] + missing_references = [ + v for v in self.state["variables"] if f"${v}" not in comments + ] + if missing_references: + self.add_error( + node, + "VC01", + "Messages including variables should have a comment " + "explaining what will replace the variable. " + "Missing references: " + + ", ".join([f"${m}" for m in missing_references]), + ) + + # Reset current comment and variable references after reading the + # message. + self.state["comment"] = "" + self.state["variables"] = [] + + def visit_Term(self, node): + # There must be at least one message or term between group comments. + self.state["can_have_group_comment"] = True + self.last_message_id = None + + super().generic_visit(node) + + # Reset current comment and variable references after reading the term. + self.state["comment"] = "" + self.state["variables"] = [] + + def visit_MessageReference(self, node): + # We don't recurse into message references, the identifiers are either + # checked elsewhere or are attributes and come from DOM. + pass + + def visit_Identifier(self, node): + if ( + self.path not in self.exclusions["ID01"]["files"] + and node.name not in self.exclusions["ID01"]["messages"] + and not self.identifier_re.fullmatch(node.name) + ): + self.add_error( + node, + "ID01", + "Identifiers may only contain lowercase characters and -", + ) + if ( + len(node.name) < self.minimum_id_length + and self.path not in self.exclusions["ID02"]["files"] + and node.name not in self.exclusions["ID02"]["messages"] + ): + self.add_error( + node, + "ID02", + f"Identifiers must be at least {self.minimum_id_length} characters long", + ) + + def visit_TextElement(self, node): + parser = TextElementHTMLParser() + parser.feed(node.value) + for text in parser.extracted_text: + # To check for apostrophes, first remove pairs of straight quotes + # used as delimiters. + cleaned_str = re.sub(self.single_quote_re, "\1", node.value) + if self.apostrophe_re.search(cleaned_str): + self.add_error( + node, + "TE01", + "Strings with apostrophes should use foo\u2019s instead of foo's.", + ) + if self.incorrect_apostrophe_re.search(text): + self.add_error( + node, + "TE02", + "Strings with apostrophes should use foo\u2019s instead of foo\u2018s.", + ) + if self.single_quote_re.search(text): + self.add_error( + node, + "TE03", + "Single-quoted strings should use Unicode \u2018foo\u2019 instead of 'foo'.", + ) + if self.double_quote_re.search(text): + self.add_error( + node, + "TE04", + 'Double-quoted strings should use Unicode \u201cfoo\u201d instead of "foo".', + ) + if self.ellipsis_re.search(text): + self.add_error( + node, + "TE05", + "Strings with an ellipsis should use the Unicode \u2026 character" + " instead of three periods", + ) + + # If part of a message, check for brand names + if ( + self.last_message_id is not None + and self.path not in self.exclusions["CO01"]["files"] + and self.last_message_id not in self.exclusions["CO01"]["messages"] + ): + found_brands = [] + for brand in self.brand_names: + if brand in text: + found_brands.append(brand) + if found_brands: + self.add_error( + node, + "CO01", + "Strings should use the corresponding terms instead of" + f" hard-coded brand names ({', '.join(found_brands)})", + ) + + def visit_ResourceComment(self, node): + # This node is a comment with: "###" + if not self.state["node_can_be_resource_comment"]: + self.add_error( + node, + "RC01", + "Resource comments (###) should be placed at the top of the file, just " + "after the license header. There should only be one resource comment " + "per file.", + ) + return + + lines_after = get_newlines_count_after(node.span, self.contents) + lines_before = get_newlines_count_before(node.span, self.contents) + + if node.span.end == len(self.contents) - 1: + # This file only contains a resource comment. + return + + if lines_after != 2: + self.add_error( + node, + "RC02", + "Resource comments (###) should be followed by one empty line.", + ) + return + + if lines_before != 2: + self.add_error( + node, + "RC03", + "Resource comments (###) should have one empty line above them.", + ) + return + + def visit_SelectExpression(self, node): + # We only want to visit the variant values, the identifiers in selectors + # and keys are allowed to be free form. + for variant in node.variants: + super().generic_visit(variant.value) + + # Store the variable used for the SelectExpression, excluding functions + # like PLATFORM() + if ( + type(node.selector) == ast.VariableReference + and node.selector.id.name not in self.state["variables"] + ): + self.state["variables"].append(node.selector.id.name) + + def visit_Comment(self, node): + # This node is a comment with: "#" + + # Store the comment + self.state["comment"] = node.content + + def visit_GroupComment(self, node): + # This node is a comment with: "##" + + # Store the group comment + self.state["group_comment"] = node.content + + if not self.state["can_have_group_comment"]: + self.add_error( + node, + "GC04", + "Group comments (##) must be followed by at least one message " + "or term. Make sure that a single group comment with multiple " + "paragraphs is not separated by whitespace, as it will be " + "interpreted as two different comments.", + ) + return + + self.state["can_have_group_comment"] = False + + lines_after = get_newlines_count_after(node.span, self.contents) + lines_before = get_newlines_count_before(node.span, self.contents) + + if node.span.end == len(self.contents) - 1: + # The group comment is the last thing in the file. + + if node.content == "": + # Empty comments are allowed at the end of the file. + return + + self.add_error( + node, + "GC01", + "Group comments (##) should not be at the end of the file, they should " + "always be above a message. Only an empty group comment is allowed at " + "the end of a file.", + ) + return + + if lines_after != 2: + self.add_error( + node, + "GC02", + "Group comments (##) should be followed by one empty line.", + ) + return + + if lines_before != 2: + self.add_error( + node, + "GC03", + "Group comments (##) should have an empty line before them.", + ) + return + + def visit_VariableReference(self, node): + # Identifiers are allowed to be free form, but need to store them + # for comment checks. + + if node.id.name not in self.state["variables"]: + self.state["variables"].append(node.id.name) + + def add_error(self, node, rule, msg): + (col, line) = self.span_to_line_and_col(node.span) + res = { + "path": self.path, + "lineno": line, + "column": col, + "rule": rule, + "message": msg, + } + self.results.append(result.from_config(self.config, **res)) + + def span_to_line_and_col(self, span): + i = bisect.bisect_left(self.offsets_and_lines, (span.start, 0)) + if i > 0: + col = span.start - self.offsets_and_lines[i - 1][0] + else: + col = 1 + span.start + return (col, self.offsets_and_lines[i][1]) + + +def get_offsets_and_lines(contents): + """Return a list consisting of tuples of (offset, line). + + The Fluent AST contains spans of start and end offsets in the file. + This function returns a list of offsets and line numbers so that errors + can be reported using line and column. + """ + line = 1 + result = [] + for m in re.finditer(r"\n", contents): + result.append((m.start(), line)) + line += 1 + return result + + +def get_newlines_count_after(span, contents): + # Determine the number of newlines. + count = 0 + for i in range(span.end, len(contents)): + assert contents[i] != "\r", "This linter does not handle \\r characters." + if contents[i] != "\n": + break + count += 1 + + return count + + +def get_newlines_count_before(span, contents): + # Determine the range of newline characters. + count = 0 + for i in range(span.start - 1, 0, -1): + assert contents[i] != "\r", "This linter does not handle \\r characters." + if contents[i] != "\n": + break + count += 1 + + return count + + +def get_exclusions(root): + with open( + mozpath.join(root, "tools", "lint", "fluent-lint", "exclusions.yml") + ) as f: + exclusions = list(yaml.safe_load_all(f))[0] + for error_type in exclusions: + exclusions[error_type]["files"] = set( + [mozpath.join(root, x) for x in exclusions[error_type]["files"]] + ) + return exclusions + + +def get_branding_list(root, brand_files): + class MessageExtractor(visitor.Visitor): + def __init__(self): + self.brands = [] + self.last_message_id = None + + def visit_Term(self, node): + self.last_message_id = node.id.name + self.generic_visit(node) + + def visit_TextElement(self, node): + if self.last_message_id: + self.brands += [node.value] + self.last_message_id = None + self.generic_visit(node) + + extractor = MessageExtractor() + + for brand_path in brand_files: + brand_file = mozpath.join(root, brand_path) + if os.path.exists(brand_file): + with open(brand_file, encoding="utf-8") as f: + messages = parse(f.read()) + extractor.visit(messages) + + return list(set(extractor.brands)) + + +def lint(paths, config, fix=None, **lintargs): + root = lintargs["root"] + files = list(expand_exclusions(paths, config, root)) + exclusions = get_exclusions(root) + brand_files = config.get("brand-files") + brand_names = get_branding_list(root, brand_files) + results = [] + for path in files: + contents = open(path, "r", encoding="utf-8").read() + linter = Linter( + path, + config, + exclusions, + contents, + get_offsets_and_lines(contents), + brand_names, + ) + linter.visit(parse(contents)) + results.extend(linter.results) + return results diff --git a/tools/lint/fluent-lint/exclusions.yml b/tools/lint/fluent-lint/exclusions.yml new file mode 100644 index 0000000000..ebcd1b0029 --- /dev/null +++ b/tools/lint/fluent-lint/exclusions.yml @@ -0,0 +1,198 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Warning: Only exclusions for identifiers (ID01) are currently allowed. +--- +# Only add exceptions to this file if the ID is generated programmatically and +# can't easily be changed to follow the naming convention. +# Only lowercase letters and hyphens should be used in Fluent IDs. +ID01: + messages: + - trademarkInfo + - crashed-include-URL-2 + - blocklist-item-moz-std-listName + - blocklist-item-moz-full-listName + - shortcuts-browserAction2 + - shortcuts-pageAction + - shortcuts-sidebarAction + - about-networking-originAttributesSuffix + - size-KB + - size-MB + - size-GB + - state-dd-Disabled + - state-dd-Disabled-block-list-state + - memory-unit-B + - memory-unit-KB + - memory-unit-MB + - memory-unit-GB + - memory-unit-TB + - memory-unit-PB + - memory-unit-EB + - enableSafeBrowsing-label + - about-telemetry-show-in-Firefox-json-viewer + - url-classifier-search-listType + # aboutDialog.ftl: Do not add new exceptions for this file, + # new strings should follow the naming convention. + - aboutDialog-title + - releaseNotes-link + - update-checkForUpdatesButton + - update-updateButton + - update-checkingForUpdates + - update-adminDisabled + - update-noUpdatesFound + - update-otherInstanceHandlingUpdates + - warningDesc-version + - bottomLinks-license + - bottomLinks-rights + - bottomLinks-privacy + - aboutDialog-version + - aboutDialog-version-nightly + # certError.ftl: These IDs are generated programmatically + # from certificate error codes. + - connectionFailure-title + - deniedPortAccess-title + - dnsNotFound-title + - fileNotFound-title + - fileAccessDenied-title + - captivePortal-title + - malformedURI-title + - netInterrupt-title + - notCached-title + - netOffline-title + - contentEncodingError-title + - unsafeContentType-title + - netReset-title + - netTimeout-title + - unknownProtocolFound-title + - proxyConnectFailure-title + - proxyResolveFailure-title + - redirectLoop-title + - unknownSocketType-title + - nssFailure2-title + - corruptedContentError-title + - sslv3Used-title + - inadequateSecurityError-title + - blockedByPolicy-title + - clockSkewError-title + - networkProtocolError-title + - nssBadCert-title + - nssBadCert-sts-title + files: + # policies-descriptions.ftl: These IDs are generated programmatically + # from policy names. + - browser/locales/en-US/browser/policies/policies-descriptions.ftl + # The webext-perms-description-* IDs are generated programmatically + # from permission names + - toolkit/locales/en-US/toolkit/global/extensionPermissions.ftl +ID02: + messages: + # browser/components/ion/content/ion.ftl + - ion + # browser/locales/en-US/browser/aboutDialog.ftl + - helpus + # browser/locales/en-US/browser/aboutLogins.ftl + - menu + # browser/locales/en-US/browser/pageInfo.ftl + - copy + - perm-tab + # browser/locales/en-US/browser/tabContextMenu.ftl + - pin-tab + # browser/locales/en-US/browser/touchbar/touchbar.ftl + - back + - forward + - reload + - home + - find + - new-tab + - share + # toolkit/locales/en-US/toolkit/about/aboutPerformance.ftl + - type-tab + - size-KB + - size-MB + - size-GB + - item + # toolkit/locales/en-US/toolkit/about/aboutPlugins.ftl + - file-dd + - path-dd + # toolkit/locales/en-US/toolkit/about/aboutServiceWorkers.ftl + - scope + - waiting + # toolkit/locales/en-US/toolkit/about/aboutSupport.ftl + # yaml interprets yes and no as booleans if quotes are not present. + - "yes" + - "no" + - unknown + - found + - missing + - gpu-ram + - apz-none + # toolkit/locales/en-US/toolkit/printing/printDialogs.ftl + - portrait + - scale + - print-bg + - hf-blank + - hf-title + - hf-url + - hf-page + files: [] +# Hard-coded brand names like Firefox or Mozilla should be used only in +# specific cases, in all other cases the corresponding terms should be used. +# Check with the localization team for advice. +CO01: + messages: + # browser/branding/official/locales/en-US/brand.ftl + - trademarkInfo + # toolkit/locales/en-US/toolkit/neterror/certError.ftl + - cert-error-mitm-mozilla + - cert-error-mitm-connection + # browser/locales/en-US/browser/appExtensionFields.ftl + - extension-firefox-alpenglow-name + # browser/locales/en-US/browser/browser.ftl + - identity-custom-root + - identity-description-custom-root2 + # browser/locales/en-US/browser/migration.ftl + - import-from-firefox + # browser/locales/en-US/browser/migrationWizard.ftl + - migration-wizard-migrator-display-name-firefox + # browser/locales/en-US/browser/newtab/onboarding.ftl + - mr1-onboarding-welcome-image-caption + - mr2022-onboarding-gratitude-subtitle + # browser/locales/en-US/browser/policies/policies-descriptions.ftl + - policy-DisableFirefoxScreenshots + # browser/locales/en-US/browser/preferences/preferences.ftl + - sync-engine-addons + - sync-mobile-promo + # browser/locales/en-US/browser/protectionsPanel.ftl + - protections-panel-content-blocking-breakage-report-view-description + # devtools/client/locales/en-US/aboutdebugging.ftl + - about-debugging-setup-usb-step-enable-debug-firefox2 + - about-debugging-browser-version-too-old-fennec + - about-debugging-browser-version-too-recent + # devtools/client/locales/en-US/application.ftl + - manifest-loaded-devtools-error + # toolkit/locales/en-US/toolkit/about/aboutAddons.ftl + - addon-badge-line3 + - recommended-theme-1 + # toolkit/locales/en-US/toolkit/about/aboutGlean.ftl + - about-glean-description + # toolkit/locales/en-US/toolkit/about/aboutPlugins.ftl + - plugins-openh264-description + # toolkit/locales/en-US/toolkit/about/aboutRights.ftl + - rights-intro-point-1 + - rights-intro-point-2 + # toolkit/locales/en-US/toolkit/about/aboutSupport.ftl + - app-basics-key-mozilla + - virtual-monitor-disp + # toolkit/locales/en-US/toolkit/about/aboutTelemetry.ftl + - about-telemetry-firefox-data-doc + - about-telemetry-telemetry-client-doc + - about-telemetry-telemetry-dashboard + # toolkit/locales/en-US/toolkit/global/extensionPermissions.ftl + - webext-perms-description-management + # toolkit/locales/en-US/toolkit/global/processTypes.ftl + - process-type-privilegedmozilla + files: + - browser/components/ion/content/ion.ftl + - browser/locales/en-US/browser/profile/default-bookmarks.ftl + - toolkit/locales/en-US/toolkit/about/aboutMozilla.ftl |