summaryrefslogtreecommitdiffstats
path: root/tools/lint/fluent-lint
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lint/fluent-lint')
-rw-r--r--tools/lint/fluent-lint/__init__.py470
-rw-r--r--tools/lint/fluent-lint/exclusions.yml198
2 files changed, 668 insertions, 0 deletions
diff --git a/tools/lint/fluent-lint/__init__.py b/tools/lint/fluent-lint/__init__.py
new file mode 100644
index 0000000000..3d0373ea01
--- /dev/null
+++ b/tools/lint/fluent-lint/__init__.py
@@ -0,0 +1,470 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+import bisect
+import os
+import re
+from html.parser import HTMLParser
+
+import mozpack.path as mozpath
+import yaml
+from fluent.syntax import ast, parse, visitor
+from mozlint import result
+from mozlint.pathutils import expand_exclusions
+
+
class TextElementHTMLParser(HTMLParser):
    """Collect the plain-text pieces of a Fluent TextElement.

    A TextElement value may embed HTML tags whose attributes contain quote
    characters. Feeding the value through this parser keeps only the text
    found between tags, so quote checks can ignore the markup itself.
    """

    def __init__(self):
        # Text fragments, in the order the parser reports them.
        self.extracted_text = []
        super().__init__()

    def handle_data(self, data):
        """Record one run of text found outside of any tag."""
        self.extracted_text.append(data)
+
+
class Linter(visitor.Visitor):
    """Fluent linter implementation.

    This subclasses the Fluent AST visitor. Methods are called corresponding
    to each type of node in the Fluent AST. It is possible to control
    whether a node is recursed into by calling the generic_visit method on
    the superclass.

    Errors found while visiting are appended to ``self.results``.

    See the documentation here:
    https://www.projectfluent.org/python-fluent/fluent.syntax/stable/usage.html
    """

    def __init__(
        self, path, config, exclusions, contents, offsets_and_lines, brand_names=None
    ):
        """Set up the linter for a single file.

        path: path of the file being linted; compared against the "files"
            entries of the exclusions and reported in every result.
        config: linter configuration, passed through to result.from_config.
        exclusions: mapping of rule name ("ID01", "ID02", "CO01") to a dict
            with "files" and "messages" collections.
        contents: full text of the file being linted.
        offsets_and_lines: list of (newline offset, line number) tuples, as
            produced by get_offsets_and_lines.
        brand_names: optional list of hard-coded brand names to flag (CO01).
        """
        super().__init__()
        self.path = path
        self.config = config
        self.exclusions = exclusions
        self.contents = contents
        self.offsets_and_lines = offsets_and_lines

        self.results = []
        self.identifier_re = re.compile(r"[a-z0-9-]+")
        self.apostrophe_re = re.compile(r"\w'")
        self.incorrect_apostrophe_re = re.compile(r"\w\u2018\w")
        self.single_quote_re = re.compile(r"'(.+)'")
        self.double_quote_re = re.compile(r"\".+\"")
        self.ellipsis_re = re.compile(r"\.\.\.")

        # Use a None sentinel instead of a shared mutable default list.
        self.brand_names = [] if brand_names is None else brand_names
        self.minimum_id_length = 9

        # ID of the message currently being visited; None while inside a term
        # or before any message has been seen. Initialised here so that
        # visit_TextElement can always read it.
        self.last_message_id = None

        self.state = {
            # The resource comment should be at the top of the page after the license.
            "node_can_be_resource_comment": True,
            # Group comments must be followed by a message. Two group comments are not
            # allowed in a row.
            "can_have_group_comment": True,
            # Comment bound to the current message
            "comment": "",
            # The current group comment
            "group_comment": "",
            # Variables in the current message
            "variables": [],
        }

        # Set this to true to debug print the root node's json. This is useful for
        # writing new lint rules, or debugging existing ones.
        self.debug_print_json = False

    def generic_visit(self, node):
        """Track whether a resource comment is still allowed, then recurse."""
        node_name = type(node).__name__
        self.state["node_can_be_resource_comment"] = self.state[
            "node_can_be_resource_comment"
        ] and (
            # This is the root node.
            node_name == "Resource"
            # Empty space is allowed.
            or node_name == "Span"
            # Comments are allowed
            or node_name == "Comment"
        )

        if self.debug_print_json:
            import json

            print(json.dumps(node.to_json(), indent=2))
            # Only debug print the root node.
            self.debug_print_json = False

        super().generic_visit(node)

    def visit_Attribute(self, node):
        # Only visit values for Attribute nodes, the identifier comes from dom.
        super().generic_visit(node.value)

    def visit_FunctionReference(self, node):
        # We don't recurse into function references, the identifiers there are
        # allowed to be free form.
        pass

    def visit_Message(self, node):
        """Visit a message and check that its variables are documented (VC01)."""
        # There must be at least one message or term between group comments.
        self.state["can_have_group_comment"] = True
        self.last_message_id = node.id.name

        super().generic_visit(node)

        # Check if variables are referenced in comments
        if self.state["variables"]:
            comments = self.state["comment"] + self.state["group_comment"]
            missing_references = [
                v for v in self.state["variables"] if f"${v}" not in comments
            ]
            if missing_references:
                self.add_error(
                    node,
                    "VC01",
                    "Messages including variables should have a comment "
                    "explaining what will replace the variable. "
                    "Missing references: "
                    + ", ".join([f"${m}" for m in missing_references]),
                )

        # Reset current comment and variable references after reading the
        # message.
        self.state["comment"] = ""
        self.state["variables"] = []

    def visit_Term(self, node):
        """Visit a term; brand-name checks are disabled while inside it."""
        # There must be at least one message or term between group comments.
        self.state["can_have_group_comment"] = True
        self.last_message_id = None

        super().generic_visit(node)

        # Reset current comment and variable references after reading the term.
        self.state["comment"] = ""
        self.state["variables"] = []

    def visit_MessageReference(self, node):
        # We don't recurse into message references, the identifiers are either
        # checked elsewhere or are attributes and come from DOM.
        pass

    def visit_Identifier(self, node):
        """Check identifier characters (ID01) and minimum length (ID02)."""
        if (
            self.path not in self.exclusions["ID01"]["files"]
            and node.name not in self.exclusions["ID01"]["messages"]
            and not self.identifier_re.fullmatch(node.name)
        ):
            self.add_error(
                node,
                "ID01",
                "Identifiers may only contain lowercase characters and -",
            )
        if (
            len(node.name) < self.minimum_id_length
            and self.path not in self.exclusions["ID02"]["files"]
            and node.name not in self.exclusions["ID02"]["messages"]
        ):
            self.add_error(
                node,
                "ID02",
                f"Identifiers must be at least {self.minimum_id_length} characters long",
            )

    def visit_TextElement(self, node):
        """Check typography (TE01-TE05) and brand names (CO01) in text.

        The value is first run through TextElementHTMLParser so that only
        the text between HTML tags is checked, one fragment at a time.
        """
        parser = TextElementHTMLParser()
        parser.feed(node.value)
        # Flush any text the parser is still buffering so it is checked too.
        parser.close()
        for text in parser.extracted_text:
            # To check for apostrophes, first remove pairs of straight quotes
            # used as delimiters. The replacement must be a raw string: a
            # plain "\1" is the control character U+0001, not a backreference.
            # The substitution runs on the extracted text fragment, like every
            # other check in this loop, so markup is ignored and the error is
            # not repeated once per fragment.
            cleaned_str = self.single_quote_re.sub(r"\1", text)
            if self.apostrophe_re.search(cleaned_str):
                self.add_error(
                    node,
                    "TE01",
                    "Strings with apostrophes should use foo\u2019s instead of foo's.",
                )
            if self.incorrect_apostrophe_re.search(text):
                self.add_error(
                    node,
                    "TE02",
                    "Strings with apostrophes should use foo\u2019s instead of foo\u2018s.",
                )
            if self.single_quote_re.search(text):
                self.add_error(
                    node,
                    "TE03",
                    "Single-quoted strings should use Unicode \u2018foo\u2019 instead of 'foo'.",
                )
            if self.double_quote_re.search(text):
                self.add_error(
                    node,
                    "TE04",
                    'Double-quoted strings should use Unicode \u201cfoo\u201d instead of "foo".',
                )
            if self.ellipsis_re.search(text):
                self.add_error(
                    node,
                    "TE05",
                    "Strings with an ellipsis should use the Unicode \u2026 character"
                    " instead of three periods",
                )

            # If part of a message, check for brand names
            if (
                self.last_message_id is not None
                and self.path not in self.exclusions["CO01"]["files"]
                and self.last_message_id not in self.exclusions["CO01"]["messages"]
            ):
                found_brands = [brand for brand in self.brand_names if brand in text]
                if found_brands:
                    self.add_error(
                        node,
                        "CO01",
                        "Strings should use the corresponding terms instead of"
                        f" hard-coded brand names ({', '.join(found_brands)})",
                    )

    def visit_ResourceComment(self, node):
        """Check placement and spacing of a resource comment (RC01-RC03)."""
        # This node is a comment with: "###"
        if not self.state["node_can_be_resource_comment"]:
            self.add_error(
                node,
                "RC01",
                "Resource comments (###) should be placed at the top of the file, just "
                "after the license header. There should only be one resource comment "
                "per file.",
            )
            return

        lines_after = get_newlines_count_after(node.span, self.contents)
        lines_before = get_newlines_count_before(node.span, self.contents)

        if node.span.end == len(self.contents) - 1:
            # This file only contains a resource comment.
            return

        # Two consecutive newlines mean exactly one empty line.
        if lines_after != 2:
            self.add_error(
                node,
                "RC02",
                "Resource comments (###) should be followed by one empty line.",
            )
            return

        if lines_before != 2:
            self.add_error(
                node,
                "RC03",
                "Resource comments (###) should have one empty line above them.",
            )
            return

    def visit_SelectExpression(self, node):
        """Visit variant values and remember the selector variable, if any."""
        # We only want to visit the variant values, the identifiers in selectors
        # and keys are allowed to be free form.
        for variant in node.variants:
            super().generic_visit(variant.value)

        # Store the variable used for the SelectExpression, excluding functions
        # like PLATFORM()
        if (
            isinstance(node.selector, ast.VariableReference)
            and node.selector.id.name not in self.state["variables"]
        ):
            self.state["variables"].append(node.selector.id.name)

    def visit_Comment(self, node):
        # This node is a comment with: "#"

        # Store the comment
        self.state["comment"] = node.content

    def visit_GroupComment(self, node):
        """Check placement and spacing of a group comment (GC01-GC04)."""
        # This node is a comment with: "##"

        # Store the group comment
        self.state["group_comment"] = node.content

        if not self.state["can_have_group_comment"]:
            self.add_error(
                node,
                "GC04",
                "Group comments (##) must be followed by at least one message "
                "or term. Make sure that a single group comment with multiple "
                "paragraphs is not separated by whitespace, as it will be "
                "interpreted as two different comments.",
            )
            return

        self.state["can_have_group_comment"] = False

        lines_after = get_newlines_count_after(node.span, self.contents)
        lines_before = get_newlines_count_before(node.span, self.contents)

        if node.span.end == len(self.contents) - 1:
            # The group comment is the last thing in the file.

            if node.content == "":
                # Empty comments are allowed at the end of the file.
                return

            self.add_error(
                node,
                "GC01",
                "Group comments (##) should not be at the end of the file, they should "
                "always be above a message. Only an empty group comment is allowed at "
                "the end of a file.",
            )
            return

        # Two consecutive newlines mean exactly one empty line.
        if lines_after != 2:
            self.add_error(
                node,
                "GC02",
                "Group comments (##) should be followed by one empty line.",
            )
            return

        if lines_before != 2:
            self.add_error(
                node,
                "GC03",
                "Group comments (##) should have an empty line before them.",
            )
            return

    def visit_VariableReference(self, node):
        # Identifiers are allowed to be free form, but need to store them
        # for comment checks.

        if node.id.name not in self.state["variables"]:
            self.state["variables"].append(node.id.name)

    def add_error(self, node, rule, msg):
        """Append a lint result for `node` with the given rule id and message."""
        (col, line) = self.span_to_line_and_col(node.span)
        res = {
            "path": self.path,
            "lineno": line,
            "column": col,
            "rule": rule,
            "message": msg,
        }
        self.results.append(result.from_config(self.config, **res))

    def span_to_line_and_col(self, span):
        """Translate the start offset of `span` into a position in the file.

        Returns a (column, line) tuple - note the order.
        """
        offsets = self.offsets_and_lines
        # Index of the first newline located at or after the span start; that
        # newline terminates the line the span starts on.
        i = bisect.bisect_left(offsets, (span.start, 0))
        if i > 0:
            col = span.start - offsets[i - 1][0]
        else:
            col = 1 + span.start

        if i < len(offsets):
            line = offsets[i][1]
        elif offsets:
            # The span starts after the last newline (the file does not end
            # with a newline): it is on the line following the last one
            # recorded. Indexing offsets[i] here would raise IndexError.
            line = offsets[-1][1] + 1
        else:
            # No newline at all: everything is on the first line.
            line = 1
        return (col, line)
+
+
def get_offsets_and_lines(contents):
    """Map every newline in `contents` to its 1-based line number.

    Fluent AST spans are expressed as character offsets into the file. The
    returned list of (newline offset, line number) tuples, ordered by offset,
    lets callers convert such offsets into line/column positions.
    """
    newline_offsets = (match.start() for match in re.finditer(r"\n", contents))
    return [
        (offset, line_number)
        for line_number, offset in enumerate(newline_offsets, start=1)
    ]
+
+
def get_newlines_count_after(span, contents):
    """Count the consecutive newline characters starting at `span.end`.

    Scanning stops at the first character that is not a newline, or at the
    end of `contents`. A carriage return trips an assertion.
    """
    position = span.end
    total_length = len(contents)
    while position < total_length:
        char = contents[position]
        assert char != "\r", "This linter does not handle \\r characters."
        if char != "\n":
            break
        position += 1

    # Every step taken from span.end was over a newline.
    return position - span.end
+
+
def get_newlines_count_before(span, contents):
    """Count the consecutive newline characters just before `span.start`.

    Scanning walks backwards from the character preceding the span and stops
    at the first character that is not a newline, or at the start of
    `contents`.

    Raises AssertionError if a carriage return is encountered.
    """
    count = 0
    # The stop value is -1 so that index 0 is inspected too; stopping at 0
    # would ignore a newline that is the very first character of the file.
    for i in range(span.start - 1, -1, -1):
        if contents[i] == "\r":
            # Raised explicitly so the guard still applies under `python -O`.
            raise AssertionError("This linter does not handle \\r characters.")
        if contents[i] != "\n":
            break
        count += 1

    return count
+
+
def get_exclusions(root):
    """Load the lint exclusions from exclusions.yml under `root`.

    Returns the first YAML document of the file: a mapping of rule name to
    a dict with "messages" and "files" entries. Each "files" entry is
    replaced by a set of paths joined onto `root`, so it can be compared
    directly against the paths being linted.
    """
    exclusions_path = mozpath.join(
        root, "tools", "lint", "fluent-lint", "exclusions.yml"
    )
    # Decode explicitly as UTF-8 (like the other files read by this linter)
    # instead of relying on the platform's locale encoding.
    with open(exclusions_path, encoding="utf-8") as f:
        exclusions = list(yaml.safe_load_all(f))[0]
        for error_type in exclusions:
            exclusions[error_type]["files"] = {
                mozpath.join(root, x) for x in exclusions[error_type]["files"]
            }
        return exclusions
+
+
def get_branding_list(root, brand_files):
    """Collect brand names from the terms defined in the given Fluent files.

    root: directory that the entries of `brand_files` are relative to.
    brand_files: iterable of relative paths; None is treated as empty.
        Paths that do not exist are skipped.

    Returns a list of unique brand strings, in the order first encountered.
    """

    class MessageExtractor(visitor.Visitor):
        """Record the first text element found after each term identifier."""

        def __init__(self):
            super().__init__()
            self.brands = []
            self.last_message_id = None

        def visit_Term(self, node):
            self.last_message_id = node.id.name
            self.generic_visit(node)

        def visit_TextElement(self, node):
            if self.last_message_id:
                self.brands.append(node.value)
                # Only the first text element of a term is recorded.
                self.last_message_id = None
            self.generic_visit(node)

    extractor = MessageExtractor()

    # A configuration without brand files yields None; treat it as "no files"
    # rather than failing on iteration.
    for brand_path in brand_files or []:
        brand_file = mozpath.join(root, brand_path)
        if os.path.exists(brand_file):
            with open(brand_file, encoding="utf-8") as f:
                messages = parse(f.read())
                extractor.visit(messages)

    # De-duplicate while preserving discovery order, so that the brand names
    # listed in lint messages are stable from run to run.
    return list(dict.fromkeys(extractor.brands))
+
+
def lint(paths, config, fix=None, **lintargs):
    """Lint the Fluent files under `paths` and return the collected results.

    paths: paths to lint, expanded with expand_exclusions.
    config: linter configuration; the optional "brand-files" entry lists the
        files brand names are extracted from.
    fix: accepted for interface compatibility; not used here.
    lintargs: must contain "root", the directory that the exclusions file
        and brand files are resolved against.
    """
    root = lintargs["root"]
    files = list(expand_exclusions(paths, config, root))
    exclusions = get_exclusions(root)
    # The "brand-files" entry is optional; fall back to no brand files.
    brand_files = config.get("brand-files") or []
    brand_names = get_branding_list(root, brand_files)
    results = []
    for path in files:
        # Use a context manager so the file handle is closed promptly.
        with open(path, "r", encoding="utf-8") as f:
            contents = f.read()
        linter = Linter(
            path,
            config,
            exclusions,
            contents,
            get_offsets_and_lines(contents),
            brand_names,
        )
        linter.visit(parse(contents))
        results.extend(linter.results)
    return results
diff --git a/tools/lint/fluent-lint/exclusions.yml b/tools/lint/fluent-lint/exclusions.yml
new file mode 100644
index 0000000000..ebcd1b0029
--- /dev/null
+++ b/tools/lint/fluent-lint/exclusions.yml
@@ -0,0 +1,198 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Warning: Only exclusions for identifiers (ID01, ID02) and hard-coded brand
+# names (CO01) are currently allowed.
+---
+# Only add exceptions to this file if the ID is generated programmatically and
+# can't easily be changed to follow the naming convention.
+# Only lowercase letters, digits and hyphens should be used in Fluent IDs.
+ID01:
+ messages:
+ - trademarkInfo
+ - crashed-include-URL-2
+ - blocklist-item-moz-std-listName
+ - blocklist-item-moz-full-listName
+ - shortcuts-browserAction2
+ - shortcuts-pageAction
+ - shortcuts-sidebarAction
+ - about-networking-originAttributesSuffix
+ - size-KB
+ - size-MB
+ - size-GB
+ - state-dd-Disabled
+ - state-dd-Disabled-block-list-state
+ - memory-unit-B
+ - memory-unit-KB
+ - memory-unit-MB
+ - memory-unit-GB
+ - memory-unit-TB
+ - memory-unit-PB
+ - memory-unit-EB
+ - enableSafeBrowsing-label
+ - about-telemetry-show-in-Firefox-json-viewer
+ - url-classifier-search-listType
+ # aboutDialog.ftl: Do not add new exceptions for this file,
+ # new strings should follow the naming convention.
+ - aboutDialog-title
+ - releaseNotes-link
+ - update-checkForUpdatesButton
+ - update-updateButton
+ - update-checkingForUpdates
+ - update-adminDisabled
+ - update-noUpdatesFound
+ - update-otherInstanceHandlingUpdates
+ - warningDesc-version
+ - bottomLinks-license
+ - bottomLinks-rights
+ - bottomLinks-privacy
+ - aboutDialog-version
+ - aboutDialog-version-nightly
+ # certError.ftl: These IDs are generated programmatically
+ # from certificate error codes.
+ - connectionFailure-title
+ - deniedPortAccess-title
+ - dnsNotFound-title
+ - fileNotFound-title
+ - fileAccessDenied-title
+ - captivePortal-title
+ - malformedURI-title
+ - netInterrupt-title
+ - notCached-title
+ - netOffline-title
+ - contentEncodingError-title
+ - unsafeContentType-title
+ - netReset-title
+ - netTimeout-title
+ - unknownProtocolFound-title
+ - proxyConnectFailure-title
+ - proxyResolveFailure-title
+ - redirectLoop-title
+ - unknownSocketType-title
+ - nssFailure2-title
+ - corruptedContentError-title
+ - sslv3Used-title
+ - inadequateSecurityError-title
+ - blockedByPolicy-title
+ - clockSkewError-title
+ - networkProtocolError-title
+ - nssBadCert-title
+ - nssBadCert-sts-title
+ files:
+ # policies-descriptions.ftl: These IDs are generated programmatically
+ # from policy names.
+ - browser/locales/en-US/browser/policies/policies-descriptions.ftl
+ # The webext-perms-description-* IDs are generated programmatically
+ # from permission names
+ - toolkit/locales/en-US/toolkit/global/extensionPermissions.ftl
+ID02:
+ messages:
+ # browser/components/ion/content/ion.ftl
+ - ion
+ # browser/locales/en-US/browser/aboutDialog.ftl
+ - helpus
+ # browser/locales/en-US/browser/aboutLogins.ftl
+ - menu
+ # browser/locales/en-US/browser/pageInfo.ftl
+ - copy
+ - perm-tab
+ # browser/locales/en-US/browser/tabContextMenu.ftl
+ - pin-tab
+ # browser/locales/en-US/browser/touchbar/touchbar.ftl
+ - back
+ - forward
+ - reload
+ - home
+ - find
+ - new-tab
+ - share
+ # toolkit/locales/en-US/toolkit/about/aboutPerformance.ftl
+ - type-tab
+ - size-KB
+ - size-MB
+ - size-GB
+ - item
+ # toolkit/locales/en-US/toolkit/about/aboutPlugins.ftl
+ - file-dd
+ - path-dd
+ # toolkit/locales/en-US/toolkit/about/aboutServiceWorkers.ftl
+ - scope
+ - waiting
+ # toolkit/locales/en-US/toolkit/about/aboutSupport.ftl
+ # yaml interprets yes and no as booleans if quotes are not present.
+ - "yes"
+ - "no"
+ - unknown
+ - found
+ - missing
+ - gpu-ram
+ - apz-none
+ # toolkit/locales/en-US/toolkit/printing/printDialogs.ftl
+ - portrait
+ - scale
+ - print-bg
+ - hf-blank
+ - hf-title
+ - hf-url
+ - hf-page
+ files: []
+# Hard-coded brand names like Firefox or Mozilla should be used only in
+# specific cases, in all other cases the corresponding terms should be used.
+# Check with the localization team for advice.
+CO01:
+ messages:
+ # browser/branding/official/locales/en-US/brand.ftl
+ - trademarkInfo
+ # toolkit/locales/en-US/toolkit/neterror/certError.ftl
+ - cert-error-mitm-mozilla
+ - cert-error-mitm-connection
+ # browser/locales/en-US/browser/appExtensionFields.ftl
+ - extension-firefox-alpenglow-name
+ # browser/locales/en-US/browser/browser.ftl
+ - identity-custom-root
+ - identity-description-custom-root2
+ # browser/locales/en-US/browser/migration.ftl
+ - import-from-firefox
+ # browser/locales/en-US/browser/migrationWizard.ftl
+ - migration-wizard-migrator-display-name-firefox
+ # browser/locales/en-US/browser/newtab/onboarding.ftl
+ - mr1-onboarding-welcome-image-caption
+ - mr2022-onboarding-gratitude-subtitle
+ # browser/locales/en-US/browser/policies/policies-descriptions.ftl
+ - policy-DisableFirefoxScreenshots
+ # browser/locales/en-US/browser/preferences/preferences.ftl
+ - sync-engine-addons
+ - sync-mobile-promo
+ # browser/locales/en-US/browser/protectionsPanel.ftl
+ - protections-panel-content-blocking-breakage-report-view-description
+ # devtools/client/locales/en-US/aboutdebugging.ftl
+ - about-debugging-setup-usb-step-enable-debug-firefox2
+ - about-debugging-browser-version-too-old-fennec
+ - about-debugging-browser-version-too-recent
+ # devtools/client/locales/en-US/application.ftl
+ - manifest-loaded-devtools-error
+ # toolkit/locales/en-US/toolkit/about/aboutAddons.ftl
+ - addon-badge-line3
+ - recommended-theme-1
+ # toolkit/locales/en-US/toolkit/about/aboutGlean.ftl
+ - about-glean-description
+ # toolkit/locales/en-US/toolkit/about/aboutPlugins.ftl
+ - plugins-openh264-description
+ # toolkit/locales/en-US/toolkit/about/aboutRights.ftl
+ - rights-intro-point-1
+ - rights-intro-point-2
+ # toolkit/locales/en-US/toolkit/about/aboutSupport.ftl
+ - app-basics-key-mozilla
+ - virtual-monitor-disp
+ # toolkit/locales/en-US/toolkit/about/aboutTelemetry.ftl
+ - about-telemetry-firefox-data-doc
+ - about-telemetry-telemetry-client-doc
+ - about-telemetry-telemetry-dashboard
+ # toolkit/locales/en-US/toolkit/global/extensionPermissions.ftl
+ - webext-perms-description-management
+ # toolkit/locales/en-US/toolkit/global/processTypes.ftl
+ - process-type-privilegedmozilla
+ files:
+ - browser/components/ion/content/ion.ftl
+ - browser/locales/en-US/browser/profile/default-bookmarks.ftl
+ - toolkit/locales/en-US/toolkit/about/aboutMozilla.ftl