1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import sys
import unicodedata
from mozlint import result
from mozlint.pathutils import expand_exclusions
# Code inspired by Red Hat
# https://github.com/siddhesh/find-unicode-control/
# published under the 'BSD 3-Clause' license
# https://access.redhat.com/security/vulnerabilities/RHSB-2021-007
# Lint issues collected by this module; lint() returns this list.
results = []

# Every character whose Unicode general category is "Cf" (format).
# NOTE: range() stops one short of sys.maxunicode, so the very last code
# point is never examined.
disallowed = {
    ch
    for ch in map(chr, range(sys.maxunicode))
    if unicodedata.category(ch) == "Cf"
}
def getfiletext(config, filename):
    """Return the contents of ``filename`` decoded as UTF-8, or None on failure.

    Only UTF-8 is attempted; there is no fallback to latin1 or any other
    encoding. When the file cannot be opened, read or decoded, an "error"
    level issue is appended to the module-level ``results`` list and None is
    returned so the caller can skip the file.

    ``config`` is handed to ``result.from_config`` unchanged.
    """
    try:
        # Read raw bytes and decode explicitly so the platform's default
        # encoding never influences the outcome.
        with open(filename, "rb") as infile:
            return infile.read().decode("utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # Report the failure as a lint issue instead of letting a single
        # unreadable file abort the whole run.
        res = {
            "path": filename,
            "message": "Could not open file as utf-8 - maybe an encoding error: %s"
            % e,
            "level": "error",
        }
        results.append(result.from_config(config, **res))
        return None
def analyze_text(filename, text, disallowed):
    """Locate the first line of ``text`` that contains a disallowed character.

    Lines are delimited by LF, CRLF or a lone CR only.

    Args:
        filename: Not used by the scan; kept so existing callers keep working.
        text: The decoded file contents to scan.
        disallowed: Container of single characters that must not appear.

    Returns:
        A ``(subset, line)`` tuple. ``subset`` is the list of disallowed
        characters found on the first offending line, in order of
        appearance, and ``line`` is that line's 1-based number. When nothing
        is found the result is ``("", 0)``.
    """
    # str.splitlines() also breaks on form feed, vertical tab and several
    # other separators, which shifts the reported number away from a plain
    # newline count. Normalise CRLF / CR to LF and split on LF alone.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    for lineno, content in enumerate(lines, start=1):
        subset = [c for c in content if c in disallowed]
        if subset:
            return (subset, lineno)

    return ("", 0)
def lint(paths, config, **lintargs):
    """Check every file under ``paths`` for disallowed characters.

    The paths are expanded with ``expand_exclusions`` using
    ``lintargs["root"]``. Each file is read with ``getfiletext`` and scanned
    with ``analyze_text``; the first offending line of a file produces one
    "error" level issue.

    Returns:
        A dict with the issues under ``"results"`` and ``"fixed"`` set to 0.
    """
    # ``results`` is module-level state that getfiletext() also appends to.
    # Empty it first so issues from an earlier call in the same process are
    # not reported a second time.
    results.clear()

    for path in expand_exclusions(paths, config, lintargs["root"]):
        text = getfiletext(config, path)
        if not text:
            # Either the file is empty or it could not be decoded, in which
            # case getfiletext() has already recorded the problem.
            continue

        subset, line = analyze_text(path, text, disallowed)
        if subset:
            res = {
                "path": path,
                "lineno": line,
                "message": "disallowed characters: %s" % subset,
                "level": "error",
            }
            results.append(result.from_config(config, **res))

    # Return a copy so a later call cannot alter what this one handed back.
    return {"results": list(results), "fixed": 0}
|