summaryrefslogtreecommitdiffstats
path: root/check_whence.py
blob: fd74a56a5c5a1fbbc989588626ba1d9bc072d602 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/python3

import os, re, sys
from io import open


def list_whence():
    with open("WHENCE", encoding="utf-8") as whence:
        for line in whence:
            match = re.match(r'(?:RawFile|File|Source):\s*"(.*)"', line)
            if match:
                yield match.group(1)
                continue
            match = re.match(r"(?:RawFile|File|Source):\s*(\S*)", line)
            if match:
                yield match.group(1)
                continue
            match = re.match(
                r"Licen[cs]e: (?:.*\bSee (.*) for details\.?|(\S*))\n", line
            )
            if match:
                if match.group(1):
                    for name in re.split(r", | and ", match.group(1)):
                        yield name
                    continue
                if match.group(2):
                    # Just one word - may or may not be a filename
                    if not re.search(
                        r"unknown|distributable", match.group(2), re.IGNORECASE
                    ):
                        yield match.group(2)
                        continue


def list_whence_files():
    with open("WHENCE", encoding="utf-8") as whence:
        for line in whence:
            match = re.match(r"(?:RawFile|File):\s*(.*)", line)
            if match:
                yield match.group(1).replace("\ ", " ").replace('"', "")
                continue


def list_links_list():
    with open("WHENCE", encoding="utf-8") as whence:
        for line in whence:
            match = re.match(r"Link:\s*(.*)", line)
            if match:
                linkname, target = match.group(1).split("->")

                linkname = linkname.strip().replace("\ ", " ").replace('"', "")
                target = target.strip().replace("\ ", " ").replace('"', "")

                # Link target is relative to the link
                target = os.path.join(os.path.dirname(linkname), target)
                target = os.path.normpath(target)

                yield (linkname, target)
                continue


def list_git():
    with os.popen("git ls-files") as git_files:
        for line in git_files:
            yield line.rstrip("\n")


def main():
    ret = 0
    whence_list = list(list_whence())
    whence_files = list(list_whence_files())
    links_list = list(list_links_list())
    whence_links = list(zip(*links_list))[0]
    known_files = set(name for name in whence_list if not name.endswith("/")) | set(
        [
            ".gitignore",
            ".codespell.cfg",
            ".gitlab-ci.yml",
            ".pre-commit-config.yaml",
            "build_packages.py",
            "check_whence.py",
            "configure",
            "Makefile",
            "README.md",
            "copy-firmware.sh",
            "WHENCE",
            "Dockerfile",
            "contrib/templates/debian.changelog",
            "contrib/templates/debian.control",
            "contrib/templates/debian.copyright",
            "contrib/templates/rpm.spec",
            "contrib/process_linux_firmware.py",
        ]
    )
    known_prefixes = set(name for name in whence_list if name.endswith("/"))
    git_files = set(list_git())

    for name in set(name for name in whence_files if name.endswith("/")):
        sys.stderr.write("E: %s listed in WHENCE as File, but is directory\n" % name)
        ret = 1

    for name in set(fw for fw in whence_files if whence_files.count(fw) > 1):
        sys.stderr.write("E: %s listed in WHENCE twice\n" % name)
        ret = 1

    for name in set(link for link in whence_links if whence_links.count(link) > 1):
        sys.stderr.write("E: %s listed in WHENCE twice\n" % name)
        ret = 1

    for name in set(link for link in whence_files if os.path.islink(link)):
        sys.stderr.write("E: %s listed in WHENCE as File, but is a symlink\n" % name)
        ret = 1

    for name in set(link[0] for link in links_list if os.path.islink(link[0])):
        sys.stderr.write("E: %s listed in WHENCE as Link, is in tree\n" % name)
        ret = 1

    for name in sorted(list(known_files - git_files)):
        sys.stderr.write("E: %s listed in WHENCE does not exist\n" % name)
        ret = 1

    # A link can point to another link, or to a file...
    valid_targets = set(link[0] for link in links_list) | git_files

    # ... or to a directory
    for target in set(valid_targets):
        dirname = target
        while True:
            dirname = os.path.dirname(dirname)
            if dirname == "":
                break
            valid_targets.add(dirname)

    for name, target in sorted(links_list):
        if target not in valid_targets:
            sys.stderr.write(
                "E: target %s of link %s in WHENCE" " does not exist\n" % (target, name)
            )
            ret = 1

    for name in sorted(list(git_files - known_files)):
        # Ignore subdirectory changelogs and GPG detached signatures
        if name.endswith("/ChangeLog") or (
            name.endswith(".asc") and name[:-4] in known_files
        ):
            continue

        # Ignore unknown files in known directories
        for prefix in known_prefixes:
            if name.startswith(prefix):
                break
        else:
            sys.stderr.write("E: %s not listed in WHENCE\n" % name)
            ret = 1
    return ret


if __name__ == "__main__":
    sys.exit(main())