summaryrefslogtreecommitdiffstats
path: root/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py
blob: 5d884fc3188a2da573e490fb29136bb8e16969f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import hashlib
from pathlib import Path

from taskgraph.util import path as mozpath
from taskgraph.util.memoize import memoize


@memoize
def hash_path(path):
    """Hash a single file.

    The file is read in fixed-size chunks so that a large file is never
    held in memory all at once; the digest is the same as hashing the whole
    content in a single call.

    Args:
        path: Location of the file to hash. It is opened in binary mode.

    Returns:
        The SHA-256 hash of the file content in hex form.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        # iter() with a sentinel keeps calling fh.read() until it returns
        # b"", which is how a binary file signals end-of-file.
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def hash_paths(base_path, patterns):
    """
    Compute one digest covering every file matched by ``patterns``.

    Each pattern is looked up among the files found under ``base_path``; a
    pattern that matches no file raises an ``Exception``. Every matched file
    then contributes one ``"<sha256 of content> <path>\\n"`` line, emitted in
    sorted path order, and the SHA-256 of all those lines is the result.
    This is similar in spirit to git tree objects or mercurial manifests.

    Returns the combined SHA-256 hash in hex form.
    """
    matched_files = set()
    for pattern in patterns:
        hits = _find_matching_files(base_path, pattern)
        if not hits:
            raise Exception("%s did not match anything" % pattern)
        matched_files.update(hits)

    combined = hashlib.sha256()
    # Sorting makes the digest independent of pattern and discovery order.
    for file_path in sorted(matched_files):
        absolute = mozpath.abspath(mozpath.join(base_path, file_path))
        content_hash = hash_path(absolute)
        entry = "{} {}\n".format(content_hash, mozpath.normsep(file_path))
        combined.update(entry.encode("utf-8"))
    return combined.hexdigest()


@memoize
def _find_matching_files(base_path, pattern):
    """Return the files under ``base_path`` whose path matches ``pattern``.

    Candidates are the paths produced by ``_get_all_files(base_path)``, and
    each is tested with ``mozpath.match``. The result is a list in the same
    order as the candidates; it is empty when nothing matches.
    """
    return [
        candidate
        for candidate in _get_all_files(base_path)
        if mozpath.match(candidate, pattern)
    ]


@memoize
def _get_all_files(base_path):
    return [str(path) for path in Path(base_path).rglob("*") if path.is_file()]