summaryrefslogtreecommitdiffstats
path: root/taskcluster/taskgraph/util/hash.py
blob: 04b946be71daf3a6fff541512150b02c752620ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals
from mozbuild.util import memoize
import mozpack.path as mozpath
from mozversioncontrol import get_repository_object
import hashlib
import io
import six


@memoize
def hash_path(path):
    """Hash a single file.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so large files are never loaded into memory in one piece.

    :param path: path of the file to hash.
    :return: the SHA-256 digest of the file contents, in hex form.
    :raises IOError/OSError: if the file cannot be opened or read.

    NOTE(review): the ``memoize`` decorator presumably caches the result per
    ``path`` for the lifetime of the process, so a file modified after its
    first hash would keep returning the old digest -- confirm against
    ``mozbuild.util.memoize``.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    with io.open(path, mode="rb") as fh:
        # iter(callable, sentinel) keeps reading until read() returns b"",
        # which a binary file object does at end of file.
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


@memoize
def get_file_finder(base_path):
    """Return the tracked-files finder for the repository at ``base_path``.

    The repository object is obtained through ``get_repository_object`` and
    asked for its ``get_tracked_files_finder()``.

    NOTE(review): ``memoize`` presumably means one finder is built per
    distinct ``base_path`` and then reused -- confirm against
    ``mozbuild.util.memoize``.
    """
    repository = get_repository_object(base_path)
    return repository.get_tracked_files_finder()


def hash_paths(base_path, patterns):
    """
    Given a list of path patterns, return a digest of the contents of all
    the matching files, similarly to git tree objects or mercurial
    manifests.

    Every pattern is looked up with the file finder for ``base_path``; the
    matched paths are de-duplicated and visited in sorted order. Paths
    ending in ``.pyc``, ``.pyd`` or ``.pyo`` are skipped. For each remaining
    file a line ``"<sha256 of file> <normalized path>\\n"`` is fed into a
    SHA-256 hash, whose hex digest is the result.

    :param base_path: directory the patterns and matched paths are relative to.
    :param patterns: iterable of path patterns to look up.
    :return: hex SHA-256 digest over the per-file hash lines.
    :raises Exception: if a pattern does not match any file.
    """
    finder = get_file_finder(base_path)

    # Collect the matched relative paths; a set drops duplicates when
    # several patterns match the same file.
    matched_paths = set()
    for pattern in patterns:
        names = [name for name, _ in finder.find(pattern)]
        if not names:
            raise Exception("%s did not match anything" % pattern)
        matched_paths.update(names)

    digest = hashlib.sha256()
    for rel_path in sorted(matched_paths):
        # Compiled Python artifacts are excluded from the digest.
        if rel_path.endswith((".pyc", ".pyd", ".pyo")):
            continue
        full_path = mozpath.abspath(mozpath.join(base_path, rel_path))
        entry = "{} {}\n".format(hash_path(full_path), mozpath.normsep(rel_path))
        digest.update(six.ensure_binary(entry))
    return digest.hexdigest()