summaryrefslogtreecommitdiffstats
path: root/testing/mozharness/mozharness/mozilla/merkle.py
blob: dba780b73a14aea3454f527c01207942e8bac77f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import struct


def _round2(n):
    k = 1
    while k < n:
        k <<= 1
    return k >> 1


def _leaf_hash(hash_fn, leaf):
    return hash_fn(b"\x00" + leaf).digest()


def _pair_hash(hash_fn, left, right):
    return hash_fn(b"\x01" + left + right).digest()


class InclusionProof:
    """
    Represents a Merkle inclusion proof for purposes of serialization,
    deserialization, and verification of the proof.  The format for inclusion
    proofs in RFC 6962-bis is as follows:

        opaque LogID<2..127>;
        opaque NodeHash<32..2^8-1>;

        struct {
            LogID log_id;
            uint64 tree_size;
            uint64 leaf_index;
            NodeHash inclusion_path<1..2^16-1>;
        } InclusionProofDataV2;

    In other words:
      - 1 + N octets of log_id (currently zero)
      - 8 octets of tree_size = self.n
      - 8 octets of leaf_index = m
      - 2 octets of path length, followed by
      * 1 + N octets of NodeHash
    """

    # Pre-generated 'log ID'.  Not used by Firefox; it is only needed because
    # there's a slot in the RFC 6962-bis format that requires a value at least
    # two bytes long (plus a length byte).
    LOG_ID = b"\x02\x00\x00"

    def __init__(self, tree_size, leaf_index, path_elements):
        self.tree_size = tree_size
        self.leaf_index = leaf_index
        self.path_elements = path_elements

    @staticmethod
    def from_rfc6962_bis(serialized):
        start = 0
        read = 1
        if len(serialized) < start + read:
            raise Exception("Inclusion proof too short for log ID header")
        (log_id_len,) = struct.unpack("B", serialized[start : start + read])
        start += read
        start += log_id_len  # Ignore the log ID itself

        read = 8 + 8 + 2
        if len(serialized) < start + read:
            raise Exception("Inclusion proof too short for middle section")
        tree_size, leaf_index, path_len = struct.unpack(
            "!QQH", serialized[start : start + read]
        )
        start += read

        path_elements = []
        end = 1 + log_id_len + 8 + 8 + 2 + path_len
        while start < end:
            read = 1
            if len(serialized) < start + read:
                raise Exception("Inclusion proof too short for middle section")
            (elem_len,) = struct.unpack("!B", serialized[start : start + read])
            start += read

            read = elem_len
            if len(serialized) < start + read:
                raise Exception("Inclusion proof too short for middle section")
            if end < start + read:
                raise Exception("Inclusion proof element exceeds declared length")
            path_elements.append(serialized[start : start + read])
            start += read

        return InclusionProof(tree_size, leaf_index, path_elements)

    def to_rfc6962_bis(self):
        inclusion_path = b""
        for step in self.path_elements:
            step_len = struct.pack("B", len(step))
            inclusion_path += step_len + step

        middle = struct.pack(
            "!QQH", self.tree_size, self.leaf_index, len(inclusion_path)
        )
        return self.LOG_ID + middle + inclusion_path

    def _expected_head(self, hash_fn, leaf, leaf_index, tree_size):
        node = _leaf_hash(hash_fn, leaf)

        # Compute indicators of which direction the pair hashes should be done.
        # Derived from the PATH logic in draft-ietf-trans-rfc6962-bis
        lr = []
        while tree_size > 1:
            k = _round2(tree_size)
            left = leaf_index < k
            lr = [left] + lr

            if left:
                tree_size = k
            else:
                tree_size = tree_size - k
                leaf_index = leaf_index - k

        assert len(lr) == len(self.path_elements)
        for i, elem in enumerate(self.path_elements):
            if lr[i]:
                node = _pair_hash(hash_fn, node, elem)
            else:
                node = _pair_hash(hash_fn, elem, node)

        return node

    def verify(self, hash_fn, leaf, leaf_index, tree_size, tree_head):
        return self._expected_head(hash_fn, leaf, leaf_index, tree_size) == tree_head


class MerkleTree:
    """
    Implements a Merkle tree on a set of data items following the
    structure defined in RFC 6962-bis.  This allows us to create a
    single hash value that summarizes the data (the 'head'), and an
    'inclusion proof' for each element that connects it to the head.

    https://tools.ietf.org/html/draft-ietf-trans-rfc6962-bis-24
    """

    def __init__(self, hash_fn, data):
        self.n = len(data)
        self.hash_fn = hash_fn

        # We cache intermediate node values, as a dictionary of dictionaries,
        # where the node representing data elements data[m:n] is represented by
        # nodes[m][n]. This corresponds to the 'D[m:n]' notation in RFC
        # 6962-bis.  In particular, the leaves are stored in nodes[i][i+1] and
        # the head is nodes[0][n].
        self.nodes = {}
        for i in range(self.n):
            self.nodes[i, i + 1] = _leaf_hash(self.hash_fn, data[i])

    def _node(self, start, end):
        if (start, end) in self.nodes:
            return self.nodes[start, end]

        k = _round2(end - start)
        left = self._node(start, start + k)
        right = self._node(start + k, end)
        node = _pair_hash(self.hash_fn, left, right)

        self.nodes[start, end] = node
        return node

    def head(self):
        return self._node(0, self.n)

    def _relative_proof(self, target, start, end):
        n = end - start
        k = _round2(n)

        if n == 1:
            return []
        elif target - start < k:
            return self._relative_proof(target, start, start + k) + [
                self._node(start + k, end)
            ]
        elif target - start >= k:
            return self._relative_proof(target, start + k, end) + [
                self._node(start, start + k)
            ]

    def inclusion_proof(self, leaf_index):
        path_elements = self._relative_proof(leaf_index, 0, self.n)
        return InclusionProof(self.n, leaf_index, path_elements)