diff options
Diffstat (limited to 'testing/mozharness/mozharness/mozilla/merkle.py')
-rw-r--r-- | testing/mozharness/mozharness/mozilla/merkle.py | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/testing/mozharness/mozharness/mozilla/merkle.py b/testing/mozharness/mozharness/mozilla/merkle.py new file mode 100644 index 0000000000..dba780b73a --- /dev/null +++ b/testing/mozharness/mozharness/mozilla/merkle.py @@ -0,0 +1,190 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import struct + + +def _round2(n): + k = 1 + while k < n: + k <<= 1 + return k >> 1 + + +def _leaf_hash(hash_fn, leaf): + return hash_fn(b"\x00" + leaf).digest() + + +def _pair_hash(hash_fn, left, right): + return hash_fn(b"\x01" + left + right).digest() + + +class InclusionProof: + """ + Represents a Merkle inclusion proof for purposes of serialization, + deserialization, and verification of the proof. The format for inclusion + proofs in RFC 6962-bis is as follows: + + opaque LogID<2..127>; + opaque NodeHash<32..2^8-1>; + + struct { + LogID log_id; + uint64 tree_size; + uint64 leaf_index; + NodeHash inclusion_path<1..2^16-1>; + } InclusionProofDataV2; + + In other words: + - 1 + N octets of log_id (currently zero) + - 8 octets of tree_size = self.n + - 8 octets of leaf_index = m + - 2 octets of path length, followed by + * 1 + N octets of NodeHash + """ + + # Pre-generated 'log ID'. Not used by Firefox; it is only needed because + # there's a slot in the RFC 6962-bis format that requires a value at least + # two bytes long (plus a length byte). + LOG_ID = b"\x02\x00\x00" + + def __init__(self, tree_size, leaf_index, path_elements): + self.tree_size = tree_size + self.leaf_index = leaf_index + self.path_elements = path_elements + + @staticmethod + def from_rfc6962_bis(serialized): + start = 0 + read = 1 + if len(serialized) < start + read: + raise Exception("Inclusion proof too short for log ID header") + (log_id_len,) = struct.unpack("B", serialized[start : start + read]) + start += read + start += log_id_len # Ignore the log ID itself + + read = 8 + 8 + 2 + if len(serialized) < start + read: + raise Exception("Inclusion proof too short for middle section") + tree_size, leaf_index, path_len = struct.unpack( + "!QQH", serialized[start : start + read] + ) + start += read + + path_elements = [] + end = 1 + log_id_len + 8 + 8 + 2 + path_len + while start < end: + read = 1 + if len(serialized) < start + read: + raise Exception("Inclusion proof too short for middle section") + (elem_len,) = struct.unpack("!B", serialized[start : start + read]) + start += read + + read = elem_len + if len(serialized) < start + read: + raise Exception("Inclusion proof too short for middle section") + if end < start + read: + raise Exception("Inclusion proof element exceeds declared length") + path_elements.append(serialized[start : start + read]) + start += read + + return InclusionProof(tree_size, leaf_index, path_elements) + + def to_rfc6962_bis(self): + inclusion_path = b"" + for step in self.path_elements: + step_len = struct.pack("B", len(step)) + inclusion_path += step_len + step + + middle = struct.pack( + "!QQH", self.tree_size, self.leaf_index, len(inclusion_path) + ) + return self.LOG_ID + middle + inclusion_path + + def _expected_head(self, hash_fn, leaf, leaf_index, tree_size): + node = _leaf_hash(hash_fn, leaf) + + # Compute indicators of which direction the pair hashes should be done. + # Derived from the PATH logic in draft-ietf-trans-rfc6962-bis + lr = [] + while tree_size > 1: + k = _round2(tree_size) + left = leaf_index < k + lr = [left] + lr + + if left: + tree_size = k + else: + tree_size = tree_size - k + leaf_index = leaf_index - k + + assert len(lr) == len(self.path_elements) + for i, elem in enumerate(self.path_elements): + if lr[i]: + node = _pair_hash(hash_fn, node, elem) + else: + node = _pair_hash(hash_fn, elem, node) + + return node + + def verify(self, hash_fn, leaf, leaf_index, tree_size, tree_head): + return self._expected_head(hash_fn, leaf, leaf_index, tree_size) == tree_head + + +class MerkleTree: + """ + Implements a Merkle tree on a set of data items following the + structure defined in RFC 6962-bis. This allows us to create a + single hash value that summarizes the data (the 'head'), and an + 'inclusion proof' for each element that connects it to the head. + + https://tools.ietf.org/html/draft-ietf-trans-rfc6962-bis-24 + """ + + def __init__(self, hash_fn, data): + self.n = len(data) + self.hash_fn = hash_fn + + # We cache intermediate node values, as a dictionary of dictionaries, + # where the node representing data elements data[m:n] is represented by + # nodes[m][n]. This corresponds to the 'D[m:n]' notation in RFC + # 6962-bis. In particular, the leaves are stored in nodes[i][i+1] and + # the head is nodes[0][n]. + self.nodes = {} + for i in range(self.n): + self.nodes[i, i + 1] = _leaf_hash(self.hash_fn, data[i]) + + def _node(self, start, end): + if (start, end) in self.nodes: + return self.nodes[start, end] + + k = _round2(end - start) + left = self._node(start, start + k) + right = self._node(start + k, end) + node = _pair_hash(self.hash_fn, left, right) + + self.nodes[start, end] = node + return node + + def head(self): + return self._node(0, self.n) + + def _relative_proof(self, target, start, end): + n = end - start + k = _round2(n) + + if n == 1: + return [] + elif target - start < k: + return self._relative_proof(target, start, start + k) + [ + self._node(start + k, end) + ] + elif target - start >= k: + return self._relative_proof(target, start + k, end) + [ + self._node(start, start + k) + ] + + def inclusion_proof(self, leaf_index): + path_elements = self._relative_proof(leaf_index, 0, self.n) + return InclusionProof(self.n, leaf_index, path_elements) |