summaryrefslogtreecommitdiffstats
path: root/tests/support/structural_diff.py
blob: cea27d1039d1b387591c1098ed58b51da3e348c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import html.parser


class Parser(html.parser.HTMLParser):
    """Flatten an HTML document into a comparable list of events.

    Each start tag, end tag and text run seen by the parser is appended to
    the stream as a ``(kind, value, attrs)`` tuple, where ``kind`` is
    ``'<'`` for a start tag, ``'>'`` for an end tag and ``'_'`` for text.
    ``attrs`` is only populated for start tags; it is the empty string
    otherwise.
    """

    def __init__(self):
        super().__init__()
        self._stream = []

    def handle_starttag(self, tag, attrs):
        """Record a start tag with its attributes serialized as one string.

        Attributes are sorted by name so that two tags differing only in
        attribute order produce the same entry, then joined as
        ``name:value`` pairs separated by ``|``.
        """
        attrs = sorted(attrs, key=lambda x: x[0])
        # HTMLParser reports a valueless attribute (``<input disabled>``)
        # with value None; concatenating that would raise TypeError, so it
        # is serialized like an explicitly empty value.
        attrs = '|'.join(
            [name + ':' + ('' if value is None else value)
             for name, value in attrs]
        )
        self._stream.append(('<', tag, attrs))

    def handle_endtag(self, tag):
        """Record an end tag."""
        self._stream.append(('>', tag, ''))

    def handle_data(self, data):
        """Record a run of text."""
        self._stream.append(('_', data, ''))

    @property
    def stream(self):
        """The list of event tuples recorded so far, in document order."""
        return self._stream


def _serialize(t):
    """Parse the HTML text ``t`` and return its list of event tuples."""
    parser = Parser()
    parser.feed(t)
    # feed() may hold back input it cannot classify yet (for example
    # trailing text ending in an unterminated ``&...`` reference);
    # close() forces that buffered remainder to be processed so it is
    # not silently missing from the stream.
    parser.close()
    return parser.stream


def structural_diff(a, b):
    """Check if there is a structural difference between two HTML files.

    Both inputs are HTML text. They are serialized into event streams and
    compared element by element; ``a`` is treated as the expected document
    and ``b`` as the one under test.

    Raises:
        AssertionError: at the first position where the two streams
            differ, including when one stream ends before the other (the
            missing side is then reported as ``None``).
    """
    a_s = _serialize(a)
    b_s = _serialize(b)

    for e, f in zip(a_s, b_s):
        # Raised explicitly rather than via ``assert`` so the comparison is
        # still performed when Python runs with -O.
        if e != f:
            raise AssertionError(f'Expected: {e}, found: {f}')

    # zip() stops at the shorter stream, so extra or missing trailing
    # elements would otherwise go unnoticed.
    if len(a_s) != len(b_s):
        common = min(len(a_s), len(b_s))
        e = a_s[common] if common < len(a_s) else None
        f = b_s[common] if common < len(b_s) else None
        raise AssertionError(f'Expected: {e}, found: {f}')