import html.parser class Parser(html.parser.HTMLParser): def __init__(self): super().__init__() self._stream = [] def handle_starttag(self, tag, attrs): attrs = sorted(attrs, key=lambda x: x[0]) attrs = '|'.join([k[0] + ':' + k[1] for k in attrs]) self._stream.append(('<', tag, attrs)) def handle_endtag(self, tag): self._stream.append(('>', tag, '')) def handle_data(self, data): self._stream.append(('_', data, '')) @property def stream(self): return self._stream def _serialize(t): parser = Parser() parser.feed(t) return parser.stream def structural_diff(a, b): """Check if there is a structural difference between two HTML files.""" a_s = _serialize(a) b_s = _serialize(b) for e, f in zip(a_s, b_s): assert e == f, f'Expected: {e}, found: {f}'