blob: d2e4d7f203cf6147a521e5fb79ab20f01329ba99 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
import lxml.html
import lxml.etree
import pytest
def _serialize(t):
    """Flatten the element tree *t* into a stream of comparable tuples.

    The tree is walked with ``lxml.etree.iterwalk`` and one tuple is
    yielded for every "start" and every "end" event:

        (event, tag, repr(stripped text), "name:value, name:value, ...")

    Attributes are listed in name order so that the order they were
    written in the markup does not affect the comparison.  Only the
    element's own ``text`` is included; ``tail`` text is not read here.
    """
    walker = lxml.etree.iterwalk(t, events=("start", "end"))
    for event, node in walker:
        # A missing (None) or empty text both collapse to "".
        stripped = (node.text or "").strip()
        # Attribute names are unique per element, so sorting the
        # (name, value) pairs orders them by name alone.
        ordered_attrs = sorted(node.attrib.items())
        attr_summary = ', '.join(
            name + ':' + value for name, value in ordered_attrs
        )
        yield (event, node.tag, repr(stripped), attr_summary)
def structural_diff(a, b):
    """Assert that two HTML sources have the same serialized structure.

    Both *a* and *b* are parsed with ``lxml.html.fromstring`` and walked
    in lockstep through ``_serialize``.  Every pair of entries is printed
    before it is compared, so the output shows how far the walk got when
    a comparison fails.

    Returns nothing; raises ``AssertionError`` at the first pair of
    entries that differ.
    """
    left_stream = _serialize(lxml.html.fromstring(a))
    right_stream = _serialize(lxml.html.fromstring(b))
    for pair in zip(left_stream, right_stream):
        print(*pair)
        left, right = pair
        assert left == right
|