From 029acfb922bdd25d6e38c864895c6cc66db76d13 Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Mon, 20 Aug 2001 21:24:19 +0000 Subject: Deal more appropriately with bare ampersands and pointy brackets; this module has to deal with "classic" HTML-as-deployed as well as XHTML, so we cannot be as strict as XHTML allows. This closes SF bug #453059, but uses a different fix than suggested in the bug comments. --- Lib/HTMLParser.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'Lib/HTMLParser.py') diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index 39a5d8262f..954ce2647f 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -15,7 +15,8 @@ import string interesting_normal = re.compile('[&<]') interesting_cdata = re.compile(r'<(/|\Z)') -incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*|#[0-9]*)?') +incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*' + '|#([0-9]*|[xX][0-9a-fA-F]*))?') entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') @@ -185,11 +186,8 @@ class HTMLParser: elif declopen.match(rawdata, i): #