summaryrefslogtreecommitdiff
path: root/Lib/sgmllib.py
diff options
context:
space:
mode:
author Thomas Wouters <thomas@python.org> 2006-04-21 10:40:58 +0000
committer Thomas Wouters <thomas@python.org> 2006-04-21 10:40:58 +0000
commit 49fd7fa4431da299196d74087df4a04f99f9c46f (patch)
tree 35ace5fe78d3d52c7a9ab356ab9f6dbf8d4b71f4 /Lib/sgmllib.py
parent 9ada3d6e29d5165dadacbe6be07bcd35cfbef59d (diff)
download cpython-git-49fd7fa4431da299196d74087df4a04f99f9c46f.tar.gz
Merge p3yk branch with the trunk up to revision 45595. This breaks a fair
number of tests, all because of the codecs/_multibytecodecs issue described
here (it's not a Py3K issue, just something Py3K discovers):
http://mail.python.org/pipermail/python-dev/2006-April/064051.html

Hye-Shik Chang promised to look for a fix, so no need to fix it here. The
tests that are expected to break are:

test_codecencodings_cn
test_codecencodings_hk
test_codecencodings_jp
test_codecencodings_kr
test_codecencodings_tw
test_codecs
test_multibytecodec

This merge fixes an actual test failure (test_weakref) in this branch,
though, so I believe merging is the right thing to do anyway.
Diffstat (limited to 'Lib/sgmllib.py')
-rw-r--r--Lib/sgmllib.py34
1 files changed, 31 insertions, 3 deletions
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 08e365bdef..3e85a910e0 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -269,9 +269,37 @@ class SGMLParser(markupbase.ParserBase):
attrname, rest, attrvalue = match.group(1, 2, 3)
if not rest:
attrvalue = attrname
- elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
- attrvalue[:1] == '"' == attrvalue[-1:]:
- attrvalue = attrvalue[1:-1]
+ else:
+ if (attrvalue[:1] == "'" == attrvalue[-1:] or
+ attrvalue[:1] == '"' == attrvalue[-1:]):
+ # strip quotes
+ attrvalue = attrvalue[1:-1]
+ l = 0
+ new_attrvalue = ''
+ while l < len(attrvalue):
+ av_match = entityref.match(attrvalue, l)
+ if (av_match and av_match.group(1) in self.entitydefs and
+ attrvalue[av_match.end(1)] == ';'):
+ # only substitute entityrefs ending in ';' since
+ # otherwise we may break <a href='?p=x&q=y'>
+ # which is very common
+ new_attrvalue += self.entitydefs[av_match.group(1)]
+ l = av_match.end(0)
+ continue
+ ch_match = charref.match(attrvalue, l)
+ if ch_match:
+ try:
+ char = chr(int(ch_match.group(1)))
+ new_attrvalue += char
+ l = ch_match.end(0)
+ continue
+ except ValueError:
+ # invalid character reference, don't substitute
+ pass
+ # all other cases
+ new_attrvalue += attrvalue[l]
+ l += 1
+ attrvalue = new_attrvalue
attrs.append((attrname.lower(), attrvalue))
k = match.end(0)
if rawdata[j] == '>':