From ae8fd91d9ee289f31a30cc7a435fd077dface481 Mon Sep 17 00:00:00 2001 From: Philipp von Weitershausen Date: Sun, 14 Jan 2007 13:54:17 +0000 Subject: Merge from 3.3 branch: ------------------------------------------------------------------------ r72022 | philikon | 2007-01-14 14:45:26 +0100 (Sun, 14 Jan 2007) | 5 lines zope.tal.xmlparser.XMLParser couldn't deal with unicode strings, which meant that PageTemplates in XML mode whose source code was available as a unicode string failed. Fixed the problem and added a test that exercises a PT w/ unicode source in XML mode (HTML mode already worked). ------------------------------------------------------------------------ --- tests/test_xmlparser.py | 6 ++++++ xmlparser.py | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/tests/test_xmlparser.py b/tests/test_xmlparser.py index 3647872..02d5848 100644 --- a/tests/test_xmlparser.py +++ b/tests/test_xmlparser.py @@ -249,6 +249,12 @@ text def test_declaration_junk_chars(self): self._parse_error("") + def test_unicode_string(self): + output = [('starttag', u'p', []), + ('data', u'\xe4\xf6\xfc\xdf'), + ('endtag', u'p')] + self._run_check(u'<p>\xe4\xf6\xfc\xdf</p>', output) + # Support for the Zope regression test framework: def test_suite(skipxml=utils.skipxml): diff --git a/xmlparser.py b/xmlparser.py index 593bcbe..aafa693 100644 --- a/xmlparser.py +++ b/xmlparser.py @@ -74,6 +74,11 @@ class XMLParser(object): self.parseStream(open(filename)) def parseString(self, s): + if isinstance(s, unicode): + # Expat cannot deal with unicode strings, only with + # encoded ones. Also, its range of encodings is rather + # limited, UTF-8 is the safest bet here. + s = s.encode('utf-8') self.parser.Parse(s, 1) def parseURL(self, url): -- cgit v1.2.1