From ae8fd91d9ee289f31a30cc7a435fd077dface481 Mon Sep 17 00:00:00 2001
From: Philipp von Weitershausen <philipp@weitershausen.de>
Date: Sun, 14 Jan 2007 13:54:17 +0000
Subject: Merge from 3.3 branch:
 ------------------------------------------------------------------------
 r72022 | philikon | 2007-01-14 14:45:26 +0100 (Sun, 14 Jan 2007) | 5 lines

zope.tal.xmlparser.XMLParser couldn't deal with unicode strings, which meant that
PageTemplates in XML mode whose source code was available as a unicode string failed.
Fixed the problem and added a test that exercises a PageTemplate with unicode source in
XML mode (HTML mode already worked).

------------------------------------------------------------------------
---
 tests/test_xmlparser.py | 6 ++++++
 xmlparser.py            | 5 +++++
 2 files changed, 11 insertions(+)
diff --git a/tests/test_xmlparser.py b/tests/test_xmlparser.py
index 3647872..02d5848 100644
--- a/tests/test_xmlparser.py
+++ b/tests/test_xmlparser.py
@@ -249,6 +249,12 @@ text
     def test_declaration_junk_chars(self):
         self._parse_error("<!DOCTYPE foo $ >")
 
+    def test_unicode_string(self):
+        output = [('starttag', u'p', []),
+                  ('data', u'\xe4\xf6\xfc\xdf'),
+                  ('endtag', u'p')]
+        self._run_check(u'<p>\xe4\xf6\xfc\xdf</p>', output)
+
 
 # Support for the Zope regression test framework:
 def test_suite(skipxml=utils.skipxml):
diff --git a/xmlparser.py b/xmlparser.py
index 593bcbe..aafa693 100644
--- a/xmlparser.py
+++ b/xmlparser.py
@@ -74,6 +74,11 @@ class XMLParser(object):
         self.parseStream(open(filename))
 
     def parseString(self, s):
+        if isinstance(s, unicode):
+            # Expat cannot deal with unicode strings, only with
+            # encoded ones.  Also, its range of encodings is rather
+            # limited, so UTF-8 is the safest bet here.
+            s = s.encode('utf-8')
         self.parser.Parse(s, 1)
 
     def parseURL(self, url):
-- 
cgit v1.2.1