summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- pagetemplatefile.py 119
-rw-r--r-- tests/test_ptfile.py 198
-rw-r--r-- typesniffer.py 64
3 files changed, 381 insertions, 0 deletions
diff --git a/pagetemplatefile.py b/pagetemplatefile.py
new file mode 100644
index 0000000..3517f5f
--- /dev/null
+++ b/pagetemplatefile.py
@@ -0,0 +1,119 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Filesystem Page Template module
+
+Zope object encapsulating a Page Template from the filesystem.
+
+$Id$
+"""
+
+__all__ = ("PageTemplateFile",)
+
+import os
+import sys
+import re
+import logging
+
+from zope.pagetemplate.pagetemplate import PageTemplate
+
+from typesniffer import sniff_type
+from typesniffer import XML_PREFIX_MAX_LENGTH
+
+# Encoding used to decode HTML templates that carry no Content-Type <meta>
+# element (see PageTemplateFile._prepare_html).
+DEFAULT_ENCODING = "utf-8"
+
+# Matches an HTML element of the form
+#   <meta http-equiv="Content-Type" content="TYPE; charset=ENCODING">
+# together with the whitespace around it, case-insensitively.  Group 1
+# captures TYPE and group 2 captures ENCODING.
+meta_pattern = re.compile(
+ r'\s*<meta\s+http-equiv=["\']?Content-Type["\']?'
+ r'\s+content=["\']?([^;]+);\s*charset=([^"\']+)["\']?\s*>\s*',
+ re.IGNORECASE)
+
+def package_home(gdict):
+    """Return the directory containing the module whose globals are `gdict`.
+
+    `gdict` must provide a ``__file__`` entry; a missing entry raises
+    KeyError.
+    """
+    return os.path.dirname(gdict["__file__"])
+
+class PageTemplateFile(PageTemplate):
+    """Zope wrapper for filesystem Page Template using TAL, TALES, and METAL.
+
+    The template source lives in the file named by ``self.filename``;
+    ``_cook_check()`` (re)reads and cooks it when the file's modification
+    time differs from the one recorded at the last successful cook.
+    """
+
+    # mtime stored by the last error-free _cook_check(); starts at 0.
+    _v_last_read = 0
+
+    def __init__(self, filename, _prefix=None):
+        """Locate the template file.
+
+        `filename` is joined onto the directory obtained from
+        ``get_path_from_prefix(_prefix)``.
+
+        Raises ValueError if the resulting path is not an existing file.
+        """
+        path = self.get_path_from_prefix(_prefix)
+        self.filename = os.path.join(path, filename)
+        if not os.path.isfile(self.filename):
+            raise ValueError("No such file", self.filename)
+
+    def get_path_from_prefix(self, _prefix):
+        """Return the directory that `_prefix` stands for.
+
+        `_prefix` may be:
+
+          o a string, which is returned unchanged;
+          o a dictionary with a ``__file__`` entry (typically a module's
+            globals), whose directory is returned;
+          o None, in which case the globals of the calling code are used.
+        """
+        if isinstance(_prefix, str):
+            return _prefix
+        if _prefix is None:
+            # When reached through __init__, frame 0 is this method,
+            # frame 1 is __init__ and frame 2 is the code that
+            # instantiated the template.
+            _prefix = sys._getframe(2).f_globals
+        return package_home(_prefix)
+
+    def _prepare_html(self, text):
+        """Decode HTML source, honouring a Content-Type <meta> element.
+
+        Returns a ``(unicode_text, content_type)`` pair.  If `text`
+        contains an element matched by ``meta_pattern``, the charset it
+        names is used for decoding, the content type it names is returned
+        and every matching element is removed from the text.  Otherwise
+        the text is decoded with DEFAULT_ENCODING and the content type is
+        None.
+        """
+        match = meta_pattern.search(text)
+        if match is None:
+            return unicode(text, DEFAULT_ENCODING), None
+        content_type, encoding = match.groups()
+        # TODO: Shouldn't <meta>/<?xml?> stripping
+        # be in PageTemplate.__call__()?
+        text = meta_pattern.sub("", text)
+        return unicode(text, encoding), content_type
+
+    def _read_file(self):
+        """Read the template file and return ``(text, content_type)``.
+
+        Only the first XML_PREFIX_MAX_LENGTH bytes are passed to
+        ``sniff_type()``.  XML input is returned as the raw bytes read in
+        binary mode; anything else is re-read in text mode and decoded by
+        ``_prepare_html()``.
+
+        The file is closed on every path, including when reading or
+        sniffing raises.
+        """
+        __traceback_info__ = self.filename
+        f = open(self.filename, "rb")
+        try:
+            text = f.read(XML_PREFIX_MAX_LENGTH)
+            type_ = sniff_type(text)
+            if type_ == "text/xml":
+                text += f.read()
+                return text, type_
+        finally:
+            f.close()
+        # For HTML, we really want the file read in text mode:
+        f = open(self.filename)
+        try:
+            text = f.read()
+        finally:
+            f.close()
+        return self._prepare_html(text)
+
+    def _cook_check(self):
+        """Re-read and cook the template if the file appears to have changed.
+
+        Cooking errors are logged and leave ``_v_last_read`` unchanged.
+        """
+        if self._v_last_read and not __debug__:
+            # Without __debug__, a template that has been read once is
+            # never checked again.
+            return
+        __traceback_info__ = self.filename
+        try:
+            mtime = os.path.getmtime(self.filename)
+        except OSError:
+            mtime = 0
+        if self._v_program is not None and mtime == self._v_last_read:
+            return
+        text, content_type = self._read_file()
+        self.pt_edit(text, content_type)
+        self._cook()
+        if self._v_errors:
+            logging.error('PageTemplateFile: Error in template: %s',
+                          '\n'.join(self._v_errors))
+            return
+        self._v_last_read = mtime
+
+    def pt_source_file(self):
+        """Return the path of the file backing this template."""
+        return self.filename
+
+    def __getstate__(self):
+        """Refuse pickling; always raises TypeError."""
+        raise TypeError("non-picklable object")
diff --git a/tests/test_ptfile.py b/tests/test_ptfile.py
new file mode 100644
index 0000000..6d8d5fc
--- /dev/null
+++ b/tests/test_ptfile.py
@@ -0,0 +1,198 @@
+##############################################################################
+#
+# Copyright (c) 2004 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Tests of PageTemplateFile.
+
+$Id$
+"""
+import os
+import tempfile
+import unittest
+
+from zope.pagetemplate.pagetemplatefile import PageTemplateFile
+from zope.pagetemplate.typesniffer import sniff_type
+
+class TypeSniffingTestCase(unittest.TestCase):
+    """Tests of content-type sniffing and HTML decoding in PageTemplateFile."""
+
+    # Path of the scratch template file; assigned afresh for each test
+    # in setUp().
+    TEMPFILENAME = None
+
+    def setUp(self):
+        # mkstemp() creates the file itself, which avoids the race that
+        # the deprecated tempfile.mktemp() is subject to.
+        fd, self.TEMPFILENAME = tempfile.mkstemp()
+        os.close(fd)
+
+    def tearDown(self):
+        if os.path.exists(self.TEMPFILENAME):
+            os.unlink(self.TEMPFILENAME)
+
+    def get_pt(self, text):
+        """Write `text` to the scratch file and return a template read from it."""
+        f = open(self.TEMPFILENAME, "wb")
+        try:
+            f.write(text)
+        finally:
+            f.close()
+        pt = PageTemplateFile(self.TEMPFILENAME)
+        pt.read()
+        return pt
+
+    def check_content_type(self, text, expected_type):
+        """Assert that a template containing `text` gets `expected_type`."""
+        pt = self.get_pt(text)
+        self.assertEqual(pt.content_type, expected_type)
+
+    def test_sniffer_xml_ascii(self):
+        self.check_content_type(
+            "<?xml version='1.0' encoding='ascii'?><doc/>",
+            "text/xml")
+        self.check_content_type(
+            "<?xml\tversion='1.0' encoding='ascii'?><doc/>",
+            "text/xml")
+
+    def test_sniffer_xml_utf8(self):
+        # w/out byte order mark
+        self.check_content_type(
+            "<?xml version='1.0' encoding='utf-8'?><doc/>",
+            "text/xml")
+        self.check_content_type(
+            "<?xml\tversion='1.0' encoding='utf-8'?><doc/>",
+            "text/xml")
+        # with byte order mark
+        self.check_content_type(
+            "\xef\xbb\xbf<?xml version='1.0' encoding='utf-8'?><doc/>",
+            "text/xml")
+        self.check_content_type(
+            "\xef\xbb\xbf<?xml\tversion='1.0' encoding='utf-8'?><doc/>",
+            "text/xml")
+
+    def test_sniffer_xml_utf16_be(self):
+        # w/out byte order mark
+        self.check_content_type(
+            "\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'"
+            "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
+            "\0<\0d\0o\0c\0/\0>",
+            "text/xml")
+        self.check_content_type(
+            "\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'"
+            "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
+            "\0<\0d\0o\0c\0/\0>",
+            "text/xml")
+        # with byte order mark
+        self.check_content_type(
+            "\xfe\xff"
+            "\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'"
+            "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
+            "\0<\0d\0o\0c\0/\0>",
+            "text/xml")
+        self.check_content_type(
+            "\xfe\xff"
+            "\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'"
+            "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
+            "\0<\0d\0o\0c\0/\0>",
+            "text/xml")
+
+    def test_sniffer_xml_utf16_le(self):
+        # w/out byte order mark
+        self.check_content_type(
+            "<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0"
+            " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
+            "<\0d\0o\0c\0/\0>\n",
+            "text/xml")
+        self.check_content_type(
+            "<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0"
+            " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
+            "<\0d\0o\0c\0/\0>\0",
+            "text/xml")
+        # with byte order mark
+        self.check_content_type(
+            "\xff\xfe"
+            "<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0"
+            " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
+            "<\0d\0o\0c\0/\0>\0",
+            "text/xml")
+        self.check_content_type(
+            "\xff\xfe"
+            "<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0"
+            " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
+            "<\0d\0o\0c\0/\0>\0",
+            "text/xml")
+
+    HTML_PUBLIC_ID = "-//W3C//DTD HTML 4.01 Transitional//EN"
+    HTML_SYSTEM_ID = "http://www.w3.org/TR/html4/loose.dtd"
+
+    def test_sniffer_html_ascii(self):
+        self.check_content_type(
+            "<!DOCTYPE html [ SYSTEM '%s' ]><html></html>"
+            % self.HTML_SYSTEM_ID,
+            "text/html")
+        self.check_content_type(
+            "<html><head><title>sample document</title></head></html>",
+            "text/html")
+
+    # TODO: This reflects a case that simply isn't handled by the
+    # sniffer; there are many, but it gets it right more often than
+    # before.
+    def donttest_sniffer_xml_simple(self):
+        self.check_content_type("<doc><element/></doc>",
+                                "text/xml")
+
+    def test_html_default_encoding(self):
+        pt = self.get_pt(
+            "<html><head><title>"
+            # 'Test' in russian (utf-8)
+            "\xd0\xa2\xd0\xb5\xd1\x81\xd1\x82"
+            "</title></head></html>")
+        rendered = pt()
+        self.failUnless(isinstance(rendered, unicode))
+        self.failUnlessEqual(rendered,
+                             u"<html><head><title>"
+                             u"\u0422\u0435\u0441\u0442"
+                             u"</title></head></html>\n")
+
+    def test_html_encoding_by_meta(self):
+        pt = self.get_pt(
+            "<html><head><title>"
+            # 'Test' in russian (windows-1251)
+            "\xd2\xe5\xf1\xf2"
+            '</title><meta http-equiv="Content-Type"'
+            ' content="text/html; charset=windows-1251">'
+            "</head></html>")
+        rendered = pt()
+        self.failUnless(isinstance(rendered, unicode))
+        self.failUnlessEqual(rendered,
+                             u"<html><head><title>"
+                             u"\u0422\u0435\u0441\u0442"
+                             u"</title></head></html>\n")
+
+    ##def test_xml_sniffing_from_extension(self):
+    ##    # This checks the extension of the page template
+    ##    this_directory = os.path.split(__file__)[0]
+    ##    filepath = os.path.join(
+    ##        this_directory,
+    ##        'test.xpt')
+    ##    xpt = PageTemplateFile(filepath)
+    ##    self.assert_(os.path.normcase(xpt.filename).endswith('.xpt'))
+    ##    text, type_ = xpt._read_file()
+    ##    self.assertEqual(type_, 'text/xml')
+
+    def test_type_sniffing_based_on_xmlns(self):
+        # sniff_type is imported at module level.
+        self.assertEqual(
+            sniff_type("<doc><element/></doc>"), None)
+        self.assertEqual(
+            sniff_type("<doc xmlns=''><element/></doc>"), 'text/xml')
+        self.assertEqual(
+            sniff_type("<doc><element xmlns=''/></doc>"), 'text/xml')
+        self.assertEqual(
+            sniff_type("<doc xmlns='http://foo/bar'><element/></doc>"),
+            'text/xml')
+        self.assertEqual(
+            sniff_type("<doc ><element xmlns='http://foo/bar'/></doc>"),
+            'text/xml')
+
+def test_suite():
+    """Return a suite holding every test method of TypeSniffingTestCase."""
+    suite = unittest.makeSuite(TypeSniffingTestCase)
+    return suite
+
+if __name__ == "__main__":
+    unittest.main(defaultTest="test_suite")
diff --git a/typesniffer.py b/typesniffer.py
new file mode 100644
index 0000000..6deeb99
--- /dev/null
+++ b/typesniffer.py
@@ -0,0 +1,64 @@
+##############################################################################
+#
+# Copyright (c) 2005 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Type sniffer for page template input
+
+$Id$
+"""
+
+import xml.parsers.expat
+
+# Byte sequences that the start of an XML declaration ("<?xml") takes in
+# the encodings noted on each line.  sniff_type() reports input beginning
+# with any of them as 'text/xml'.
+XML_PREFIXES = [
+ "<?xml", # ascii, utf-8
+ "\xef\xbb\xbf<?xml", # utf-8 w/ byte order mark
+ "\0<\0?\0x\0m\0l", # utf-16 big endian
+ "<\0?\0x\0m\0l\0", # utf-16 little endian
+ "\xfe\xff\0<\0?\0x\0m\0l", # utf-16 big endian w/ byte order mark
+ "\xff\xfe<\0?\0x\0m\0l\0", # utf-16 little endian w/ byte order mark
+ ]
+
+# Length of the longest entry in XML_PREFIXES.
+XML_PREFIX_MAX_LENGTH = max(map(len, XML_PREFIXES))
+
+class NamespaceFound(Exception):
+ # Raised by StartNamespaceDeclHandler when the parser reports a
+ # namespace declaration, in order to stop the parsing.
+ pass
+
+def StartNamespaceDeclHandler(prefix, url):
+    """Handler called when an element contains a namespace declaration.
+
+    Both arguments are ignored; NamespaceFound is raised unconditionally
+    so that parsing stops at the first declaration.
+    """
+    raise NamespaceFound()
+
+def sniff_type(text):
+    """Guess whether `text` is XML.
+
+    Returns 'text/xml' if either of the following holds, otherwise None:
+
+      o the text starts with one of the XML_PREFIXES, i.e. with an XML
+        declaration <?xml ... ?>
+      o the parser reports a namespace declaration while parsing the text
+    """
+
+    # An XML declaration at the very start settles the question.
+    leading = [marker for marker in XML_PREFIXES if text.startswith(marker)]
+    if leading:
+        return "text/xml"
+
+    # Otherwise parse the text; the handler raises NamespaceFound at the
+    # first namespace declaration.
+    sniffer = xml.parsers.expat.ParserCreate(namespace_separator=' ')
+    sniffer.StartNamespaceDeclHandler = StartNamespaceDeclHandler
+    try:
+        sniffer.Parse(text)
+    except NamespaceFound:
+        return "text/xml"
+    except xml.parsers.expat.ExpatError:
+        # The parser rejected the text: not recognised as XML.
+        pass
+    return None
+