Properly restore the HTML snippet detection, by looking at the entire string and not just its start.

author: Hanno Schlichting <hanno@hannosch.eu> 2011-07-27 09:44:17 +0000
committer: Hanno Schlichting <hanno@hannosch.eu> 2011-07-27 09:44:17 +0000
commit: ea3ac1206a8cc660bfba7f0911abde96c1baf7ad (patch)
tree: da01aca2e2b32208a90b9ef71e8d404d16136c39 /src
parent: 12c8fe589333d236d82dfd4d652891a984e81d93 (diff)
download: zope-contenttype-ea3ac1206a8cc660bfba7f0911abde96c1baf7ad.tar.gz
2 files changed, 8 insertions, 6 deletions
diff --git a/src/zope/contenttype/__init__.py b/src/zope/contenttype/__init__.py
index 1b42fd6..c0a26ec 100644
--- a/src/zope/contenttype/__init__.py
+++ b/src/zope/contenttype/__init__.py
@@ -28,10 +28,9 @@ def text_type(s):
     """
     # at least the maximum length of any tags we look for
     max_tags = 14
-    s = s.strip()[:max_tags]
-    s2 = s.lower()
+    s2 = s.strip()[:max_tags].lower()
 
-    if len(s) == max_tags:
+    if len(s2) == max_tags:
         if s2.startswith('<html>'):
             return 'text/html'
 
@@ -39,10 +38,11 @@ def text_type(s):
             return 'text/html'
 
         # what about encodings??
-        if s.startswith('<?xml'):
+        if s2.startswith('<?xml'):
             return 'text/xml'
 
-    # we also recognize small snippets of HTML
+    # we also recognize small snippets of HTML - the closing tag might be
+    # anywhere, even at the end of the string
     if '</' in s:
         return 'text/html'
 
diff --git a/src/zope/contenttype/tests/testContentTypes.py b/src/zope/contenttype/tests/testContentTypes.py
index d82d5bf..a09f278 100644
--- a/src/zope/contenttype/tests/testContentTypes.py
+++ b/src/zope/contenttype/tests/testContentTypes.py
@@ -72,7 +72,7 @@ class ContentTypesTestCase(unittest.TestCase):
         self.assertEqual(text_type('<?xml version="1.0"><foo/>'),
                          'text/xml')
         self.assertEqual(text_type('<?XML version="1.0"><foo/>'),
-                         'text/plain')
+                         'text/xml')
         self.assertEqual(text_type('foo bar'),
                          'text/plain')
         self.assertEqual(text_type('<!DOCTYPE HTML PUBLIC '
@@ -82,6 +82,8 @@ class ContentTypesTestCase(unittest.TestCase):
         self.assertEqual(text_type('\n\n<!DOCTYPE html>\n'), 'text/html')
         # we can also parse text snippets
         self.assertEqual(text_type('<p>Hello</p>'), 'text/html')
+        longtext = 'abc ' * 100
+        self.assertEqual(text_type('<p>%s</p>' % longtext), 'text/html')
         # See https://bugs.launchpad.net/bugs/487998
         self.assertEqual(text_type(' ' * 14 + HTML),
                          'text/html')
author	Hanno Schlichting <hanno@hannosch.eu>	2011-07-27 09:44:17 +0000
committer	Hanno Schlichting <hanno@hannosch.eu>	2011-07-27 09:44:17 +0000
commit	ea3ac1206a8cc660bfba7f0911abde96c1baf7ad (patch)
tree	da01aca2e2b32208a90b9ef71e8d404d16136c39 /src
parent	12c8fe589333d236d82dfd4d652891a984e81d93 (diff)
download	zope-contenttype-ea3ac1206a8cc660bfba7f0911abde96c1baf7ad.tar.gz