summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Cahoon <chris.cahoon@gmail.com> 2009-06-17 20:42:15 +0000
committer: Chris Cahoon <chris.cahoon@gmail.com> 2009-06-17 20:42:15 +0000
commit: bab5ab348e50f563e418580df1ae98df1d5c381f (patch)
tree: cc28706bb0275ad2f9a8f99db2a294b2cc236f4d
parent: 1896d531cb64873165195de46bb1dc5562887e5d (diff)
download: django-bab5ab348e50f563e418580df1ae98df1d5c381f.tar.gz
[soc2009/http-wsgi-improvements] Added more tests for #10190 and changed the logic so that they pass. http.charsets.determine_charset now takes the Accept-Charset header value instead of the request.
Passes the test suite, including the extensive tests of HttpResponse's Accept-Charset detection and of finding the codec from content_type. However, the tests do not yet verify that the codec encodes content properly.
git-svn-id: http://code.djangoproject.com/svn/django/branches/soc2009/http-wsgi-improvements@11030 bcc190cf-cafb-0310-a4f2-bffc1f526a37
-rw-r--r-- django/http/__init__.py 24
-rw-r--r-- django/http/charsets.py 14
-rw-r--r-- tests/regressiontests/charsets/models.py 3
-rw-r--r-- tests/regressiontests/charsets/tests.py 55
-rw-r--r-- tests/regressiontests/charsets/urls.py 15
-rw-r--r-- tests/regressiontests/charsets/views.py 10
6 files changed, 93 insertions, 28 deletions
diff --git a/django/http/__init__.py b/django/http/__init__.py
index 9f7e82ba46..b44d043350 100644
--- a/django/http/__init__.py
+++ b/django/http/__init__.py
@@ -13,7 +13,7 @@ except ImportError:
from django.utils.datastructures import MultiValueDict, ImmutableList
from django.utils.encoding import smart_str, iri_to_uri, force_unicode
from django.http.multipartparser import MultiPartParser
-from django.http.charsets import determine_charset
+from django.http.charsets import determine_charset, get_codec
from django.conf import settings
from django.core.files import uploadhandler
from utils import *
@@ -273,13 +273,20 @@ class HttpResponse(object):
status_code = 200
def __init__(self, content='', mimetype=None, status=None,
- content_type=None, origin_request=None):
+ content_type=None, request=None):
from django.conf import settings
self._charset = settings.DEFAULT_CHARSET
+ accept_charset = None
if mimetype:
content_type = mimetype # Mimetype is an alias for content-type
- if origin_request or content_type:
- self._charset, self._codec = determine_charset(content_type, origin_request)
+ if request:
+ accept_charset = request.META.get("ACCEPT_CHARSET")
+ if accept_charset or content_type:
+ charset, codec = determine_charset(content_type, accept_charset)
+ if charset:
+ self._charset = charset
+ if codec:
+ self._codec = codec
if not content_type:
content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE,
self._charset)
@@ -365,7 +372,10 @@ class HttpResponse(object):
def _get_content(self):
if self.has_header('Content-Encoding'):
return ''.join(self._container)
- return smart_str(''.join(self._container), self._charset)
+
+ if not self._codec:
+ self._codec = get_codec(self._charset)
+ return smart_str(''.join(self._container), self._codec.name)
def _set_content(self, value):
self._container = [value]
@@ -379,8 +389,10 @@ class HttpResponse(object):
def next(self):
chunk = self._iterator.next()
+ if not self._codec:
+ self._codec = get_codec(self._charset)
if isinstance(chunk, unicode):
- chunk = chunk.encode(self._charset)
+ chunk = chunk.encode(self._codec.name)
return str(chunk)
def close(self):
diff --git a/django/http/charsets.py b/django/http/charsets.py
index 035786e3aa..8b2e582320 100644
--- a/django/http/charsets.py
+++ b/django/http/charsets.py
@@ -252,7 +252,7 @@ max_dict_key = lambda l:sorted(l.iteritems(), key=itemgetter(1), reverse=True)[0
CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?')
ACCEPT_CHARSET_RE = re.compile('(?P<charset>([\w\d-]+)|(\*))(;q=(?P<q>[01](\.\d{1,3})?))?,?')
-def determine_charset(content_type, request):
+def determine_charset(content_type, accept_charset_header):
"""
Searches request headers from clients and mimetype settings (which may be set
by users) for indicators of which charset and encoding the response should use.
@@ -270,7 +270,6 @@ def determine_charset(content_type, request):
"""
codec = None
charset = None
-
# Attempt to get the codec from a content-type, and verify that the charset is valid.
if content_type:
match = CONTENT_TYPE_RE.match(content_type)
@@ -279,14 +278,19 @@ def determine_charset(content_type, request):
codec = get_codec(charset)
if not codec: # Unsupported charset
# we should throw an exception here
- print "No CODEC ON MIMETYPE"
+ # print "No CODEC ON MIMETYPE"
+ pass
+ # If we don't match a content-type header WITH charset, we give the default
+ else:
+ charset = settings.DEFAULT_CHARSET
+ codec = get_codec(settings.DEFAULT_CHARSET)
# Handle Accept-Charset (which we only do if we do not deal with content_type).
else:
- if request and "ACCEPT_CHARSET" in request.META:
+ if accept_charset_header:
# Get list of matches for Accepted-Charsets.
# [{ charset : q }, { charset : q }]
- match_iterator = ACCEPT_CHARSET_RE.finditer(request.META["ACCEPT_CHARSET"])
+ match_iterator = ACCEPT_CHARSET_RE.finditer(accept_charset_header)
accept_charset = [m.groupdict() for m in match_iterator]
else:
accept_charset = [] # use settings.DEFAULT_CHARSET
diff --git a/tests/regressiontests/charsets/models.py b/tests/regressiontests/charsets/models.py
index e69de29bb2..71a8362390 100644
--- a/tests/regressiontests/charsets/models.py
+++ b/tests/regressiontests/charsets/models.py
@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.
diff --git a/tests/regressiontests/charsets/tests.py b/tests/regressiontests/charsets/tests.py
index 03c62fb54c..c97b91bb70 100644
--- a/tests/regressiontests/charsets/tests.py
+++ b/tests/regressiontests/charsets/tests.py
@@ -1,10 +1,11 @@
-from django.test import Client, TestCase
import re
-from django.conf import settings
-CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?')
+from django.test import Client, TestCase
+from django.conf import settings
+from django.http.charsets import determine_charset, get_codec
+CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?')
def get_charset(response):
match = CONTENT_TYPE_RE.match(response.get("content-type",""))
if match:
@@ -14,6 +15,7 @@ def get_charset(response):
return charset
class ClientTest(TestCase):
+ urls = 'regressiontests.charsets.urls'
def test_good_accept_charset(self):
"Use Accept-Charset"
@@ -21,40 +23,65 @@ class ClientTest(TestCase):
# anyway.
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii,utf-8;q=0")
+
+ self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), "ascii")
-
+
+ def test_good_accept_charset2(self):
# us is an alias for ascii
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.9")
- self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
+
+ def test_good_accept_charset3(self):
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.7")
- self.assertEqual(get_charset(response), "us")
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(get_charset(response), "us")
+
+ def test_good_accept_charset4(self):
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9")
- self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
+
+ def test_good_accept_charset5(self):
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="utf-8;q=0")
+
+ self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), "ISO-8859-1")
-
+
def test_bad_accept_charset(self):
"Do not use a malformed Accept-Charset"
# The data is ignored, but let's check it doesn't crash the system
# anyway.
response = self.client.post('/accept_charset/', ACCEPT_CHARSET="this_is_junk")
+
+ self.assertEqual(response.status_code, 200)
self.assertEqual(get_charset(response), "utf-8")
def test_good_content_type(self):
- "Use content-type"
+ "Use good content-type"
# The data is ignored, but let's check it doesn't crash the system
# anyway.
response = self.client.post('/good_content_type/')
- self.assertEqual(get_charset(response), "us")
+ self.assertEqual(response.status_code, 200)
def test_bad_content_type(self):
- "Use content-type"
- # The data is ignored, but let's check it doesn't crash the system
- # anyway.
+ "Use bad content-type"
- response = self.client.post('/bad_content_type/') \ No newline at end of file
+ response = self.client.post('/bad_content_type/')
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(get_codec(get_charset(response)), None)
+
+ def test_content_type_no_charset(self):
+ response = self.client.post('/content_type_no_charset/')
+ self.assertEqual(get_charset(response), None)
+
+ def test_determine_charset(self):
+ content_type, codec = determine_charset("", "utf-8;q=0.8,*;q=0.9")
+ self.assertEqual(codec, get_codec("ISO-8859-1"))
+ \ No newline at end of file
diff --git a/tests/regressiontests/charsets/urls.py b/tests/regressiontests/charsets/urls.py
index 8df7e6054d..730d6a2fbe 100644
--- a/tests/regressiontests/charsets/urls.py
+++ b/tests/regressiontests/charsets/urls.py
@@ -1,9 +1,22 @@
from django.conf.urls.defaults import *
-
import views
+# Uncomment the next two lines to enable the admin:
+# from django.contrib import admin
+# admin.autodiscover()
+
urlpatterns = patterns('',
+ # Example:
+ # (r'^tutu/', include('tutu.foo.urls')),
+
+ # Uncomment the admin/doc line below and add 'django.contrib.admindocs'
+ # to INSTALLED_APPS to enable admin documentation:
+ # (r'^admin/doc/', include('django.contrib.admindocs.urls')),
+
+ # Uncomment the next line to enable the admin:
+ # (r'^admin/', include(admin.site.urls)),
(r'^accept_charset/', views.accept_charset),
(r'^good_content_type/', views.good_content_type),
(r'^bad_content_type/', views.bad_content_type),
+ (r'^content_type_no_charset/', views.content_type_no_charset),
)
diff --git a/tests/regressiontests/charsets/views.py b/tests/regressiontests/charsets/views.py
index cb9fa3a682..7005e8c0d7 100644
--- a/tests/regressiontests/charsets/views.py
+++ b/tests/regressiontests/charsets/views.py
@@ -2,10 +2,16 @@ from django.http import HttpResponse
from django.shortcuts import render_to_response
def accept_charset(request):
- return HttpResponse("ASCII.", origin_request=request)
+ return HttpResponse("ASCII.", request=request)
def good_content_type(request):
return HttpResponse("ASCII.", content_type="text/html; charset=us")
def bad_content_type(request):
- return HttpResponse("ASCII.", content_type="text/html; charset=this_should_be_junk")
+ return HttpResponse("UTF-8", content_type="text/html; charset=this_should_be_junk")
+
+def content_type_no_charset(request):
+ return HttpResponse("UTF-8", content_type="text/html")
+
+def encode_response(request):
+ return HttpResponse(u"\ue863", content_type="text/html; charset=GBK") \ No newline at end of file