From bab5ab348e50f563e418580df1ae98df1d5c381f Mon Sep 17 00:00:00 2001 From: Chris Cahoon Date: Wed, 17 Jun 2009 20:42:15 +0000 Subject: [soc2009/http-wsgi-improvements] Added more tests for #10190, changed logic to pass them. http.charsets.determine_charset now takes the accept_charset header instead of the request. Passes the test suite, including the extensive tests on HttpResponse's detection of Accept-Charset and finding the codec from content_type. However, it does not test that the codec encodes properly. git-svn-id: http://code.djangoproject.com/svn/django/branches/soc2009/http-wsgi-improvements@11030 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/http/__init__.py | 24 ++++++++++---- django/http/charsets.py | 14 +++++--- tests/regressiontests/charsets/models.py | 3 ++ tests/regressiontests/charsets/tests.py | 55 ++++++++++++++++++++++++-------- tests/regressiontests/charsets/urls.py | 15 ++++++++- tests/regressiontests/charsets/views.py | 10 ++++-- 6 files changed, 93 insertions(+), 28 deletions(-) diff --git a/django/http/__init__.py b/django/http/__init__.py index 9f7e82ba46..b44d043350 100644 --- a/django/http/__init__.py +++ b/django/http/__init__.py @@ -13,7 +13,7 @@ except ImportError: from django.utils.datastructures import MultiValueDict, ImmutableList from django.utils.encoding import smart_str, iri_to_uri, force_unicode from django.http.multipartparser import MultiPartParser -from django.http.charsets import determine_charset +from django.http.charsets import determine_charset, get_codec from django.conf import settings from django.core.files import uploadhandler from utils import * @@ -273,13 +273,20 @@ class HttpResponse(object): status_code = 200 def __init__(self, content='', mimetype=None, status=None, - content_type=None, origin_request=None): + content_type=None, request=None): from django.conf import settings self._charset = settings.DEFAULT_CHARSET + accept_charset = None if mimetype: content_type = mimetype # Mimetype is an alias for 
content-type - if origin_request or content_type: - self._charset, self._codec = determine_charset(content_type, origin_request) + if request: + accept_charset = request.META.get("ACCEPT_CHARSET") + if accept_charset or content_type: + charset, codec = determine_charset(content_type, accept_charset) + if charset: + self._charset = charset + if codec: + self._codec = codec if not content_type: content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE, self._charset) @@ -365,7 +372,10 @@ class HttpResponse(object): def _get_content(self): if self.has_header('Content-Encoding'): return ''.join(self._container) - return smart_str(''.join(self._container), self._charset) + + if not self._codec: + self._codec = get_codec(self._charset) + return smart_str(''.join(self._container), self._codec.name) def _set_content(self, value): self._container = [value] @@ -379,8 +389,10 @@ class HttpResponse(object): def next(self): chunk = self._iterator.next() + if not self._codec: + self._codec = get_codec(self._charset) if isinstance(chunk, unicode): - chunk = chunk.encode(self._charset) + chunk = chunk.encode(self._codec.name) return str(chunk) def close(self): diff --git a/django/http/charsets.py b/django/http/charsets.py index 035786e3aa..8b2e582320 100644 --- a/django/http/charsets.py +++ b/django/http/charsets.py @@ -252,7 +252,7 @@ max_dict_key = lambda l:sorted(l.iteritems(), key=itemgetter(1), reverse=True)[0 CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') ACCEPT_CHARSET_RE = re.compile('(?P<charset>([\w\d-]+)|(\*))(;q=(?P<q>[01](\.\d{1,3})?))?,?') -def determine_charset(content_type, request): +def determine_charset(content_type, accept_charset_header): """ Searches request headers from clients and mimetype settings (which may be set by users) for indicators of which charset and encoding the response should use. 
@@ -270,7 +270,6 @@ def determine_charset(content_type, request): """ codec = None charset = None - # Attempt to get the codec from a content-type, and verify that the charset is valid. if content_type: match = CONTENT_TYPE_RE.match(content_type) @@ -279,14 +278,19 @@ def determine_charset(content_type, request): codec = get_codec(charset) if not codec: # Unsupported charset # we should throw an exception here - print "No CODEC ON MIMETYPE" + # print "No CODEC ON MIMETYPE" + pass + # If we don't match a content-type header WITH charset, we give the default + else: + charset = settings.DEFAULT_CHARSET + codec = get_codec(settings.DEFAULT_CHARSET) # Handle Accept-Charset (which we only do if we do not deal with content_type). else: - if request and "ACCEPT_CHARSET" in request.META: + if accept_charset_header: # Get list of matches for Accepted-Charsets. # [{ charset : q }, { charset : q }] - match_iterator = ACCEPT_CHARSET_RE.finditer(request.META["ACCEPT_CHARSET"]) + match_iterator = ACCEPT_CHARSET_RE.finditer(accept_charset_header) accept_charset = [m.groupdict() for m in match_iterator] else: accept_charset = [] # use settings.DEFAULT_CHARSET diff --git a/tests/regressiontests/charsets/models.py b/tests/regressiontests/charsets/models.py index e69de29bb2..71a8362390 100644 --- a/tests/regressiontests/charsets/models.py +++ b/tests/regressiontests/charsets/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. 
diff --git a/tests/regressiontests/charsets/tests.py b/tests/regressiontests/charsets/tests.py index 03c62fb54c..c97b91bb70 100644 --- a/tests/regressiontests/charsets/tests.py +++ b/tests/regressiontests/charsets/tests.py @@ -1,10 +1,11 @@ -from django.test import Client, TestCase import re -from django.conf import settings -CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') +from django.test import Client, TestCase +from django.conf import settings +from django.http.charsets import determine_charset, get_codec +CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') def get_charset(response): match = CONTENT_TYPE_RE.match(response.get("content-type","")) if match: @@ -14,6 +15,7 @@ def get_charset(response): return charset class ClientTest(TestCase): + urls = 'regressiontests.charsets.urls' def test_good_accept_charset(self): "Use Accept-Charset" @@ -21,40 +23,65 @@ class ClientTest(TestCase): # anyway. response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii,utf-8;q=0") + + self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "ascii") - + + def test_good_accept_charset2(self): # us is an alias for ascii response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.9") - self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) + self.assertEqual(response.status_code, 200) + self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) + + def test_good_accept_charset3(self): response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.7") - self.assertEqual(get_charset(response), "us") + self.assertEqual(response.status_code, 200) + self.assertEqual(get_charset(response), "us") + + def test_good_accept_charset4(self): response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9") - self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) + self.assertEqual(response.status_code, 200) + self.assertEqual(get_charset(response), 
settings.DEFAULT_CHARSET) + + def test_good_accept_charset5(self): response = self.client.post('/accept_charset/', ACCEPT_CHARSET="utf-8;q=0") + + self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "ISO-8859-1") - + def test_bad_accept_charset(self): "Do not use a malformed Accept-Charset" # The data is ignored, but let's check it doesn't crash the system # anyway. response = self.client.post('/accept_charset/', ACCEPT_CHARSET="this_is_junk") + + self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "utf-8") def test_good_content_type(self): - "Use content-type" + "Use good content-type" # The data is ignored, but let's check it doesn't crash the system # anyway. response = self.client.post('/good_content_type/') - self.assertEqual(get_charset(response), "us") + self.assertEqual(response.status_code, 200) def test_bad_content_type(self): - "Use content-type" - # The data is ignored, but let's check it doesn't crash the system - # anyway. 
+ "Use bad content-type" - response = self.client.post('/bad_content_type/') \ No newline at end of file + response = self.client.post('/bad_content_type/') + self.assertEqual(response.status_code, 200) + self.assertEqual(get_codec(get_charset(response)), None) + + def test_content_type_no_charset(self): + response = self.client.post('/content_type_no_charset/') + self.assertEqual(get_charset(response), None) + + def test_determine_charset(self): + content_type, codec = determine_charset("", "utf-8;q=0.8,*;q=0.9") + self.assertEqual(codec, get_codec("ISO-8859-1")) + \ No newline at end of file diff --git a/tests/regressiontests/charsets/urls.py b/tests/regressiontests/charsets/urls.py index 8df7e6054d..730d6a2fbe 100644 --- a/tests/regressiontests/charsets/urls.py +++ b/tests/regressiontests/charsets/urls.py @@ -1,9 +1,22 @@ from django.conf.urls.defaults import * - import views +# Uncomment the next two lines to enable the admin: +# from django.contrib import admin +# admin.autodiscover() + urlpatterns = patterns('', + # Example: + # (r'^tutu/', include('tutu.foo.urls')), + + # Uncomment the admin/doc line below and add 'django.contrib.admindocs' + # to INSTALLED_APPS to enable admin documentation: + # (r'^admin/doc/', include('django.contrib.admindocs.urls')), + + # Uncomment the next line to enable the admin: + # (r'^admin/', include(admin.site.urls)), (r'^accept_charset/', views.accept_charset), (r'^good_content_type/', views.good_content_type), (r'^bad_content_type/', views.bad_content_type), + (r'^content_type_no_charset/', views.content_type_no_charset), ) diff --git a/tests/regressiontests/charsets/views.py b/tests/regressiontests/charsets/views.py index cb9fa3a682..7005e8c0d7 100644 --- a/tests/regressiontests/charsets/views.py +++ b/tests/regressiontests/charsets/views.py @@ -2,10 +2,16 @@ from django.http import HttpResponse from django.shortcuts import render_to_response def accept_charset(request): - return HttpResponse("ASCII.", 
origin_request=request) + return HttpResponse("ASCII.", request=request) def good_content_type(request): return HttpResponse("ASCII.", content_type="text/html; charset=us") def bad_content_type(request): - return HttpResponse("ASCII.", content_type="text/html; charset=this_should_be_junk") + return HttpResponse("UTF-8", content_type="text/html; charset=this_should_be_junk") + +def content_type_no_charset(request): + return HttpResponse("UTF-8", content_type="text/html") + +def encode_response(request): + return HttpResponse(u"\ue863", content_type="text/html; charset=GBK") \ No newline at end of file -- cgit v1.2.1