summaryrefslogtreecommitdiff
path: root/extras
diff options
context:
space:
mode:
authorLuke Plant <L.Plant.98@cantab.net>2009-10-26 23:23:07 +0000
committerLuke Plant <L.Plant.98@cantab.net>2009-10-26 23:23:07 +0000
commit8e70cef9b67433edd70935dcc30c621d1e7fc0a0 (patch)
tree9dc32d96165c27bb0be761cce3de5c85e0ccf9a5 /extras
parentd1da26141788f8b359d96c49bc596125598d23ee (diff)
downloaddjango-8e70cef9b67433edd70935dcc30c621d1e7fc0a0.tar.gz
Fixed #9977 - CsrfMiddleware gets template tag added, session dependency removed, and turned on by default.
This is a large change to CSRF protection for Django. It includes: * removing the dependency on the session framework. * deprecating CsrfResponseMiddleware, and replacing with a core template tag. * turning on CSRF protection by default by adding CsrfViewMiddleware to the default value of MIDDLEWARE_CLASSES. * protecting all contrib apps (whatever is in settings.py) using a decorator. For existing users of the CSRF functionality, it should be a seamless update, but please note that it includes DEPRECATION of features in Django 1.1, and there are upgrade steps which are detailed in the docs. Many thanks to 'Glenn' and 'bthomas', who did a lot of the thinking and work on the patch, and to lots of other people including Simon Willison and Russell Keith-Magee who refined the ideas. Details of the rationale for these changes is found here: http://code.djangoproject.com/wiki/CsrfProtection As of this commit, the CSRF code is mainly in 'contrib'. The code will be moved to core in a separate commit, to make the changeset as readable as possible. git-svn-id: http://code.djangoproject.com/svn/django/trunk@11660 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Diffstat (limited to 'extras')
-rw-r--r--extras/csrf_migration_helper.py369
1 files changed, 369 insertions, 0 deletions
diff --git a/extras/csrf_migration_helper.py b/extras/csrf_migration_helper.py
new file mode 100644
index 0000000000..bc352a1762
--- /dev/null
+++ b/extras/csrf_migration_helper.py
@@ -0,0 +1,369 @@
+#!/usr/bin/env python
+
+# This script aims to help developers locate forms and view code that needs to
+# use the new CSRF protection in Django 1.2. It tries to find all the code that
+# may need the steps described in the CSRF documentation. It does not modify
+# any code directly, it merely attempts to locate it. Developers should be
+# aware of its limitations, described below.
+#
+# For each template that contains at least one POST form, the following info is printed:
+#
+# <Absolute path to template>
+# AKA: <Aliases (relative to template directory/directories that contain it)>
+# POST forms: <Number of POST forms>
+# With token: <Number of POST forms with the CSRF token already added>
+# Without token:
+# <File name and line number of form without token>
+#
+# Searching for:
+# <Template names that need to be searched for in view code
+# (includes templates that 'include' current template)>
+#
+# Found:
+# <File name and line number of any view code found>
+#
+# The format used allows this script to be used in Emacs grep mode:
+# M-x grep
+# Run grep (like this): /path/to/my/virtualenv/python /path/to/django/src/extras/csrf_migration_helper.py --settings=mysettings /path/to/my/srcs
+
+
+# Limitations
+# ===========
+#
+# - All templates must be stored on disk in '.html' or '.htm' files.
+# (extensions configurable below)
+#
+# - All Python code must be stored on disk in '.py' files. (extensions
+# configurable below)
+#
+# - All templates must be accessible from TEMPLATE_DIRS or from the 'templates/'
+# directory in apps specified in INSTALLED_APPS. Non-file based template
+# loaders are out of the picture, because there is no way to ask them to
+# return all templates.
+#
+# - If you put the {% csrf_token %} tag on the same line as the <form> tag it
+# will be detected, otherwise it will be assumed that the form does not have
+# the token.
+#
+# - It's impossible to programmatically determine which forms should and should
+# not have the token added. The developer must decide when to do this,
+# ensuring that the token is only added to internally targetted forms.
+#
+# - It's impossible to programmatically work out when a template is used. The
+# attempts to trace back to view functions are guesses, and could easily fail
+# in the following ways:
+#
+# * If the 'include' template tag is used with a variable
+# i.e. {% include tname %} where tname is a variable containing the actual
+# template name, rather than {% include "my_template.html" %}.
+#
+# * If the template name has been built up by view code instead of as a simple
+# string. For example, generic views and the admin both do this. (These
+# apps are both contrib and both use RequestContext already, as it happens).
+#
+# * If the 'ssl' tag (or any template tag other than 'include') is used to
+# include the template in another template.
+#
+# - All templates belonging to apps referenced in INSTALLED_APPS will be
+# searched, which may include third party apps or Django contrib. In some
+# cases, this will be a good thing, because even if the templates of these
+# apps have been fixed by someone else, your own view code may reference the
+# same template and may need to be updated.
+#
+# You may, however, wish to comment out some entries in INSTALLED_APPS or
+# TEMPLATE_DIRS before running this script.
+
+# Improvements to this script are welcome!
+
+# Configuration
+# =============
+
+TEMPLATE_EXTENSIONS = [
+ ".html",
+ ".htm",
+ ]
+
+PYTHON_SOURCE_EXTENSIONS = [
+ ".py",
+ ]
+
+TEMPLATE_ENCODING = "UTF-8"
+
+PYTHON_ENCODING = "UTF-8"
+
+# Method
+# ======
+
+# Find templates:
+# - template dirs
+# - installed apps
+#
+# Search for POST forms
+# - Work out what the name of the template is, as it would appear in an
+# 'include' or get_template() call. This can be done by comparing template
+# filename to all template dirs. Some templates can have more than one
+# 'name' e.g. if a directory and one of its child directories are both in
+# TEMPLATE_DIRS. This is actually a common hack used for
+# overriding-and-extending admin templates.
+#
+# For each POST form,
+# - see if it already contains '{% csrf_token %}' immediately after <form>
+# - work back to the view function(s):
+# - First, see if the form is included in any other templates, then
+# recursively compile a list of affected templates.
+# - Find any code function that references that template. This is just a
+# brute force text search that can easily return false positives
+# and fail to find real instances.
+
+
+import os
+import sys
+import re
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+
+USAGE = """
+This tool helps to locate forms that need CSRF tokens added and the
+corresponding view code. This processing is NOT fool proof, and you should read
+the help contained in the script itself. Also, this script may need configuring
+(by editing the script) before use.
+
+Usage:
+
+python csrf_migration_helper.py [--settings=path.to.your.settings] /path/to/python/code [more paths...]
+
+ Paths can be specified as relative paths.
+
+ With no arguments, this help is printed.
+"""
+
+_POST_FORM_RE = \
+ re.compile(r'(<form\W[^>]*\bmethod\s*=\s*(\'|"|)POST(\'|"|)\b[^>]*>)', re.IGNORECASE)
+_TOKEN_RE = re.compile('\{% csrf_token')
+
+def get_template_dirs():
+ """
+ Returns a set of all directories that contain project templates.
+ """
+ from django.conf import settings
+ dirs = set()
+ if 'django.template.loaders.filesystem.load_template_source' in settings.TEMPLATE_LOADERS:
+ dirs.update(map(unicode, settings.TEMPLATE_DIRS))
+
+ if 'django.template.loaders.app_directories.load_template_source' in settings.TEMPLATE_LOADERS:
+ from django.template.loaders.app_directories import app_template_dirs
+ dirs.update(app_template_dirs)
+ return dirs
+
+def make_template_info(filename, root_dirs):
+ """
+ Creates a Template object for a filename, calculating the possible
+ relative_filenames from the supplied filename and root template directories
+ """
+ return Template(filename,
+ [filename[len(d)+1:] for d in root_dirs if filename.startswith(d)])
+
+
+class Template(object):
+ def __init__(self, absolute_filename, relative_filenames):
+ self.absolute_filename, self.relative_filenames = absolute_filename, relative_filenames
+
+ def content(self):
+ try:
+ return self._content
+ except AttributeError:
+ fd = open(self.absolute_filename)
+ content = fd.read().decode(TEMPLATE_ENCODING)
+ fd.close()
+ self._content = content
+ return content
+ content = property(content)
+
+ def post_form_info(self):
+ """
+ Get information about any POST forms in the template.
+ Returns [(linenumber, csrf_token added)]
+ """
+ matches = []
+ for ln, line in enumerate(self.content.split("\n")):
+ m = _POST_FORM_RE.search(line)
+ if m is not None:
+ matches.append((ln + 1, _TOKEN_RE.search(line) is not None))
+ return matches
+
+ def includes_template(self, t):
+ """
+ Returns true if this template includes template 't' (via {% include %})
+ """
+ for r in t.relative_filenames:
+ if re.search(r'\{%\s*include\s+"' + re.escape(r) + r'"\s*%\}', self.content):
+ return True
+ return False
+
+ def related_templates(self):
+ """
+ Returns all templates that include this one, recursively. (starting
+ with this one)
+ """
+ try:
+ return self._related_templates
+ except AttributeError:
+ pass
+
+ retval = set([self])
+ for r in self.relative_filenames:
+ for t in self.all_templates:
+ if t.includes_template(self):
+ # If two templates mutually include each other, directly or
+ # indirectly, we have a problem here...
+ retval = retval.union(t.related_templates())
+
+ self._related_templates = retval
+ return retval
+
+ def __repr__(self):
+ return repr(self.absolute_filename)
+
+ def __eq__(self, other):
+ return self.absolute_filename == other.absolute_filename
+
+ def __hash__(self):
+ return hash(self.absolute_filename)
+
+def get_templates(dirs):
+ """
+ Returns all files in dirs that have template extensions, as Template
+ objects.
+ """
+ templates = set()
+ for root in dirs:
+ for (dirpath, dirnames, filenames) in os.walk(root):
+ for f in filenames:
+ if len([True for e in TEMPLATE_EXTENSIONS if f.endswith(e)]) > 0:
+ t = make_template_info(os.path.join(dirpath, f), dirs)
+ # templates need to be able to search others:
+ t.all_templates = templates
+ templates.add(t)
+ return templates
+
+def get_python_code(paths):
+ """
+ Returns all Python code, as a list of tuples, each one being:
+ (filename, list of lines)
+ """
+ retval = []
+ for p in paths:
+ for (dirpath, dirnames, filenames) in os.walk(p):
+ for f in filenames:
+ if len([True for e in PYTHON_SOURCE_EXTENSIONS if f.endswith(e)]) > 0:
+ fn = os.path.join(dirpath, f)
+ fd = open(fn)
+ content = [l.decode(PYTHON_ENCODING) for l in fd.readlines()]
+ fd.close()
+ retval.append((fn, content))
+ return retval
+
+def search_python_list(python_code, template_names):
+ """
+ Searches python code for a list of template names.
+ Returns a list of tuples, each one being:
+ (filename, line number)
+ """
+ retval = []
+ for tn in template_names:
+ retval.extend(search_python(python_code, tn))
+ retval = list(set(retval))
+ retval.sort()
+ return retval
+
+def search_python(python_code, template_name):
+ """
+ Searches Python code for a template name.
+ Returns a list of tuples, each one being:
+ (filename, line number)
+ """
+ retval = []
+ for fn, content in python_code:
+ for ln, line in enumerate(content):
+ if ((u'"%s"' % template_name) in line) or \
+ ((u"'%s'" % template_name) in line):
+ retval.append((fn, ln + 1))
+ return retval
+
+def main(pythonpaths):
+ template_dirs = get_template_dirs()
+ templates = get_templates(template_dirs)
+ python_code = get_python_code(pythonpaths)
+ for t in templates:
+ # Logic
+ form_matches = t.post_form_info()
+ num_post_forms = len(form_matches)
+ form_lines_without_token = [ln for (ln, has_token) in form_matches if not has_token]
+ if num_post_forms == 0:
+ continue
+ to_search = [rf for rt in t.related_templates() for rf in rt.relative_filenames]
+ found = search_python_list(python_code, to_search)
+
+ # Display:
+ print t.absolute_filename
+ for r in t.relative_filenames:
+ print u" AKA %s" % r
+ print u" POST forms: %s" % num_post_forms
+ print u" With token: %s" % (num_post_forms - len(form_lines_without_token))
+ if form_lines_without_token:
+ print u" Without token:"
+ for ln in form_lines_without_token:
+ print "%s:%d:" % (t.absolute_filename, ln)
+ print
+ print u" Searching for:"
+ for r in to_search:
+ print u" " + r
+ print
+ print u" Found:"
+ if len(found) == 0:
+ print " Nothing"
+ else:
+ for fn, ln in found:
+ print "%s:%d:" % (fn, ln)
+
+ print
+ print "----"
+
+
+if __name__ == '__main__':
+ # Hacky argument parsing, one day I'll learn OptParse...
+ args = list(sys.argv[1:])
+ if len(args) > 0:
+ if args[0] in ['--help', '-h', '-?', '--usage']:
+ print USAGE
+ sys.exit(0)
+ else:
+ if args[0].startswith('--settings='):
+ module = args[0][len('--settings='):]
+ os.environ["DJANGO_SETTINGS_MODULE"] = module
+ args = args[1:]
+
+ if args[0].startswith('-'):
+ print "Unknown option: %s" % args[0]
+ print USAGE
+ sys.exit(1)
+
+ pythonpaths = args
+
+ if os.environ.get("DJANGO_SETTINGS_MODULE", None) is None:
+ print "You need to set DJANGO_SETTINGS_MODULE or use the '--settings' parameter"
+ sys.exit(1)
+ if len(pythonpaths) == 0:
+ print "Unrecognised command: %s" % command
+ print USAGE
+ sys.exit(1)
+
+ main(pythonpaths)
+
+ else:
+ # no args
+ print USAGE
+ sys.exit(0)