parts/django/extras/csrf_migration_helper.py
changeset 69 c6bca38c1cbf
equal deleted inserted replaced
68:5ff1fc726848 69:c6bca38c1cbf
       
     1 #!/usr/bin/env python
       
     2 
       
     3 # This script aims to help developers locate forms and view code that needs to
       
     4 # use the new CSRF protection in Django 1.2.  It tries to find all the code that
       
     5 # may need the steps described in the CSRF documentation.  It does not modify
       
     6 # any code directly, it merely attempts to locate it.  Developers should be
       
     7 # aware of its limitations, described below.
       
     8 #
       
     9 # For each template that contains at least one POST form, the following info is printed:
       
    10 #
       
    11 # <Absolute path to template>
       
    12 #   AKA: <Aliases (relative to template directory/directories that contain it)>
       
    13 #   POST forms: <Number of POST forms>
       
    14 #   With token: <Number of POST forms with the CSRF token already added>
       
    15 #   Without token:
       
    16 #     <File name and line number of form without token>
       
    17 #
       
    18 #   Searching for:
       
    19 #     <Template names that need to be searched for in view code
       
    20 #      (includes templates that 'include' current template)>
       
    21 #
       
    22 #   Found:
       
    23 #     <File name and line number of any view code found>
       
    24 #
       
    25 # The format used allows this script to be used in Emacs grep mode:
       
    26 #   M-x grep
       
    27 #   Run grep (like this): /path/to/my/virtualenv/python /path/to/django/src/extras/csrf_migration_helper.py --settings=mysettings /path/to/my/srcs
       
    28 
       
    29 
       
    30 # Limitations
       
    31 # ===========
       
    32 #
       
    33 # - All templates must be stored on disk in '.html' or '.htm' files.
       
    34 #   (extensions configurable below)
       
    35 #
       
    36 # - All Python code must be stored on disk in '.py' files.  (extensions
       
    37 #   configurable below)
       
    38 #
       
    39 # - All templates must be accessible from TEMPLATE_DIRS or from the 'templates/'
       
    40 #   directory in apps specified in INSTALLED_APPS.  Non-file based template
       
    41 #   loaders are out of the picture, because there is no way to ask them to
       
    42 #   return all templates.
       
    43 #
       
    44 # - If you put the {% csrf_token %} tag on the same line as the <form> tag it
       
    45 #   will be detected, otherwise it will be assumed that the form does not have
       
    46 #   the token.
       
    47 #
       
    48 # - It's impossible to programmatically determine which forms should and should
       
    49 #   not have the token added.  The developer must decide when to do this,
       
     50 #   ensuring that the token is only added to internally targeted forms.
       
    51 #
       
    52 # - It's impossible to programmatically work out when a template is used.  The
       
    53 #   attempts to trace back to view functions are guesses, and could easily fail
       
    54 #   in the following ways:
       
    55 #
       
    56 #   * If the 'include' template tag is used with a variable
       
    57 #     i.e. {% include tname %} where tname is a variable containing the actual
       
    58 #     template name, rather than {% include "my_template.html" %}.
       
    59 #
       
    60 #   * If the template name has been built up by view code instead of as a simple
       
    61 #     string.  For example, generic views and the admin both do this.  (These
       
    62 #     apps are both contrib and both use RequestContext already, as it happens).
       
    63 #
       
    64 #   * If the 'ssl' tag (or any template tag other than 'include') is used to
       
    65 #     include the template in another template.
       
    66 #
       
    67 # - All templates belonging to apps referenced in INSTALLED_APPS will be
       
    68 #   searched, which may include third party apps or Django contrib.  In some
       
    69 #   cases, this will be a good thing, because even if the templates of these
       
    70 #   apps have been fixed by someone else, your own view code may reference the
       
    71 #   same template and may need to be updated.
       
    72 #
       
    73 #   You may, however, wish to comment out some entries in INSTALLED_APPS or
       
    74 #   TEMPLATE_DIRS before running this script.
       
    75 
       
    76 # Improvements to this script are welcome!
       
    77 
       
    78 # Configuration
       
    79 # =============
       
    80 
       
    81 TEMPLATE_EXTENSIONS = [
       
    82     ".html",
       
    83     ".htm",
       
    84     ]
       
    85 
       
    86 PYTHON_SOURCE_EXTENSIONS = [
       
    87     ".py",
       
    88     ]
       
    89 
       
    90 TEMPLATE_ENCODING = "UTF-8"
       
    91 
       
    92 PYTHON_ENCODING = "UTF-8"
       
    93 
       
    94 # Method
       
    95 # ======
       
    96 
       
    97 # Find templates:
       
    98 #  - template dirs
       
    99 #  - installed apps
       
   100 #
       
   101 # Search for POST forms
       
   102 #  - Work out what the name of the template is, as it would appear in an
       
   103 #    'include' or get_template() call. This can be done by comparing template
       
   104 #    filename to all template dirs.  Some templates can have more than one
       
   105 #    'name' e.g.  if a directory and one of its child directories are both in
       
   106 #    TEMPLATE_DIRS.  This is actually a common hack used for
       
   107 #    overriding-and-extending admin templates.
       
   108 #
       
   109 # For each POST form,
       
   110 # - see if it already contains '{% csrf_token %}' immediately after <form>
       
   111 # - work back to the view function(s):
       
   112 #   - First, see if the form is included in any other templates, then
       
   113 #     recursively compile a list of affected templates.
       
   114 #   - Find any code function that references that template.  This is just a
       
   115 #     brute force text search that can easily return false positives
       
   116 #     and fail to find real instances.
       
   117 
       
   118 
       
   119 import os
       
   120 import sys
       
   121 import re
       
   122 from optparse import OptionParser
       
   123 
       
   124 USAGE = """
       
   125 This tool helps to locate forms that need CSRF tokens added and the
       
   126 corresponding view code.  This processing is NOT fool proof, and you should read
       
   127 the help contained in the script itself.  Also, this script may need configuring
       
   128 (by editing the script) before use.
       
   129 
       
   130 Usage:
       
   131 
       
   132 python csrf_migration_helper.py [--settings=path.to.your.settings] /path/to/python/code [more paths...]
       
   133 
       
   134   Paths can be specified as relative paths.
       
   135 
       
   136   With no arguments, this help is printed.
       
   137 """
       
   138 
       
   139 _POST_FORM_RE = \
       
   140     re.compile(r'(<form\W[^>]*\bmethod\s*=\s*(\'|"|)POST(\'|"|)\b[^>]*>)', re.IGNORECASE)
       
   141 _TOKEN_RE = re.compile('\{% csrf_token')
       
   142 
       
   143 def get_template_dirs():
       
   144     """
       
   145     Returns a set of all directories that contain project templates.
       
   146     """
       
   147     from django.conf import settings
       
   148     dirs = set()
       
   149     if ('django.template.loaders.filesystem.load_template_source' in settings.TEMPLATE_LOADERS 
       
   150         or  'django.template.loaders.filesystem.Loader' in settings.TEMPLATE_LOADERS):
       
   151         dirs.update(map(unicode, settings.TEMPLATE_DIRS))
       
   152 
       
   153     if ('django.template.loaders.app_directories.load_template_source' in settings.TEMPLATE_LOADERS
       
   154         or 'django.template.loaders.app_directories.Loader' in settings.TEMPLATE_LOADERS):
       
   155         from django.template.loaders.app_directories import app_template_dirs
       
   156         dirs.update(app_template_dirs)
       
   157     return dirs
       
   158 
       
   159 def make_template_info(filename, root_dirs):
       
   160     """
       
   161     Creates a Template object for a filename, calculating the possible
       
   162     relative_filenames from the supplied filename and root template directories
       
   163     """
       
   164     return Template(filename,
       
   165                     [filename[len(d)+1:] for d in root_dirs if filename.startswith(d)])
       
   166 
       
   167 
       
   168 class Template(object):
       
   169     def __init__(self, absolute_filename, relative_filenames):
       
   170         self.absolute_filename, self.relative_filenames = absolute_filename, relative_filenames
       
   171 
       
   172     def content(self):
       
   173         try:
       
   174             return self._content
       
   175         except AttributeError:
       
   176             fd = open(self.absolute_filename)
       
   177             try:
       
   178                 content = fd.read().decode(TEMPLATE_ENCODING)
       
   179             except UnicodeDecodeError, e:
       
   180                 message = '%s in %s' % (
       
   181                     e[4], self.absolute_filename.encode('UTF-8', 'ignore'))
       
   182                 raise UnicodeDecodeError(*(e.args[:4] + (message,)))
       
   183             fd.close()
       
   184             self._content = content
       
   185             return content
       
   186     content = property(content)
       
   187 
       
   188     def post_form_info(self):
       
   189         """
       
   190         Get information about any POST forms in the template.
       
   191         Returns [(linenumber, csrf_token added)]
       
   192         """
       
   193         matches = []
       
   194         for ln, line in enumerate(self.content.split("\n")):
       
   195             m = _POST_FORM_RE.search(line)
       
   196             if m is not None:
       
   197                 matches.append((ln + 1, _TOKEN_RE.search(line) is not None))
       
   198         return matches
       
   199 
       
   200     def includes_template(self, t):
       
   201         """
       
   202         Returns true if this template includes template 't' (via {% include %})
       
   203         """
       
   204         for r in t.relative_filenames:
       
   205             if re.search(r'\{%\s*include\s+(\'|")' + re.escape(r) + r'(\1)\s*%\}', self.content):
       
   206                 return True
       
   207         return False
       
   208 
       
   209     def related_templates(self):
       
   210         """
       
   211         Returns all templates that include this one, recursively.  (starting
       
   212         with this one)
       
   213         """
       
   214         try:
       
   215             return self._related_templates
       
   216         except AttributeError:
       
   217             pass
       
   218 
       
   219         retval = set([self])
       
   220         for t in self.all_templates:
       
   221             if t.includes_template(self):
       
   222                 # If two templates mutually include each other, directly or
       
   223                 # indirectly, we have a problem here...
       
   224                 retval = retval.union(t.related_templates())
       
   225 
       
   226         self._related_templates = retval
       
   227         return retval
       
   228 
       
   229     def __repr__(self):
       
   230         return repr(self.absolute_filename)
       
   231 
       
   232     def __eq__(self, other):
       
   233         return self.absolute_filename == other.absolute_filename
       
   234 
       
   235     def __hash__(self):
       
   236         return hash(self.absolute_filename)
       
   237 
       
   238 def get_templates(dirs):
       
   239     """
       
   240     Returns all files in dirs that have template extensions, as Template
       
   241     objects.
       
   242     """
       
   243     templates = set()
       
   244     for root in dirs:
       
   245         for (dirpath, dirnames, filenames) in os.walk(root):
       
   246             for f in filenames:
       
   247                 if len([True for e in TEMPLATE_EXTENSIONS if f.endswith(e)]) > 0:
       
   248                     t = make_template_info(os.path.join(dirpath, f), dirs)
       
   249                     # templates need to be able to search others:
       
   250                     t.all_templates = templates
       
   251                     templates.add(t)
       
   252     return templates
       
   253 
       
   254 def get_python_code(paths):
       
   255     """
       
   256     Returns all Python code, as a list of tuples, each one being:
       
   257      (filename, list of lines)
       
   258     """
       
   259     retval = []
       
   260     for p in paths:
       
   261         if not os.path.isdir(p):
       
   262             raise Exception("'%s' is not a directory." % p)
       
   263         for (dirpath, dirnames, filenames) in os.walk(p):
       
   264             for f in filenames:
       
   265                 if len([True for e in PYTHON_SOURCE_EXTENSIONS if f.endswith(e)]) > 0:
       
   266                     fn = os.path.join(dirpath, f)
       
   267                     fd = open(fn)
       
   268                     content = [l.decode(PYTHON_ENCODING) for l in fd.readlines()]
       
   269                     fd.close()
       
   270                     retval.append((fn, content))
       
   271     return retval
       
   272 
       
   273 def search_python_list(python_code, template_names):
       
   274     """
       
   275     Searches python code for a list of template names.
       
   276     Returns a list of tuples, each one being:
       
   277      (filename, line number)
       
   278     """
       
   279     retval = []
       
   280     for tn in template_names:
       
   281         retval.extend(search_python(python_code, tn))
       
   282     retval = list(set(retval))
       
   283     retval.sort()
       
   284     return retval
       
   285 
       
   286 def search_python(python_code, template_name):
       
   287     """
       
   288     Searches Python code for a template name.
       
   289     Returns a list of tuples, each one being:
       
   290      (filename, line number)
       
   291     """
       
   292     retval = []
       
   293     for fn, content in python_code:
       
   294         for ln, line in enumerate(content):
       
   295             if ((u'"%s"' % template_name) in line) or \
       
   296                ((u"'%s'" % template_name) in line):
       
   297                 retval.append((fn, ln + 1))
       
   298     return retval
       
   299 
       
   300 def main(pythonpaths):
       
   301     template_dirs = get_template_dirs()
       
   302     templates = get_templates(template_dirs)
       
   303     python_code = get_python_code(pythonpaths)
       
   304     for t in templates:
       
   305         # Logic
       
   306         form_matches = t.post_form_info()
       
   307         num_post_forms = len(form_matches)
       
   308         form_lines_without_token = [ln for (ln, has_token) in form_matches if not has_token]
       
   309         if num_post_forms == 0:
       
   310             continue
       
   311         to_search = [rf for rt in t.related_templates() for rf in rt.relative_filenames]
       
   312         found = search_python_list(python_code, to_search)
       
   313 
       
   314         # Display:
       
   315         print t.absolute_filename
       
   316         for r in t.relative_filenames:
       
   317             print u"  AKA %s" % r
       
   318         print u"  POST forms: %s" % num_post_forms
       
   319         print u"  With token: %s" % (num_post_forms - len(form_lines_without_token))
       
   320         if form_lines_without_token:
       
   321             print u"  Without token:"
       
   322             for ln in form_lines_without_token:
       
   323                 print "%s:%d:" % (t.absolute_filename, ln)
       
   324         print
       
   325         print u"  Searching for:"
       
   326         for r in to_search:
       
   327             print u"    " + r
       
   328         print
       
   329         print u"  Found:"
       
   330         if len(found) == 0:
       
   331             print "    Nothing"
       
   332         else:
       
   333             for fn, ln in found:
       
   334                 print "%s:%d:" % (fn, ln)
       
   335 
       
   336         print
       
   337         print "----"
       
   338 
       
   339 
       
   340 parser = OptionParser(usage=USAGE)
       
   341 parser.add_option("", "--settings", action="store", dest="settings", help="Dotted path to settings file")
       
   342 
       
   343 if __name__ == '__main__':
       
   344     options, args = parser.parse_args()
       
   345     if len(args) == 0:
       
   346         parser.print_help()
       
   347         sys.exit(1)
       
   348 
       
   349     settings = getattr(options, 'settings', None)
       
   350     if settings is None:
       
   351         if os.environ.get("DJANGO_SETTINGS_MODULE", None) is None:
       
   352             print "You need to set DJANGO_SETTINGS_MODULE or use the '--settings' parameter"
       
   353             sys.exit(1)
       
   354     else:
       
   355         os.environ["DJANGO_SETTINGS_MODULE"] = settings
       
   356 
       
   357     main(args)