|
1 #!/usr/bin/env python |
|
2 |
|
3 # This script aims to help developers locate forms and view code that needs to |
|
4 # use the new CSRF protection in Django 1.2. It tries to find all the code that |
|
5 # may need the steps described in the CSRF documentation. It does not modify |
|
6 # any code directly, it merely attempts to locate it. Developers should be |
|
7 # aware of its limitations, described below. |
|
8 # |
|
9 # For each template that contains at least one POST form, the following info is printed: |
|
10 # |
|
11 # <Absolute path to template> |
|
12 # AKA: <Aliases (relative to template directory/directories that contain it)> |
|
13 # POST forms: <Number of POST forms> |
|
14 # With token: <Number of POST forms with the CSRF token already added> |
|
15 # Without token: |
|
16 # <File name and line number of form without token> |
|
17 # |
|
18 # Searching for: |
|
19 # <Template names that need to be searched for in view code |
|
20 # (includes templates that 'include' current template)> |
|
21 # |
|
22 # Found: |
|
23 # <File name and line number of any view code found> |
|
24 # |
|
25 # The format used allows this script to be used in Emacs grep mode: |
|
26 # M-x grep |
|
27 # Run grep (like this): /path/to/my/virtualenv/python /path/to/django/src/extras/csrf_migration_helper.py --settings=mysettings /path/to/my/srcs |
|
28 |
|
29 |
|
30 # Limitations |
|
31 # =========== |
|
32 # |
|
33 # - All templates must be stored on disk in '.html' or '.htm' files. |
|
34 # (extensions configurable below) |
|
35 # |
|
36 # - All Python code must be stored on disk in '.py' files. (extensions |
|
37 # configurable below) |
|
38 # |
|
39 # - All templates must be accessible from TEMPLATE_DIRS or from the 'templates/' |
|
40 # directory in apps specified in INSTALLED_APPS. Non-file based template |
|
41 # loaders are out of the picture, because there is no way to ask them to |
|
42 # return all templates. |
|
43 # |
|
44 # - If you put the {% csrf_token %} tag on the same line as the <form> tag it |
|
45 # will be detected, otherwise it will be assumed that the form does not have |
|
46 # the token. |
|
47 # |
|
# - It's impossible to programmatically determine which forms should and should
#   not have the token added. The developer must decide when to do this,
#   ensuring that the token is only added to internally targeted forms.
|
51 # |
|
52 # - It's impossible to programmatically work out when a template is used. The |
|
53 # attempts to trace back to view functions are guesses, and could easily fail |
|
54 # in the following ways: |
|
55 # |
|
56 # * If the 'include' template tag is used with a variable |
|
57 # i.e. {% include tname %} where tname is a variable containing the actual |
|
58 # template name, rather than {% include "my_template.html" %}. |
|
59 # |
|
60 # * If the template name has been built up by view code instead of as a simple |
|
61 # string. For example, generic views and the admin both do this. (These |
|
62 # apps are both contrib and both use RequestContext already, as it happens). |
|
63 # |
|
64 # * If the 'ssl' tag (or any template tag other than 'include') is used to |
|
65 # include the template in another template. |
|
66 # |
|
67 # - All templates belonging to apps referenced in INSTALLED_APPS will be |
|
68 # searched, which may include third party apps or Django contrib. In some |
|
69 # cases, this will be a good thing, because even if the templates of these |
|
70 # apps have been fixed by someone else, your own view code may reference the |
|
71 # same template and may need to be updated. |
|
72 # |
|
73 # You may, however, wish to comment out some entries in INSTALLED_APPS or |
|
74 # TEMPLATE_DIRS before running this script. |
|
75 |
|
76 # Improvements to this script are welcome! |
|
77 |
|
78 # Configuration |
|
79 # ============= |
|
80 |
|
# File name suffixes that mark a file as a template.
TEMPLATE_EXTENSIONS = [".html", ".htm"]

# File name suffixes that mark a file as Python source.
PYTHON_SOURCE_EXTENSIONS = [".py"]

# Encodings used to decode template files and Python source files.
TEMPLATE_ENCODING = "UTF-8"
PYTHON_ENCODING = "UTF-8"
|
93 |
|
94 # Method |
|
95 # ====== |
|
96 |
|
97 # Find templates: |
|
98 # - template dirs |
|
99 # - installed apps |
|
100 # |
|
101 # Search for POST forms |
|
102 # - Work out what the name of the template is, as it would appear in an |
|
103 # 'include' or get_template() call. This can be done by comparing template |
|
104 # filename to all template dirs. Some templates can have more than one |
|
105 # 'name' e.g. if a directory and one of its child directories are both in |
|
106 # TEMPLATE_DIRS. This is actually a common hack used for |
|
107 # overriding-and-extending admin templates. |
|
108 # |
|
109 # For each POST form, |
|
# - see if it already contains '{% csrf_token %}' on the same line as <form>
|
111 # - work back to the view function(s): |
|
112 # - First, see if the form is included in any other templates, then |
|
113 # recursively compile a list of affected templates. |
|
114 # - Find any code function that references that template. This is just a |
|
115 # brute force text search that can easily return false positives |
|
116 # and fail to find real instances. |
|
117 |
|
118 |
|
119 import os |
|
120 import sys |
|
121 import re |
|
122 from optparse import OptionParser |
|
123 |
|
# Help text handed to the option parser as its usage string.
USAGE = """
This tool helps to locate forms that need CSRF tokens added and the
corresponding view code. This processing is NOT fool proof, and you should read
the help contained in the script itself. Also, this script may need configuring
(by editing the script) before use.

Usage:

python csrf_migration_helper.py [--settings=path.to.your.settings] /path/to/python/code [more paths...]

Paths can be specified as relative paths.

With no arguments, this help is printed.
"""
|
138 |
|
139 _POST_FORM_RE = \ |
|
140 re.compile(r'(<form\W[^>]*\bmethod\s*=\s*(\'|"|)POST(\'|"|)\b[^>]*>)', re.IGNORECASE) |
|
141 _TOKEN_RE = re.compile('\{% csrf_token') |
|
142 |
|
def get_template_dirs():
    """
    Build the set of directories that hold the project's templates.

    Entries of settings.TEMPLATE_DIRS are included (converted with unicode())
    when one of the filesystem loaders is listed in settings.TEMPLATE_LOADERS;
    the app template directories are included when one of the
    app_directories loaders is listed.
    """
    from django.conf import settings

    loaders = settings.TEMPLATE_LOADERS
    template_dirs = set()

    filesystem_enabled = (
        'django.template.loaders.filesystem.load_template_source' in loaders
        or 'django.template.loaders.filesystem.Loader' in loaders)
    if filesystem_enabled:
        for directory in settings.TEMPLATE_DIRS:
            template_dirs.add(unicode(directory))

    app_directories_enabled = (
        'django.template.loaders.app_directories.load_template_source' in loaders
        or 'django.template.loaders.app_directories.Loader' in loaders)
    if app_directories_enabled:
        # Imported only when needed, exactly as before.
        from django.template.loaders.app_directories import app_template_dirs
        template_dirs.update(app_template_dirs)

    return template_dirs
|
158 |
|
def make_template_info(filename, root_dirs):
    """
    Creates a Template object for a filename, calculating the possible
    relative_filenames from the supplied filename and root template directories.

    A root directory contributes a relative name only when it is a whole
    path-component prefix of filename.  A plain string-prefix test would also
    accept sibling directories (root '/t/templates' against
    '/t/templates_extra/x.html'), and blindly skipping one character after
    the root would eat the first letter of the name when the root itself
    ends with a path separator.
    """
    # Tuple (not a string) so that an empty remainder is never "in" it.
    separators = tuple(s for s in (os.sep, os.altsep) if s)
    relative_filenames = []
    for d in root_dirs:
        if not filename.startswith(d):
            continue
        remainder = filename[len(d):]
        if d.endswith(separators):
            # Root already ends with a separator: the remainder is the name.
            relative_filenames.append(remainder)
        elif remainder[:1] in separators:
            # Drop the separator that joins the root and the relative name.
            relative_filenames.append(remainder[1:])
    return Template(filename, relative_filenames)
|
166 |
|
167 |
|
class Template(object):
    """
    A template file on disk plus the names it can be referred to by.

    absolute_filename is the path the content is read from and is the only
    thing considered for equality and hashing.  relative_filenames is the
    list of names the file has relative to the template directories.

    related_templates() reads an ``all_templates`` attribute (an iterable of
    Template objects) that must be assigned to the instance by the caller;
    __init__ does not set it.
    """

    def __init__(self, absolute_filename, relative_filenames):
        self.absolute_filename = absolute_filename
        self.relative_filenames = relative_filenames

    @property
    def content(self):
        """
        The decoded text of the template, read on first access and cached.

        Raises UnicodeDecodeError (with the file name appended to the reason)
        if the file cannot be decoded using TEMPLATE_ENCODING.
        """
        try:
            return self._content
        except AttributeError:
            pass

        # Read raw bytes and decode explicitly; the handle is closed even if
        # reading fails.
        fd = open(self.absolute_filename, 'rb')
        try:
            raw = fd.read()
        finally:
            fd.close()

        try:
            content = raw.decode(TEMPLATE_ENCODING)
        except UnicodeDecodeError as e:
            filename = self.absolute_filename
            # The exception's reason must be a native str.  Only encode when
            # the name is not one already, so a byte-string name is never
            # pushed through an implicit decode.
            if not isinstance(filename, str) and hasattr(filename, 'encode'):
                filename = filename.encode('UTF-8', 'ignore')
            message = '%s in %s' % (e.args[4], filename)
            raise UnicodeDecodeError(*(e.args[:4] + (message,)))

        self._content = content
        return content

    def post_form_info(self):
        """
        Get information about any POST forms in the template.
        Returns [(linenumber, csrf_token added)]

        Line numbers are 1-based.  The token only counts as added when it
        appears on the same line as the matched <form> tag.
        """
        matches = []
        for index, line in enumerate(self.content.split("\n")):
            if _POST_FORM_RE.search(line) is not None:
                matches.append((index + 1, _TOKEN_RE.search(line) is not None))
        return matches

    def includes_template(self, t):
        """
        Returns true if this template includes template 't' (via {% include %})

        Only includes that give one of t's relative names as a quoted string
        literal are recognised.
        """
        for r in t.relative_filenames:
            pattern = r'\{%\s*include\s+(\'|")' + re.escape(r) + r'(\1)\s*%\}'
            if re.search(pattern, self.content):
                return True
        return False

    def related_templates(self):
        """
        Returns all templates that include this one, directly or indirectly,
        as a set that also contains this template.  The result is cached.

        The search is an iterative traversal with a visited set, so templates
        that include each other (or themselves) cannot cause endless
        recursion.
        """
        try:
            return self._related_templates
        except AttributeError:
            pass

        found = set([self])
        pending = [self]
        while pending:
            current = pending.pop()
            for candidate in self.all_templates:
                if candidate not in found and candidate.includes_template(current):
                    found.add(candidate)
                    pending.append(candidate)

        self._related_templates = found
        return found

    def __repr__(self):
        return repr(self.absolute_filename)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing attribute.
        if not isinstance(other, Template):
            return NotImplemented
        return self.absolute_filename == other.absolute_filename

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.absolute_filename)
|
237 |
|
def get_templates(dirs):
    """
    Walk every directory in dirs and return a set of Template objects, one
    for each file whose name ends with one of TEMPLATE_EXTENSIONS.

    Each Template gets the returned set attached as ``all_templates`` so that
    it can search the other templates.
    """
    collected = set()
    for root in dirs:
        for dirpath, _dirnames, filenames in os.walk(root):
            for name in filenames:
                if not any(name.endswith(ext) for ext in TEMPLATE_EXTENSIONS):
                    continue
                template = make_template_info(os.path.join(dirpath, name), dirs)
                # templates need to be able to search others:
                template.all_templates = collected
                collected.add(template)
    return collected
|
253 |
|
def get_python_code(paths):
    """
    Returns all Python code, as a list of tuples, each one being:
     (filename, list of lines)

    Every directory in paths is walked recursively and each file whose name
    ends with one of PYTHON_SOURCE_EXTENSIONS is read and decoded line by
    line using PYTHON_ENCODING.

    Raises Exception if an entry of paths is not a directory, and
    UnicodeDecodeError if a file cannot be decoded.
    """
    retval = []
    for p in paths:
        if not os.path.isdir(p):
            raise Exception("'%s' is not a directory." % p)
        for (dirpath, dirnames, filenames) in os.walk(p):
            for f in filenames:
                if not any(f.endswith(e) for e in PYTHON_SOURCE_EXTENSIONS):
                    continue
                fn = os.path.join(dirpath, f)
                # Read raw bytes and decode explicitly; the handle is closed
                # even when a line fails to decode.
                fd = open(fn, 'rb')
                try:
                    content = [l.decode(PYTHON_ENCODING) for l in fd.readlines()]
                finally:
                    fd.close()
                retval.append((fn, content))
    return retval
|
272 |
|
def search_python_list(python_code, template_names):
    """
    Look for each name in template_names within python_code.

    Returns a sorted list, without duplicates, of tuples:
     (filename, line number)
    """
    unique_hits = set()
    for tn in template_names:
        unique_hits.update(search_python(python_code, tn))
    return sorted(unique_hits)
|
285 |
|
def search_python(python_code, template_name):
    """
    Find the lines of python_code that contain template_name as a single- or
    double-quoted string.

    python_code is a sequence of (filename, list of lines).
    Returns a list of tuples, each one being:
     (filename, line number)
    with line numbers starting at 1.
    """
    quoted_forms = (u'"%s"', u"'%s'")
    hits = []
    for fn, content in python_code:
        for index, line in enumerate(content):
            if any((form % template_name) in line for form in quoted_forms):
                hits.append((fn, index + 1))
    return hits
|
299 |
|
def main(pythonpaths):
    """
    Print a report for every template that contains at least one POST form.

    For each such template the report lists its path and relative names, the
    number of POST forms with and without the CSRF token, the template names
    searched for in the Python code under pythonpaths, and the file/line
    locations where those names were found.
    """
    template_dirs = get_template_dirs()
    templates = get_templates(template_dirs)
    python_code = get_python_code(pythonpaths)

    for template in templates:
        # Logic
        form_matches = template.post_form_info()
        if not form_matches:
            continue
        num_post_forms = len(form_matches)
        missing_token_lines = [
            ln for (ln, has_token) in form_matches if not has_token]
        to_search = []
        for related in template.related_templates():
            to_search.extend(related.relative_filenames)
        found = search_python_list(python_code, to_search)

        # Display (single-argument print(...) so the same source also parses
        # on Python 3; the output is unchanged):
        print(template.absolute_filename)
        for alias in template.relative_filenames:
            print(u" AKA %s" % alias)
        print(u" POST forms: %s" % num_post_forms)
        print(u" With token: %s" % (num_post_forms - len(missing_token_lines)))
        if missing_token_lines:
            print(u" Without token:")
            for ln in missing_token_lines:
                print("%s:%d:" % (template.absolute_filename, ln))
        print("")
        print(u" Searching for:")
        for name in to_search:
            print(u" " + name)
        print("")
        print(u" Found:")
        if found:
            for fn, ln in found:
                print("%s:%d:" % (fn, ln))
        else:
            print(" Nothing")

        print("")
        print("----")
|
338 |
|
339 |
|
# Command line interface: one optional --settings option; the remaining
# arguments are the directories of Python code to search.
parser = OptionParser(usage=USAGE)
parser.add_option(
    "", "--settings",
    action="store",
    dest="settings",
    help="Dotted path to settings file")

if __name__ == '__main__':
    options, args = parser.parse_args()
    if not args:
        # No paths given: show the help and stop.
        parser.print_help()
        sys.exit(1)

    settings = getattr(options, 'settings', None)
    if settings is not None:
        # An explicit --settings overrides whatever the environment holds.
        os.environ["DJANGO_SETTINGS_MODULE"] = settings
    elif os.environ.get("DJANGO_SETTINGS_MODULE", None) is None:
        print("You need to set DJANGO_SETTINGS_MODULE or use the '--settings' parameter")
        sys.exit(1)

    main(args)