|
1 import md5 |
|
2 import re |
|
3 |
|
4 from django.conf import settings |
|
5 from django import http |
|
6 from django.core.mail import mail_managers |
|
7 from django.utils.http import urlquote |
|
8 from django.core import urlresolvers |
|
9 |
|
class CommonMiddleware(object):
    """
    "Common" middleware for taking care of some basic operations:

        - Forbids access to User-Agents in settings.DISALLOWED_USER_AGENTS

        - URL rewriting: Based on the APPEND_SLASH and PREPEND_WWW settings,
          this middleware appends missing slashes and/or prepends missing
          "www."s.

            - If APPEND_SLASH is set and the initial URL doesn't end with a
              slash, and it is not found in urlpatterns, a new URL is formed by
              appending a slash at the end. If this new URL is found in
              urlpatterns, then an HTTP-redirect is returned to this new URL;
              otherwise the initial URL is processed as usual.

        - Broken link e-mails: If the SEND_BROKEN_LINK_EMAILS setting is set,
          404 responses that carry a referrer are reported to the site
          managers.

        - ETags: If the USE_ETAGS setting is set, ETags will be calculated from
          the entire page content and Not Modified responses will be returned
          appropriately.
    """

    def _path_resolves(self, path):
        "Returns True if the given path matches an entry in the URLconf."
        try:
            urlresolvers.resolve(path)
        except urlresolvers.Resolver404:
            return False
        return True

    def process_request(self, request):
        """
        Check for denied User-Agents and rewrite the URL based on
        settings.APPEND_SLASH and settings.PREPEND_WWW.

        Returns an HttpResponseForbidden for a disallowed User-Agent, an
        HttpResponsePermanentRedirect when the rewritten URL resolves, and
        None otherwise so that request processing continues as usual.

        Raises RuntimeError when settings.DEBUG is on and a POST request
        would be redirected to the slash-appended URL, because the POST data
        cannot survive the redirect.
        """

        # Check for denied User-Agents
        if 'HTTP_USER_AGENT' in request.META:
            for user_agent_regex in settings.DISALLOWED_USER_AGENTS:
                if user_agent_regex.search(request.META['HTTP_USER_AGENT']):
                    return http.HttpResponseForbidden('<h1>Forbidden</h1>')

        # Check for a redirect based on settings.APPEND_SLASH
        # and settings.PREPEND_WWW
        host = request.get_host()
        old_url = [host, request.path]
        new_url = old_url[:]

        if (settings.PREPEND_WWW and old_url[0] and
                not old_url[0].startswith('www.')):
            new_url[0] = 'www.' + old_url[0]

        # Append a slash if APPEND_SLASH is set and the URL doesn't have a
        # trailing slash and there is no pattern for the current path
        if settings.APPEND_SLASH and (not old_url[1].endswith('/')):
            if not self._path_resolves(request.path):
                new_url[1] = new_url[1] + '/'
                # Only complain when the redirect would really be issued,
                # i.e. the slash-appended URL exists. A POST to a URL that
                # does not resolve either way is an ordinary 404.
                if (settings.DEBUG and request.method == 'POST' and
                        self._path_resolves(new_url[1])):
                    raise RuntimeError((""
                        "You called this URL via POST, but the URL doesn't end "
                        "in a slash and you have APPEND_SLASH set. Django can't "
                        "redirect to the slash URL while maintaining POST data. "
                        "Change your form to point to %s%s (note the trailing "
                        "slash), or set APPEND_SLASH=False in your Django "
                        "settings.") % (new_url[0], new_url[1]))

        if new_url == old_url:
            # Nothing was rewritten; carry on with normal processing.
            return None

        # Redirect only if the target url exists
        if not self._path_resolves(new_url[1]):
            return None

        if new_url[0]:
            newurl = "%s://%s%s" % (
                request.is_secure() and 'https' or 'http',
                new_url[0], urlquote(new_url[1]))
        else:
            # No host available: fall back to a host-relative redirect.
            newurl = urlquote(new_url[1])
        if request.GET:
            newurl += '?' + request.GET.urlencode()
        return http.HttpResponsePermanentRedirect(newurl)

    def process_response(self, request, response):
        """
        Send broken-link notifications for 404s and calculate the ETag, if
        needed.

        Returns the response to send: either the one passed in (possibly with
        an ETag header added) or an HttpResponseNotModified carrying the
        original response's cookies.
        """
        if response.status_code == 404:
            if settings.SEND_BROKEN_LINK_EMAILS:
                # If the referrer was from an internal link or a non-search-engine site,
                # send a note to the managers.
                domain = request.get_host()
                referer = request.META.get('HTTP_REFERER', None)
                is_internal = _is_internal_request(domain, referer)
                path = request.get_full_path()
                if referer and not _is_ignorable_404(path) and (is_internal or '?' not in referer):
                    ua = request.META.get('HTTP_USER_AGENT', '<none>')
                    ip = request.META.get('REMOTE_ADDR', '<none>')
                    mail_managers("Broken %slink on %s" % ((is_internal and 'INTERNAL ' or ''), domain),
                        "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" \
                                  % (referer, path, ua, ip))
                # NOTE(review): the reviewed copy had lost its indentation;
                # this early return is assumed to belong to the
                # SEND_BROKEN_LINK_EMAILS branch -- confirm against history.
                return response

        # Use ETags, if requested.
        if settings.USE_ETAGS:
            if response.has_header('ETag'):
                etag = response['ETag']
            else:
                # NOTE: the md5 module is deprecated since Python 2.5 in
                # favour of hashlib; kept to match this file's imports.
                etag = md5.new(response.content).hexdigest()
            if 200 <= response.status_code < 300 and request.META.get('HTTP_IF_NONE_MATCH') == etag:
                # The client already holds this content: answer 304 but
                # keep any cookies set on the original response.
                cookies = response.cookies
                response = http.HttpResponseNotModified()
                response.cookies = cookies
            else:
                response['ETag'] = etag

        return response
|
120 |
|
def _is_ignorable_404(uri):
    """
    Returns True if a 404 at the given URL *shouldn't* notify the site managers.

    A URL is ignorable when it begins with one of the prefixes in
    settings.IGNORABLE_404_STARTS or finishes with one of the suffixes in
    settings.IGNORABLE_404_ENDS.
    """
    # Prefixes are consulted first; suffixes are only looked at when no
    # prefix matched.
    if any(uri.startswith(prefix) for prefix in settings.IGNORABLE_404_STARTS):
        return True
    return any(uri.endswith(suffix) for suffix in settings.IGNORABLE_404_ENDS)
|
130 |
|
131 def _is_internal_request(domain, referer): |
|
132 "Return true if the referring URL is the same domain as the current request" |
|
133 # Different subdomains are treated as different domains. |
|
134 return referer is not None and re.match("^https?://%s/" % re.escape(domain), referer) |