author | Madhusudan.C.S <madhusudancs@gmail.com> |
Mon, 10 Aug 2009 00:06:31 +0530 | |
changeset 24 | 7257b66a6766 |
parent 20 | 327b3f0b73bb |
permissions | -rwxr-xr-x |
20
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
1 |
# urllib2 opener to connect through a proxy using the CONNECT method (useful for SSL) |
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
2 |
# tested with python 2.4 |
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
3 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
4 |
import urllib2 |
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
5 |
import urllib |
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
6 |
import httplib |
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
7 |
import socket |
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
8 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
9 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
10 |
class ProxyHTTPConnection(httplib.HTTPConnection):
    """HTTP connection that reaches the real host through a proxy CONNECT tunnel.

    ``request`` parses the absolute URL it is given to learn the real
    destination host and port; ``connect`` then opens the underlying socket
    and sends a ``CONNECT host:port`` request over it before any request
    bytes are written.
    """

    # Default ports used when the request URL does not name one explicitly.
    _ports = {'http': 80, 'https': 443}

    def request(self, method, url, body=None, headers=None):
        """Record the real host/port taken from ``url``, then send the request.

        Args:
            method: HTTP method, passed through to the base class.
            url: absolute URL (scheme included) of the real destination.
            body: optional request body, passed through to the base class.
            headers: optional mapping of request headers; ``None`` means no
                extra headers.

        Raises:
            ValueError: if ``url`` has no scheme, or has no explicit port and
                its scheme is not listed in ``_ports``.
        """
        if headers is None:
            headers = {}
        # request is called before connect, so the URL can be interpreted here
        # to get the real host/port used for the CONNECT request to the proxy.
        proto, rest = urllib.splittype(url)
        if proto is None:
            raise ValueError("unknown URL type: %s" % url)
        host, rest = urllib.splithost(rest)
        host, port = urllib.splitport(host)
        if port is None:
            # No explicit port in the URL: fall back to the scheme's default.
            try:
                port = self._ports[proto]
            except KeyError:
                raise ValueError("unknown protocol for: %s" % url)
        else:
            # splitport hands the port back as a string of digits, while
            # connect() formats it with %d, so normalise it to an int here.
            port = int(port)
        self._real_host = host
        self._real_port = port
        httplib.HTTPConnection.request(self, method, url, body, headers)

    def connect(self):
        """Open the socket, then ask the peer to tunnel to the real host.

        Raises:
            socket.error: if the CONNECT reply status is not 200, or the
                connection is closed before the reply's header block ends.
        """
        httplib.HTTPConnection.connect(self)
        # Send the proxy CONNECT request.
        self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self._real_host, self._real_port))
        # Expect something like "HTTP/1.0 200 Connection established".
        response = self.response_class(self.sock, strict=self.strict, method=self._method)
        (version, code, message) = response._read_status()
        # Proxy authentication challenges could probably be handled here.
        if code != 200:
            # The proxy returned an error: abort the connection and report it.
            self.close()
            raise socket.error("Proxy connection failed: %d %s" % (code, message.strip()))
        # Consume the rest of the proxy's header block so the tunnelled
        # stream starts cleanly (reads response.fp directly).
        while True:
            line = response.fp.readline()
            if not line:
                # EOF before the blank line: without this check the loop
                # would never terminate on a dropped connection.
                self.close()
                raise socket.error("Proxy connection closed while reading CONNECT response headers")
            if line in ('\r\n', '\n'):
                break
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
53 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
54 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
55 |
class ProxyHTTPSConnection(ProxyHTTPConnection):
    """ProxyHTTPConnection that wraps the tunnelled socket in SSL after CONNECT."""

    default_port = 443

    def __init__(self, host, port=None, key_file=None, cert_file=None, strict=None):
        """Store the optional client key/certificate files for the SSL wrap.

        Args:
            host: host to open the socket to, passed to the base class.
            port: optional port, passed to the base class.
            key_file: optional private key file handed to ``socket.ssl``.
            cert_file: optional certificate file handed to ``socket.ssl``.
            strict: passed to the base class constructor.
        """
        # Forward strict as well; previously it was accepted but dropped.
        ProxyHTTPConnection.__init__(self, host, port, strict)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        """Establish the CONNECT tunnel, then make the socket SSL-aware."""
        ProxyHTTPConnection.connect(self)
        ssl_sock = socket.ssl(self.sock, self.key_file, self.cert_file)
        self.sock = httplib.FakeSocket(self.sock, ssl_sock)
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
69 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
70 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
71 |
class ConnectHTTPHandler(urllib2.HTTPHandler):
    """urllib2 HTTP handler that always opens requests with ProxyHTTPConnection."""

    def do_open(self, http_class, req):
        """Open ``req`` using ProxyHTTPConnection.

        The ``http_class`` supplied by the caller is ignored.
        """
        connection_class = ProxyHTTPConnection
        return urllib2.HTTPHandler.do_open(self, connection_class, req)
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
75 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
76 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
77 |
class ConnectHTTPSHandler(urllib2.HTTPSHandler):
    """urllib2 HTTPS handler that always opens requests with ProxyHTTPSConnection."""

    def do_open(self, http_class, req):
        """Open ``req`` using ProxyHTTPSConnection.

        The ``http_class`` supplied by the caller is ignored.
        """
        connection_class = ProxyHTTPSConnection
        return urllib2.HTTPSHandler.do_open(self, connection_class, req)
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
81 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
82 |
|
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
83 |
if __name__ == '__main__':
    # Demo: install the CONNECT-aware handlers globally, fetch one page
    # through a hard-coded proxy and print the response body.
    opener = urllib2.build_opener(ConnectHTTPHandler, ConnectHTTPSHandler)
    urllib2.install_opener(opener)
    req = urllib2.Request(url='http://google.com')
    req.set_proxy('10.101.1.1:80', 'http')
    f = urllib2.urlopen(req)
    try:
        # Parenthesised single argument prints the same on Python 2.
        print(f.read())
    finally:
        # Release the response even if reading or printing fails.
        f.close()
327b3f0b73bb
Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff
changeset
|
93 |