app/site-content/proposals/2009/08/07/proxy.py
author Madhusudan.C.S <madhusudancs@gmail.com>
Sun, 09 Aug 2009 12:40:14 +0530
changeset 20 327b3f0b73bb
permissions -rwxr-xr-x
Added data fetching scripts.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
20
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     1
# urllib2 opener that connects through a proxy using the CONNECT method (useful for SSL)
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     2
# tested with python 2.4
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     3
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     4
import urllib2
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     5
import urllib
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     6
import httplib
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     7
import socket
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     8
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
     9
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    10
class ProxyHTTPConnection(httplib.HTTPConnection):
    """HTTP connection that reaches the real server through a proxy tunnel.

    The connection itself is opened to whatever host/port the instance was
    constructed with (the proxy).  request() works out the real destination
    from the absolute URL it is given, and connect() asks the proxy for a
    tunnel to that destination with the CONNECT method before any request
    data is sent.
    """

    # Default port per URL scheme, used when the URL carries no explicit port.
    _ports = {'http': 80, 'https': 443}

    def _target_from_url(self, url):
        """Return the (host, port) pair named by an absolute URL.

        The port is always returned as an int.

        Raises ValueError if the URL has no scheme, has no host, or has no
        explicit port and a scheme that is not listed in _ports.
        """
        proto, rest = urllib.splittype(url)
        if proto is None:
            raise ValueError("unknown URL type: %s" % url)
        host, rest = urllib.splithost(rest)
        if not host:
            # Without this guard splitport() would be handed None/'' and
            # fail later with an unrelated error.
            raise ValueError("no host given in URL: %s" % url)
        host, port = urllib.splitport(host)
        if port is None:
            # No explicit port: fall back to the scheme's default.
            try:
                port = self._ports[proto]
            except KeyError:
                raise ValueError("unknown protocol for: %s" % url)
        # splitport() returns the port as a string of digits, while
        # connect() formats it with %d, so normalise to int here.
        return host, int(port)

    def request(self, method, url, body=None, headers=None):
        """Record the real destination of `url`, then send the request.

        The destination is stored on the instance before the base class
        request() runs, so that connect() can use it for the CONNECT line.

        Raises ValueError if the destination cannot be derived from `url`.
        """
        if headers is None:
            headers = {}
        self._real_host, self._real_port = self._target_from_url(url)
        httplib.HTTPConnection.request(self, method, url, body, headers)

    def connect(self):
        """Connect to the proxy and open a tunnel to the real host/port.

        Raises socket.error if the proxy answers the CONNECT request with
        anything other than status 200; the connection is closed first.
        """
        httplib.HTTPConnection.connect(self)
        # Send the proxy CONNECT request.
        self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n"
                  % (self._real_host, self._real_port))
        # Expect an "HTTP/1.0 200 Connection established" status line.
        response = self.response_class(self.sock, strict=self.strict,
                                       method=self._method)
        (version, code, message) = response._read_status()
        # Proxy authentication challenges could probably be handled here.
        if code != 200:
            # The proxy returned an error: abort the connection and raise.
            self.close()
            raise socket.error("Proxy connection failed: %d %s"
                               % (code, message.strip()))
        # Consume the rest of the proxy's header block.  Stop on the blank
        # line that ends it, and also on EOF ('' from readline) so a proxy
        # that closes the socket early cannot make this loop spin forever.
        while True:
            # NOTE: reads response.fp directly; probably should not.
            line = response.fp.readline()
            if not line.strip():
                break
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    53
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    54
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    55
class ProxyHTTPSConnection(ProxyHTTPConnection):
    """ProxyHTTPConnection that switches to SSL once the tunnel is open."""

    default_port = 443

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None):
        """Store the optional client key/certificate files.

        host, port and strict are forwarded to ProxyHTTPConnection;
        key_file and cert_file are kept for the SSL wrapping in connect().
        """
        # strict used to be accepted but silently dropped; forward it.
        ProxyHTTPConnection.__init__(self, host, port, strict)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        """Open the proxy tunnel, then make the socket SSL-aware."""
        ProxyHTTPConnection.connect(self)
        # NOTE(review): neither path below verifies the server certificate.
        try:
            import ssl
        except ImportError:
            # No ssl module (presumably Python older than 2.6): use the
            # legacy socket.ssl object wrapped in httplib's FakeSocket.
            ssl_obj = socket.ssl(self.sock, self.key_file, self.cert_file)
            self.sock = httplib.FakeSocket(self.sock, ssl_obj)
        else:
            # Preferred where available: gives back a full socket-like
            # object rather than relying on the legacy FakeSocket shim.
            self.sock = ssl.wrap_socket(self.sock, self.key_file,
                                        self.cert_file)
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    69
        
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    70
                                       
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    71
class ConnectHTTPHandler(urllib2.HTTPHandler):
    """urllib2 HTTP handler that always opens with ProxyHTTPConnection."""

    def do_open(self, http_class, req):
        """Open `req`, ignoring `http_class` in favour of the proxy class."""
        connection_class = ProxyHTTPConnection
        return urllib2.HTTPHandler.do_open(self, connection_class, req)
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    75
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    76
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    77
class ConnectHTTPSHandler(urllib2.HTTPSHandler):
    """urllib2 HTTPS handler that always opens with ProxyHTTPSConnection."""

    def do_open(self, http_class, req):
        """Open `req`, ignoring `http_class` in favour of the proxy class."""
        connection_class = ProxyHTTPSConnection
        return urllib2.HTTPSHandler.do_open(self, connection_class, req)
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    81
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    82
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    83
if __name__ == '__main__':
    # Demo: fetch a page through a CONNECT tunnel and print its body.
    # The proxy address and URL below are hard-coded sample values.
    opener = urllib2.build_opener(ConnectHTTPHandler, ConnectHTTPSHandler)
    urllib2.install_opener(opener)
    req = urllib2.Request(url='http://google.com')
    req.set_proxy('10.101.1.1:80', 'http')
    response = urllib2.urlopen(req)
    try:
        # Single-argument print(...) behaves the same as the print
        # statement on Python 2.
        print(response.read())
    finally:
        # Release the underlying connection even if reading fails.
        response.close()
327b3f0b73bb Added data fetching scripts.
Madhusudan.C.S <madhusudancs@gmail.com>
parents:
diff changeset
    93