--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app/python25src/urllib.py Sat Oct 04 06:25:07 2008 +0000
@@ -0,0 +1,342 @@
+"""Open an arbitrary URL.
+
+See the following document for more info on URLs:
+"Names and Addresses, URIs, URLs, URNs, URCs", at
+http://www.w3.org/pub/WWW/Addressing/Overview.html
+
+See also the HTTP spec (from which the error codes are derived):
+"HTTP - Hypertext Transfer Protocol", at
+http://www.w3.org/pub/WWW/Protocols/
+
+Related standards and specs:
+- RFC1808: the "relative URL" spec. (authoritative status)
+- RFC1738 - the "URL standard". (authoritative status)
+- RFC1630 - the "URI spec". (informational status)
+
+All code but that related to URL parsing has been removed (since it is not
+compatible with Google App Engine)from this fork of the original file,
+obtained from:
+http://svn.python.org/view/*checkout*/python/tags/r252/Lib/urllib.py?content-type=text%2Fplain&rev=60915
+"""
+
+import string
+import sys
+from urlparse import urljoin as basejoin
+
+__all__ = ["quote", "quote_plus", "unquote", "unquote_plus",
+ "urlencode", "splittag",
+ "basejoin", "unwrap",
+ "splittype", "splithost", "splituser", "splitpasswd", "splitport",
+ "splitnport", "splitquery", "splitattr", "splitvalue",
+ "splitgophertype",]
+
+__version__ = '1.17' # XXX This version is not always updated :-(
+
+
+# Utilities to parse URLs (most of these return None for missing parts):
+# unwrap('<URL:type://host/path>') --> 'type://host/path'
+# splittype('type:opaquestring') --> 'type', 'opaquestring'
+# splithost('//host[:port]/path') --> 'host[:port]', '/path'
+# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
+# splitpasswd('user:passwd') -> 'user', 'passwd'
+# splitport('host:port') --> 'host', 'port'
+# splitquery('/path?query') --> '/path', 'query'
+# splittag('/path#tag') --> '/path', 'tag'
+# splitattr('/path;attr1=value1;attr2=value2;...') ->
+# '/path', ['attr1=value1', 'attr2=value2', ...]
+# splitvalue('attr=value') --> 'attr', 'value'
+# splitgophertype('/Xselector') --> 'X', 'selector'
+# unquote('abc%20def') -> 'abc def'
+# quote('abc def') -> 'abc%20def')
+
+try:
+ unicode
+except NameError:
+ def _is_unicode(x):
+ return 0
+else:
+ def _is_unicode(x):
+ return isinstance(x, unicode)
+
+def toBytes(url):
+ """toBytes(u"URL") --> 'URL'."""
+ # Most URL schemes require ASCII. If that changes, the conversion
+ # can be relaxed
+ if _is_unicode(url):
+ try:
+ url = url.encode("ASCII")
+ except UnicodeError:
+ raise UnicodeError("URL " + repr(url) +
+ " contains non-ASCII characters")
+ return url
+
+def unwrap(url):
+ """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
+ url = url.strip()
+ if url[:1] == '<' and url[-1:] == '>':
+ url = url[1:-1].strip()
+ if url[:4] == 'URL:': url = url[4:].strip()
+ return url
+
+_typeprog = None
+def splittype(url):
+ """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
+ global _typeprog
+ if _typeprog is None:
+ import re
+ _typeprog = re.compile('^([^/:]+):')
+
+ match = _typeprog.match(url)
+ if match:
+ scheme = match.group(1)
+ return scheme.lower(), url[len(scheme) + 1:]
+ return None, url
+
+_hostprog = None
+def splithost(url):
+ """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
+ global _hostprog
+ if _hostprog is None:
+ import re
+ _hostprog = re.compile('^//([^/?]*)(.*)$')
+
+ match = _hostprog.match(url)
+ if match: return match.group(1, 2)
+ return None, url
+
+_userprog = None
+def splituser(host):
+ """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
+ global _userprog
+ if _userprog is None:
+ import re
+ _userprog = re.compile('^(.*)@(.*)$')
+
+ match = _userprog.match(host)
+ if match: return map(unquote, match.group(1, 2))
+ return None, host
+
+_passwdprog = None
+def splitpasswd(user):
+ """splitpasswd('user:passwd') -> 'user', 'passwd'."""
+ global _passwdprog
+ if _passwdprog is None:
+ import re
+ _passwdprog = re.compile('^([^:]*):(.*)$')
+
+ match = _passwdprog.match(user)
+ if match: return match.group(1, 2)
+ return user, None
+
+# splittag('/path#tag') --> '/path', 'tag'
+_portprog = None
+def splitport(host):
+ """splitport('host:port') --> 'host', 'port'."""
+ global _portprog
+ if _portprog is None:
+ import re
+ _portprog = re.compile('^(.*):([0-9]+)$')
+
+ match = _portprog.match(host)
+ if match: return match.group(1, 2)
+ return host, None
+
+_nportprog = None
+def splitnport(host, defport=-1):
+ """Split host and port, returning numeric port.
+ Return given default port if no ':' found; defaults to -1.
+ Return numerical port if a valid number are found after ':'.
+ Return None if ':' but not a valid number."""
+ global _nportprog
+ if _nportprog is None:
+ import re
+ _nportprog = re.compile('^(.*):(.*)$')
+
+ match = _nportprog.match(host)
+ if match:
+ host, port = match.group(1, 2)
+ try:
+ if not port: raise ValueError, "no digits"
+ nport = int(port)
+ except ValueError:
+ nport = None
+ return host, nport
+ return host, defport
+
+_queryprog = None
+def splitquery(url):
+ """splitquery('/path?query') --> '/path', 'query'."""
+ global _queryprog
+ if _queryprog is None:
+ import re
+ _queryprog = re.compile('^(.*)\?([^?]*)$')
+
+ match = _queryprog.match(url)
+ if match: return match.group(1, 2)
+ return url, None
+
+_tagprog = None
+def splittag(url):
+ """splittag('/path#tag') --> '/path', 'tag'."""
+ global _tagprog
+ if _tagprog is None:
+ import re
+ _tagprog = re.compile('^(.*)#([^#]*)$')
+
+ match = _tagprog.match(url)
+ if match: return match.group(1, 2)
+ return url, None
+
+def splitattr(url):
+ """splitattr('/path;attr1=value1;attr2=value2;...') ->
+ '/path', ['attr1=value1', 'attr2=value2', ...]."""
+ words = url.split(';')
+ return words[0], words[1:]
+
+_valueprog = None
+def splitvalue(attr):
+ """splitvalue('attr=value') --> 'attr', 'value'."""
+ global _valueprog
+ if _valueprog is None:
+ import re
+ _valueprog = re.compile('^([^=]*)=(.*)$')
+
+ match = _valueprog.match(attr)
+ if match: return match.group(1, 2)
+ return attr, None
+
+def splitgophertype(selector):
+ """splitgophertype('/Xselector') --> 'X', 'selector'."""
+ if selector[:1] == '/' and selector[1:2]:
+ return selector[1], selector[2:]
+ return None, selector
+
+_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
+_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+
+def unquote(s):
+ """unquote('abc%20def') -> 'abc def'."""
+ res = s.split('%')
+ for i in xrange(1, len(res)):
+ item = res[i]
+ try:
+ res[i] = _hextochr[item[:2]] + item[2:]
+ except KeyError:
+ res[i] = '%' + item
+ except UnicodeDecodeError:
+ res[i] = unichr(int(item[:2], 16)) + item[2:]
+ return "".join(res)
+
+def unquote_plus(s):
+ """unquote('%7e/abc+def') -> '~/abc def'"""
+ s = s.replace('+', ' ')
+ return unquote(s)
+
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ 'abcdefghijklmnopqrstuvwxyz'
+ '0123456789' '_.-')
+_safemaps = {}
+
+def quote(s, safe = '/'):
+ """quote('abc def') -> 'abc%20def'
+
+ Each part of a URL, e.g. the path info, the query, etc., has a
+ different set of reserved characters that must be quoted.
+
+ RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+ the following reserved characters.
+
+ reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+ "$" | ","
+
+ Each of these characters is reserved in some component of a URL,
+ but not necessarily in all of them.
+
+ By default, the quote function is intended for quoting the path
+ section of a URL. Thus, it will not encode '/'. This character
+ is reserved, but in typical usage the quote function is being
+ called on a path where the existing slash characters are used as
+ reserved characters.
+ """
+ cachekey = (safe, always_safe)
+ try:
+ safe_map = _safemaps[cachekey]
+ except KeyError:
+ safe += always_safe
+ safe_map = {}
+ for i in range(256):
+ c = chr(i)
+ safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+ _safemaps[cachekey] = safe_map
+ res = map(safe_map.__getitem__, s)
+ return ''.join(res)
+
+def quote_plus(s, safe = ''):
+ """Quote the query fragment of a URL; replacing ' ' with '+'"""
+ if ' ' in s:
+ s = quote(s, safe + ' ')
+ return s.replace(' ', '+')
+ return quote(s, safe)
+
+def urlencode(query,doseq=0):
+ """Encode a sequence of two-element tuples or dictionary into a URL query string.
+
+ If any values in the query arg are sequences and doseq is true, each
+ sequence element is converted to a separate parameter.
+
+ If the query arg is a sequence of two-element tuples, the order of the
+ parameters in the output will match the order of parameters in the
+ input.
+ """
+
+ if hasattr(query,"items"):
+ # mapping objects
+ query = query.items()
+ else:
+ # it's a bother at times that strings and string-like objects are
+ # sequences...
+ try:
+ # non-sequence items should not work with len()
+ # non-empty strings will fail this
+ if len(query) and not isinstance(query[0], tuple):
+ raise TypeError
+ # zero-length sequences of all types will get here and succeed,
+ # but that's a minor nit - since the original implementation
+ # allowed empty dicts that type of behavior probably should be
+ # preserved for consistency
+ except TypeError:
+ ty,va,tb = sys.exc_info()
+ raise TypeError, "not a valid non-string sequence or mapping object", tb
+
+ l = []
+ if not doseq:
+ # preserve old behavior
+ for k, v in query:
+ k = quote_plus(str(k))
+ v = quote_plus(str(v))
+ l.append(k + '=' + v)
+ else:
+ for k, v in query:
+ k = quote_plus(str(k))
+ if isinstance(v, str):
+ v = quote_plus(v)
+ l.append(k + '=' + v)
+ elif _is_unicode(v):
+ # is there a reasonable way to convert to ASCII?
+ # encode generates a string, but "replace" or "ignore"
+ # lose information and "strict" can raise UnicodeError
+ v = quote_plus(v.encode("ASCII","replace"))
+ l.append(k + '=' + v)
+ else:
+ try:
+ # is this a sufficient test for sequence-ness?
+ x = len(v)
+ except TypeError:
+ # not a sequence
+ v = quote_plus(str(v))
+ l.append(k + '=' + v)
+ else:
+ # loop over the sequence
+ for elt in v:
+ l.append(k + '=' + quote_plus(str(elt)))
+ return '&'.join(l)