py_tasks_melange: comparison app/python25src/urllib.py

equal deleted inserted replaced

-:6314fce617c5
+:ce9b10bbdd42
+"""Open an arbitrary URL.
+See the following document for more info on URLs:
+"Names and Addresses, URIs, URLs, URNs, URCs", at
+http://www.w3.org/pub/WWW/Addressing/Overview.html
+See also the HTTP spec (from which the error codes are derived):
+"HTTP - Hypertext Transfer Protocol", at
+http://www.w3.org/pub/WWW/Protocols/
+Related standards and specs:
+- RFC1808: the "relative URL" spec. (authoritative status)
+- RFC1738 - the "URL standard". (authoritative status)
+- RFC1630 - the "URI spec". (informational status)
+All code but that related to URL parsing has been removed (since it is not
+compatible with Google App Engine) from this fork of the original file,
+obtained from:
+http://svn.python.org/view/*checkout*/python/tags/r252/Lib/urllib.py?content-type=text%2Fplain&rev=60915
+"""
+import string
+import sys
+from urlparse import urljoin as basejoin
+__all__ = ["quote", "quote_plus", "unquote", "unquote_plus",
+"urlencode", "splittag",
+"basejoin", "unwrap",
+"splittype", "splithost", "splituser", "splitpasswd", "splitport",
+"splitnport", "splitquery", "splitattr", "splitvalue",
+"splitgophertype",]
+__version__ = '1.17'    # XXX This version is not always updated :-(
+# Utilities to parse URLs (most of these return None for missing parts):
+# unwrap('<URL:type://host/path>') --> 'type://host/path'
+# splittype('type:opaquestring') --> 'type', 'opaquestring'
+# splithost('//host[:port]/path') --> 'host[:port]', '/path'
+# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
+# splitpasswd('user:passwd') -> 'user', 'passwd'
+# splitport('host:port') --> 'host', 'port'
+# splitquery('/path?query') --> '/path', 'query'
+# splittag('/path#tag') --> '/path', 'tag'
+# splitattr('/path;attr1=value1;attr2=value2;...') ->
+#   '/path', ['attr1=value1', 'attr2=value2', ...]
+# splitvalue('attr=value') --> 'attr', 'value'
+# splitgophertype('/Xselector') --> 'X', 'selector'
+# unquote('abc%20def') -> 'abc def'
+# quote('abc def') -> 'abc%20def'
+try:
+unicode
+except NameError:
+def _is_unicode(x):
+return 0
+else:
+def _is_unicode(x):
+return isinstance(x, unicode)
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Values that are not unicode strings are returned untouched.  Unicode
    strings are encoded as ASCII; a UnicodeError naming the offending URL
    is raised when that is not possible.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII.  If that changes, the conversion
    # can be relaxed.
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one enclosing '<...>' pair and a leading
    'URL:' prefix are removed, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        # Drop the angle brackets and any padding inside them.
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
# Compiled lazily on the first call to splittype().
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased.  When the URL does not start with
    a 'scheme:' prefix the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    # Skip the scheme and the ':' that follows it.
    remainder = url[len(scheme) + 1:]
    return scheme.lower(), remainder
# Compiled lazily on the first call to splithost().
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/' or '?'.  When the URL does not
    start with '//' the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
# Compiled lazily on the first call to splituser().
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'; both parts are passed through
    unquote() and returned as a two-element list.  Without an '@' the
    result is the tuple (None, host).
    """
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
# Compiled lazily on the first call to splitpasswd().
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Without a ':' the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
+# splitport('host:port') --> 'host', 'port'
# Compiled lazily on the first call to splitport().
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and is only recognised when the text
    after the last ':' consists of one or more digits; otherwise the
    result is (host, None).
    """
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
# Compiled lazily on the first call to splitnport().
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning a numeric port.

    Return (host, defport) if no ':' is found; defport defaults to -1.
    Return (host, int(port)) if a valid number is found after the last ':'.
    Return (host, None) if there is a ':' but what follows it is empty or
    not a valid number.
    """
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if not match:
        return host, defport

    host, port = match.group(1, 2)
    if not port:
        # A trailing ':' with nothing after it is not a usable port.
        return host, None
    try:
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
# Compiled lazily on the first call to splitquery().
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without a '?' the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' is a regex escape, not a string-literal escape.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None
# Compiled lazily on the first call to splittag().
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without a '#' the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
    '/path', ['attr1=value1', 'attr2=value2', ...].

    Everything before the first ';' is the path; each following
    ';'-separated piece becomes one entry of the attribute list.
    """
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
# Compiled lazily on the first call to splitvalue().
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Without an '=' the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The type is the single character following a leading '/'.  When the
    selector does not start with '/' or has nothing after it, the result
    is (None, selector).
    """
    has_type = selector[:1] == '/' and selector[1:2]
    if not has_type:
        return None, selector
    gopher_type = selector[1]
    remainder = selector[2:]
    return gopher_type, remainder
# Lookup table from two-digit hex strings to characters.  Only all-lowercase
# and all-uppercase spellings are present; mixed case such as 'aB' is not.
_hextochr = dict(('%02x' % code, chr(code)) for code in range(256))
_hextochr.update(('%02X' % code, chr(code)) for code in range(256))
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Each '%XX' escape found in the lookup table is replaced by the
    character it encodes.  A '%' that is not followed by a known
    two-digit hex pair is left in the output unchanged.
    """
    pieces = s.split('%')
    # Text before the first '%' can never contain an escape.
    decoded = [pieces[0]]
    for piece in pieces[1:]:
        try:
            decoded.append(_hextochr[piece[:2]] + piece[2:])
        except KeyError:
            # Not a recognised escape: put the '%' back untouched.
            decoded.append('%' + piece)
        except UnicodeDecodeError:
            # The decoded character could not be concatenated with the
            # rest of the piece; retry with a unicode character instead.
            decoded.append(unichr(int(piece[:2], 16)) + piece[2:])
    return "".join(decoded)
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is turned into a space first.
    """
    return unquote(s.replace('+', ' '))
# Characters that are never percent-encoded, whatever 'safe' is given.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of translation tables, keyed by (safe, always_safe).
_safemaps = {}
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in 'safe' and in always_safe are kept as they are; every
    other character with an ordinal below 256 becomes '%XX' with
    uppercase hex digits.
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # First use of this 'safe' set: build and remember its table.
        unescaped = safe + always_safe
        safe_map = {}
        for code in range(256):
            char = chr(code)
            if char in unescaped:
                safe_map[char] = char
            else:
                safe_map[char] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join([safe_map[char] for char in s])
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Unlike quote(), '/' is not safe by default.
    """
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces out of the percent-encoding, then turn them into '+'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
+def urlencode(query,doseq=0):
+"""Encode a sequence of two-element tuples or dictionary into a URL query string.
+If any values in the query arg are sequences and doseq is true, each
+sequence element is converted to a separate parameter.
+If the query arg is a sequence of two-element tuples, the order of the
+parameters in the output will match the order of parameters in the
+input.
+"""
+if hasattr(query,"items"):
+# mapping objects
+query = query.items()
+else:
+# it's a bother at times that strings and string-like objects are
+# sequences...
+try:
+# non-sequence items should not work with len()
+# non-empty strings will fail this
+if len(query) and not isinstance(query[0], tuple):
+raise TypeError
+# zero-length sequences of all types will get here and succeed,
+# but that's a minor nit - since the original implementation
+# allowed empty dicts that type of behavior probably should be
+# preserved for consistency
+except TypeError:
+ty,va,tb = sys.exc_info()
+raise TypeError, "not a valid non-string sequence or mapping object", tb
+l = []
+if not doseq:
+# preserve old behavior
+for k, v in query:
+k = quote_plus(str(k))
+v = quote_plus(str(v))
+l.append(k + '=' + v)
+else:
+for k, v in query:
+k = quote_plus(str(k))
+if isinstance(v, str):
+v = quote_plus(v)
+l.append(k + '=' + v)
+elif _is_unicode(v):
+# is there a reasonable way to convert to ASCII?
+# encode generates a string, but "replace" or "ignore"
+# lose information and "strict" can raise UnicodeError
+v = quote_plus(v.encode("ASCII","replace"))
+l.append(k + '=' + v)
+else:
+try:
+# is this a sufficient test for sequence-ness?
+x = len(v)
+except TypeError:
+# not a sequence
+v = quote_plus(str(v))
+l.append(k + '=' + v)
+else:
+# loop over the sequence
+for elt in v:
+l.append(k + '=' + quote_plus(str(elt)))
+return '&'.join(l)