All Samples(1957) | Call(1402) | Derive(0) | Import(555)
Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    'scheme' is the value reported when the URL itself carries no valid
    scheme prefix.  When 'allow_fragments' is false, a '#' is left in
    place rather than being split off into the fragment component.

    Results are memoized in _parse_cache; the cache is emptied once it
    holds MAX_CACHE_SIZE entries.

    Raises ValueError("Invalid IPv6 URL") if the netloc contains only
    one of '[' and ']'."""
    allow_fragments = bool(allow_fragments)
    # The argument types are part of the key (presumably so that inputs
    # which compare equal but differ in string type get separate entries).
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()

    netloc = query = fragment = ''
    # A literal, lower-case 'http' prefix is the common case: it skips the
    # per-character scheme validation and always has its query and
    # fragment split off, without consulting uses_query / uses_fragment.
    is_http = False
    i = url.find(':')
    if i > 0:
        prefix = url[:i]
        is_http = prefix == 'http'
        if is_http or all(c in scheme_chars for c in prefix):
            scheme, url = prefix.lower(), url[i+1:]

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # Brackets must come as a pair; exactly one of them is an error.
        if ('[' in netloc) != (']' in netloc):
            raise ValueError("Invalid IPv6 URL")
    # Fragment is removed before the query so that a '?' after the '#'
    # stays inside the fragment.
    if (allow_fragments and (is_http or scheme in uses_fragment)
            and '#' in url):
        url, fragment = url.split('#', 1)
    if (is_http or scheme in uses_query) and '?' in url:
        url, query = url.split('?', 1)

    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return v
__date__ = '$Date: 2010-02-21 23:49:22 $'.split()[1].replace('/', '-')
__version__ = '$Revision: 1.48 $'
from urlparse import urlsplit, urljoin
from htmlentitydefs import name2codepoint
import sys, re
will be a Unicode string, decoded using the given charset. Giving the
'charset' argument overrides any received 'charset' parameter; a charset
of RAW ensures that the content is left undecoded in an 8-bit string."""
scheme, host, path, query, fragment = urlsplit(url)
host = host.split('@')[-1]
path = path or '/'
def setcookie(self, cookieline):
    """Add one cookie to this session's cookie jar.

    'cookieline' should have the format
    "<name>=<value>; domain=<domain>; path=<path>".  The cookie is
    stored against the host of self.url."""
    netloc = urlsplit(self.url)[1]
    # Drop anything up to the last '@', then anything after the first ':'.
    hostport = netloc.split('@')[-1]
    host = hostport.split(':')[0]
    setcookies(self.cookiejar, host, [cookieline])
src/b/u/bunny1-HEAD/src/b1_example.py bunny1(Download)
def rewrite_tld(url, new_tld):
"""changes the last thing after the dot in the netloc in a URL"""
(scheme, netloc, path, query, fragment) = urlparse.urlsplit(url)
domain = netloc.split(".")
# this is just an example so we naively assume the TLD doesn't
# include any dots (so this breaks if you try to rewrite .co.jp
src/p/y/python-cookbook-HEAD/cb2_examples/cb2_14_4_sol_1.py python-cookbook(Download)
def httpExists(url):
host, path = urlparse.urlsplit(url)[1:3]
if ':' in host:
# port specified, try to use it
host, port = host.split(':', 1)
try:
port = int(port)
src/t/w/twitstream-HEAD/examples/warehouse.py twitstream(Download)
#!/usr/bin/env python import sys import twitstream from urlparse import urlunsplit, urlsplit from binascii import unhexlify, hexlify
def urlparse(self, url):
    """Break 'url' into a (scheme, location, port, path) tuple.

    The location and port are read from the first '/'-separated segment
    of the path component returned by urlsplit (after stripping leading
    slashes), not from its netloc component.  The port is returned as an
    int and is 0 when the URL does not give one."""
    scheme, _netloc, remainder, _query, _fragment = urlsplit(url)
    locport, _sep, path = remainder.lstrip('/').partition('/')
    location, _sep, port = locport.partition(':')
    # An absent port comes back from partition() as '', which maps to 0.
    return (scheme, location, int(port or 0), path)
src/p/y/pyitc-HEAD/mechanize/examples/pypi.py pyitc(Download)
browser.submit()
browser.follow_link(text_regex="mechanize-?(.*)")
link = browser.find_link(text_regex=r"\.tar\.gz")
filename = os.path.basename(urlparse.urlsplit(link.url)[2])
if os.path.exists(filename):
sys.exit("%s already exists, not grabbing" % filename)
browser.retrieve(link.url, filename)
src/m/e/mechanize-HEAD/examples/pypi.py mechanize(Download)
browser.submit()
browser.follow_link(text_regex="mechanize-?(.*)")
link = browser.find_link(text_regex=r"\.tar\.gz")
filename = os.path.basename(urlparse.urlsplit(link.url)[2])
if os.path.exists(filename):
sys.exit("%s already exists, not grabbing" % filename)
browser.retrieve(link.url, filename)
src/m/e/mechanize-0.2.2/examples/pypi.py mechanize(Download)
browser.submit()
browser.follow_link(text_regex="mechanize-?(.*)")
link = browser.find_link(text_regex=r"\.tar\.gz")
filename = os.path.basename(urlparse.urlsplit(link.url)[2])
if os.path.exists(filename):
sys.exit("%s already exists, not grabbing" % filename)
browser.retrieve(link.url, filename)
src/p/y/pyrocore-0.3.8/src/pyrocore/scripts/chtor.py pyrocore(Download)
filter_url_prefix = None
if self.options.reannounce:
# <scheme>://<netloc>/<path>?<query>
filter_url_prefix = urlparse.urlsplit(self.options.reannounce, allow_fragments=False)
filter_url_prefix = urlparse.urlunsplit((
filter_url_prefix.scheme, filter_url_prefix.netloc, '/', '', ''
))
src/b/u/buildbot-HEAD/master/contrib/trac/bbwatcher/api.py buildbot(Download)
def __init__(self, url):
try:
scheme, loc, _, _, _ = urlparse.urlsplit(url, scheme='http')
url = '%s://%s/xmlrpc'%(scheme, loc)
self.server = xmlrpclib.ServerProxy(url)
except Exception, e:
raise ValueError('Invalid BuildBot XML-RPC server %s: %s'%(url, e))
src/p/y/Python_WebDAV_Library-0.2.0/src/webdav/WebdavClient.py Python_WebDAV_Library(Download)
from davlib import XML_CONTENT_TYPE from urlparse import urlsplit import re import types
"""
assert connection == None or isinstance(connection, Connection)
parts = urlsplit(url, allow_fragments=False)
self.path = parts[2]
self.validateResourceNames = validateResourceNames
@raise ValueError: If the URL does not contain valid/usable content.
"""
parts = urlsplit(url, allow_fragments=False)
if len(parts[0]) == 0 or len(parts[1]) == 0 or len(parts[2]) == 0:
raise ValueError("Invalid URL: " + repr(url))
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 Next