• Facebook
  • Twitter
  • Reddit
  • StumbleUpon
  • Digg
  • email

All Samples(1957)  |  Call(1402)  |  Derive(0)  |  Import(555)
Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
Return a 5-tuple: (scheme, netloc, path, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes.

        def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    Arguments:
      url             -- the URL string to split.
      scheme          -- scheme to report when the URL itself carries none.
      allow_fragments -- when false, '#' is never treated as a fragment
                         separator and the fragment component stays ''.

    Results are memoized in _parse_cache.  Raises ValueError("Invalid
    IPv6 URL") when the netloc contains only one of '[' and ']'."""
    # Normalize to a real bool so equivalent truthy/falsy arguments share
    # one cache entry.
    allow_fragments = bool(allow_fragments)
    # The argument types are part of the key as well as their values.
    # NOTE(review): presumably so equal-comparing strings of different
    # types do not share a cached result -- confirm.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    # The cache is not trimmed entry by entry: once it reaches the limit
    # clear_cache() is called before the new result is stored.
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    # i > 0 means a ':' exists and is not the first character, so url[:i]
    # is a non-empty scheme candidate.
    if i > 0:
        # The comparison is case-sensitive: only a literal lowercase 'http'
        # prefix takes this fast path.
        if url[:i] == 'http': # optimize the common case
            scheme = url[:i].lower()
            url = url[i+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
                # Reject a netloc with an unmatched square bracket.
                if (('[' in netloc and ']' not in netloc) or
                        (']' in netloc and '[' not in netloc)):
                    raise ValueError("Invalid IPv6 URL")
            # Unlike the general path below, the fast path does not consult
            # uses_fragment / uses_query before splitting.
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return v
        # for/else: the else branch runs only when the loop finished without
        # a break, i.e. every character before the ':' is in scheme_chars.
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            scheme, url = url[:i].lower(), url[i+1:]

    # General path: also reached when no scheme was found, in which case
    # the caller-supplied default scheme is still in effect.
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # Reject a netloc with an unmatched square bracket.
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    # Fragment and query are split off only for schemes listed in
    # uses_fragment / uses_query respectively.
    if allow_fragments and scheme in uses_fragment and '#' in url:
        url, fragment = url.split('#', 1)
    if scheme in uses_query and '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return v
        


src/a/r/artie-HEAD/example/applications/scrape.py   artie(Download)
__date__ = '$Date: 2010-02-21 23:49:22 $'.split()[1].replace('/', '-')
__version__ = '$Revision: 1.48 $'
 
from urlparse import urlsplit, urljoin
from htmlentitydefs import name2codepoint
import sys, re
 
    will be a Unicode string, decoded using the given charset.  Giving the
    'charset' argument overrides any received 'charset' parameter; a charset
    of RAW ensures that the content is left undecoded in an 8-bit string."""
    scheme, host, path, query, fragment = urlsplit(url)
    host = host.split('@')[-1]
    path = path or '/'
 
    def setcookie(self, cookieline):
        """Store one cookie in this session's cookie jar.

        'cookieline' is expected in the form
        "<name>=<value>; domain=<domain>; path=<path>".  The cookie is
        filed under the host of self.url, with any "user@" prefix and
        ":port" suffix removed from the network location.
        """
        # Index 1 of the split result is the network location.
        netloc = urlsplit(self.url)[1]
        # Keep what follows the last '@', then what precedes the first ':'.
        hostport = netloc.split('@')[-1]
        hostname = hostport.split(':')[0]
        setcookies(self.cookiejar, hostname, [cookieline])
 

src/b/u/bunny1-HEAD/src/b1_example.py   bunny1(Download)
def rewrite_tld(url, new_tld):
    """changes the last thing after the dot in the netloc in a URL"""
    (scheme, netloc, path, query, fragment) = urlparse.urlsplit(url)
    domain = netloc.split(".")
 
    # this is just an example so we naively assume the TLD doesn't
    # include any dots (so this breaks if you try to rewrite .co.jp

src/p/y/python-cookbook-HEAD/cb2_examples/cb2_14_4_sol_1.py   python-cookbook(Download)
def httpExists(url):
    host, path = urlparse.urlsplit(url)[1:3]
    if ':' in host:
        # port specified, try to use it
        host, port = host.split(':', 1)
        try:
            port = int(port)

src/t/w/twitstream-HEAD/examples/warehouse.py   twitstream(Download)
#!/usr/bin/env python
 
import sys
import twitstream
from urlparse import urlunsplit, urlsplit
from binascii import unhexlify, hexlify
 
    def urlparse(self, url):
        """Break 'url' into (scheme, location, port, path).

        The location, port and path are all taken from the *path*
        component that urlsplit() reports, after its leading slashes are
        removed: text before the first '/' is "location[:port]" and text
        after it is the returned path.  The port is returned as an int
        and is 0 when none is given.
        """
        parts = urlsplit(url)
        scheme = parts[0]
        # Only the path component (index 2) is examined further.
        remainder = parts[2].lstrip('/')
        locport, _, path = remainder.partition('/')
        location, _, port = locport.partition(':')
        # An empty port string falls back to 0 before conversion.
        return (scheme, location, int(port or 0), path)

src/p/y/pyitc-HEAD/mechanize/examples/pypi.py   pyitc(Download)
    browser.submit()
    browser.follow_link(text_regex="mechanize-?(.*)")
    link = browser.find_link(text_regex=r"\.tar\.gz")
    filename = os.path.basename(urlparse.urlsplit(link.url)[2])
    if os.path.exists(filename):
        sys.exit("%s already exists, not grabbing" % filename)
    browser.retrieve(link.url, filename)

src/m/e/mechanize-HEAD/examples/pypi.py   mechanize(Download)
    browser.submit()
    browser.follow_link(text_regex="mechanize-?(.*)")
    link = browser.find_link(text_regex=r"\.tar\.gz")
    filename = os.path.basename(urlparse.urlsplit(link.url)[2])
    if os.path.exists(filename):
        sys.exit("%s already exists, not grabbing" % filename)
    browser.retrieve(link.url, filename)

src/m/e/mechanize-0.2.2/examples/pypi.py   mechanize(Download)
    browser.submit()
    browser.follow_link(text_regex="mechanize-?(.*)")
    link = browser.find_link(text_regex=r"\.tar\.gz")
    filename = os.path.basename(urlparse.urlsplit(link.url)[2])
    if os.path.exists(filename):
        sys.exit("%s already exists, not grabbing" % filename)
    browser.retrieve(link.url, filename)

src/p/y/pyrocore-0.3.8/src/pyrocore/scripts/chtor.py   pyrocore(Download)
        filter_url_prefix = None
        if self.options.reannounce:
            # <scheme>://<netloc>/<path>?<query>
            filter_url_prefix = urlparse.urlsplit(self.options.reannounce, allow_fragments=False)
            filter_url_prefix = urlparse.urlunsplit((
                filter_url_prefix.scheme, filter_url_prefix.netloc, '/', '', ''
            ))

src/b/u/buildbot-HEAD/master/contrib/trac/bbwatcher/api.py   buildbot(Download)
	def __init__(self, url):
		"""Create the XML-RPC server proxy for the given URL.

		Only the scheme and network location of 'url' are kept; 'http'
		is passed to urlsplit() as the default scheme, and the path is
		always replaced by '/xmlrpc'.

		Raises ValueError if any step of the setup raises an Exception.
		"""
		try:
			# Path, query and fragment of the supplied URL are discarded.
			scheme, loc, _, _, _ = urlparse.urlsplit(url, scheme='http')
			url = '%s://%s/xmlrpc'%(scheme, loc)
			self.server = xmlrpclib.ServerProxy(url)
		except Exception, e:
			# NOTE: 'url' may already have been rebound to the rewritten
			# '/xmlrpc' form above, so the message can show that value
			# rather than the caller's original argument.
			raise ValueError('Invalid BuildBot XML-RPC server %s: %s'%(url, e))

src/p/y/Python_WebDAV_Library-0.2.0/src/webdav/WebdavClient.py   Python_WebDAV_Library(Download)
 
from davlib import XML_CONTENT_TYPE
 
from urlparse import urlsplit
import re
import types
 
        """
 
        assert connection == None or isinstance(connection, Connection)
        parts = urlsplit(url, allow_fragments=False)
        self.path = parts[2]
        self.validateResourceNames = validateResourceNames
 
    @raise ValueError: If the URL does not contain valid/usable content.
    """
 
    parts = urlsplit(url, allow_fragments=False)
    if len(parts[0]) == 0 or len(parts[1]) == 0 or len(parts[2]) == 0:
        raise ValueError("Invalid URL: " + repr(url))
 

  1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9  Next