path.py 文件源码-python代码片段

def parse(s):
    '''
    Parse a path given as a url. Accepts strings of the form:

       s3://bucket-name/path/to/key
       file:///path/to/file
       /absolution/path/to/file
       relative/path/to/file
       ~/path/from/home/dir/to/file

       To avoid surprises, s3:// and file:// URLs should not
       include ;, ? or #. You should URL-encode such paths.

    Return value is a ParseResult; one of the following:

       ('s3', bucketname, valid_s3_key, ...)
       ('file', '', absolute_path_for_current_filesystem, ...)

    '''
    import re
    from urlparse import urlparse, ParseResult

    if not isinstance(s, basestring):
        raise ValueError("An S3 path must be a string, got %s" % s.__class__.__name__)

    is_windows_path = (len(s) >= 2 and s[1] == ':')
    if is_windows_path:
        scheme, netloc, s3path = 'file', '', s
    else:
        scheme, netloc, s3path, params, query, fragment = urlparse(s)
        if any([params, query, fragment]):
            raise ValueError("Invalid URI: %s" % s)
        if any(char in ';?#' for char in s):
            raise ValueError("Invalid URI: %s" % s)
        try:
            s3path.encode('UTF-8')
        except (UnicodeDecodeError, UnicodeEncodeError):
            raise ValueError("Invalid URI (bad unicode): %s" % s)
            # If somehow something ever gets uploaded with binary in the
            # key, this seems to be the only way to fix it:
            # `s3cmd fixbucket s3://bodylabs-korper-assets`
    if re.match(r'/\w:', s3path): # urlparse, given file:///C:\foo parses us to /C:\foo, so on reconstruction (on windows) we get C:\C:\foo.
        s3path = s3path[1:]
        is_windows_path = True
    if scheme == '':
        scheme = 'file'
    if scheme == 'file' and not is_windows_path:
        if s3path.endswith(os.sep) or s3path.endswith('/'):
            # os.path.abspath strips the trailing '/' so we need to put it back
            s3path = os.path.join(os.path.abspath(os.path.expanduser(s3path)), '')
        else:
            s3path = os.path.abspath(os.path.expanduser(s3path))
    if scheme == 's3' and netloc == '':
        raise ValueError('s3 urls must specify the bucket')
    return ParseResult(scheme, netloc, s3path, params=None, query=None, fragment=None) # pylint: disable=too-many-function-args,unexpected-keyword-arg