def parse(s):
    '''
    Parse a path given as a URL or as a plain filesystem path.

    Accepts strings of the form:

        s3://bucket-name/path/to/key
        file:///path/to/file
        /absolute/path/to/file
        relative/path/to/file
        ~/path/from/home/dir/to/file

    Any string whose second character is ':' (e.g. C:/path/to/file) is
    treated as a Windows drive path and returned verbatim with the
    'file' scheme; it is not passed through urlparse.

    To avoid surprises, s3:// and file:// URLs should not
    include ;, ? or #. You should URL-encode such paths.

    Return value is a ParseResult whose params, query and fragment
    fields are always None; one of the following:

        ('s3', bucketname, key_path_as_returned_by_urlparse, ...)
        ('file', '', absolute_path_for_current_filesystem, ...)

    For non-Windows 'file' paths, '~' is expanded and the path is made
    absolute; a trailing separator on the input is preserved.

    Raises ValueError when:
      - s is not a string;
      - s (other than a drive path) contains ;, ? or #, or urlparse
        reports params, a query or a fragment;
      - the path component cannot be encoded as UTF-8;
      - the scheme is 's3' but no bucket is given.
    '''
    import os
    import re
    try:
        from urllib.parse import urlparse, ParseResult  # Python 3
    except ImportError:
        from urlparse import urlparse, ParseResult  # Python 2

    try:
        string_types = basestring  # Python 2: str and unicode  # pylint: disable=undefined-variable
    except NameError:
        string_types = str  # Python 3

    if not isinstance(s, string_types):
        raise ValueError("An S3 path must be a string, got %s" % s.__class__.__name__)

    # A colon in second position is taken to be a drive letter ("C:...").
    # Such paths bypass urlparse, which would read the letter as a scheme.
    is_windows_path = (len(s) >= 2 and s[1] == ':')
    if is_windows_path:
        scheme, netloc, s3path = 'file', '', s
    else:
        scheme, netloc, s3path, params, query, fragment = urlparse(s)
        if any([params, query, fragment]):
            raise ValueError("Invalid URI: %s" % s)
        # Catches delimiters urlparse did not split out into their own
        # component (e.g. a trailing '?' with an empty query).
        if any(char in ';?#' for char in s):
            raise ValueError("Invalid URI: %s" % s)

    # If somehow something ever gets uploaded with binary in the
    # key, this seems to be the only way to fix it:
    # `s3cmd fixbucket s3://bodylabs-korper-assets`
    try:
        s3path.encode('UTF-8')
    except (UnicodeDecodeError, UnicodeEncodeError):
        raise ValueError("Invalid URI (bad unicode): %s" % s)

    # urlparse, given file:///C:\foo parses us to /C:\foo, so on
    # reconstruction (on windows) we get C:\C:\foo. Drop the leading '/'.
    if re.match(r'/\w:', s3path):
        s3path = s3path[1:]
        is_windows_path = True

    # A bare path (no scheme) is a local file.
    if scheme == '':
        scheme = 'file'

    if scheme == 'file' and not is_windows_path:
        absolute = os.path.abspath(os.path.expanduser(s3path))
        if s3path.endswith(os.sep) or s3path.endswith('/'):
            # os.path.abspath strips the trailing '/' so we need to put it back
            s3path = os.path.join(absolute, '')
        else:
            s3path = absolute

    if scheme == 's3' and netloc == '':
        raise ValueError('s3 urls must specify the bucket')

    return ParseResult(scheme, netloc, s3path, params=None, query=None, fragment=None)  # pylint: disable=too-many-function-args,unexpected-keyword-arg
评论列表
文章目录