def _parse_url(self, dst, src):
    """
    Resolve link 'dst' found on page 'src' and keep it only if it is same-origin.

    'dst' may be absolute, scheme-relative ('//host/path'), root-relative
    ('/path') or relative ('path', '../path', '?query'). It is resolved
    against 'src' and accepted only when it points at the same scheme,
    host and port as 'src'. When a netloc carries no explicit port, the
    default port for http (80) or https (443) is assumed on BOTH sides
    before comparing.

    Args:
        dst: The target url string to check.
        src: The url string of the page that 'dst' was found on.

    Returns:
        The complete url as a string, built from the scheme and netloc of
        'src' plus the resolved path and query string of 'dst' (path
        params and the fragment are dropped). Returns '' when 'dst' is not
        in the same domain (including port) or uses a different scheme.
    """
    LOG.debug('detecting url: ' + dst)
    s_parsed = urlparse.urlparse(src)
    d_parsed = urlparse.urlparse(dst)
    s_scheme = s_parsed.scheme
    s_netloc = s_parsed.netloc

    # An explicit scheme that differs from the page's (mailto:, javascript:,
    # http vs https, ...) can never be same-origin, whether or not the link
    # carries a netloc.
    if d_parsed.scheme not in ('', s_scheme):
        return ''

    if d_parsed.netloc != '':
        # From here on the effective scheme of dst equals s_scheme, so one
        # default port applies to both sides of the comparison.
        default_port = {'http': ':80', 'https': ':443'}.get(s_scheme, '')

        def with_port(netloc):
            # Leave netlocs that already contain ':' (explicit port) alone.
            return netloc if ':' in netloc else netloc + default_port

        if with_port(d_parsed.netloc) != with_port(s_netloc):
            return ''

    # urljoin implements standard reference resolution: it copes with an
    # empty src path, root-relative links and '.'/'..' segments.
    resolved = urlparse.urlparse(urlparse.urljoin(src, dst))

    # Rebuild with the page's own scheme/netloc so the result keeps the
    # same netloc spelling as src; params and fragment are dropped.
    return urlparse.ParseResult(
        s_scheme, s_netloc, resolved.path or '/', '', resolved.query, ''
    ).geturl()
评论列表
文章目录