def detect_name(url: str, title: str) -> str:
    """Guess a short, lowercase name for the thing that *url* points to.

    Heuristics, tried in order:

    1. For ``github.com`` URLs shaped like ``/owner/repo[/...]`` the second
       path segment (the repository) is returned.  A lone ``/owner`` path
       yields the owner instead of raising ``IndexError``.
    2. Otherwise the URL path (outer slashes and file extension stripped,
       lowercased) is split on ``/``.  Segments that are empty, contained
       in the module-level ``URL_FILTERED`` or matched by the module-level
       ``RE_IGN`` pattern are dropped; the last remaining segment that
       occurs in the lowercased *title* is returned.
    3. Failing that, the first hostname label not in ``URL_FILTERED`` is
       returned, or else the second-to-last label (the only label when the
       host has just one).

    Args:
        url: The URL to inspect.
        title: Title text associated with the URL; matched
            case-insensitively against the path segments.

    Returns:
        The detected name, lowercased.

    Raises:
        ValueError: If no path segment matches the title and the URL has
            no hostname to fall back on.
    """
    urlp = urllib.parse.urlparse(url)

    # ``hostname`` is lowercased and excludes port/userinfo, unlike
    # ``netloc``, so "GitHub.com:443" is recognised as well.
    if urlp.hostname == 'github.com':
        parts = urlp.path.strip('/').split('/')
        if len(parts) > 1:
            return parts[1].lower()
        if parts[0]:
            return parts[0].lower()
        # Bare "https://github.com/": fall through to the generic rules.

    stem = os.path.splitext(urlp.path.strip('/'))[0].lower()
    title_lower = title.lower()
    matches = [
        seg for seg in stem.split('/')
        # Skip empty segments: '' is a substring of every title and would
        # otherwise be reported as the name for an empty path.
        if seg
        and seg not in URL_FILTERED
        and not RE_IGN.match(seg)
        and seg in title_lower
    ]
    if matches:
        return matches[-1]

    if not urlp.hostname:
        raise ValueError('cannot detect a name from URL %r' % (url,))
    labels = urlp.hostname.split('.')
    for label in labels:
        if label not in URL_FILTERED:
            return label
    # Every label is filtered: use the label before the TLD when there is
    # one, otherwise the sole label.
    return labels[-2] if len(labels) > 1 else labels[0]
评论列表
文章目录