def sitesManager( media_url ):
    """Pick the site handler class for media_url and return an instance of it.

    If the URL contains the name of a known URL shortener, it is first
    resolved by issuing a HEAD request that follows redirects. The (possibly
    resolved) URL is then tested, case-insensitively, against the ``regex``
    attribute of each direct subclass of ``sitesBase``; the first subclass
    whose pattern matches is instantiated with the URL.

    Args:
        media_url: URL of the link to find a handler for.

    Returns:
        An instance of the first matching ``sitesBase`` subclass, or None
        when no subclass pattern matches.
    """
    # First resolve URL shorteners so the site patterns see the real target.
    # NOTE(review): this is a plain substring test, so it also triggers when
    # one of these names appears elsewhere in the URL (path, query string or
    # a longer host name) -- confirm whether a hostname check is wanted.
    shorteners = ('bit.ly', 'goo.gl', 'tinyurl.com')
    if any(shortener in media_url for shortener in shorteners):
        try:
            v = requests.head( media_url, timeout=REQUEST_TIMEOUT, allow_redirects=True )
        except requests.exceptions.RequestException as e:
            # Resolution is only a pre-processing step: on a network error
            # keep the unresolved URL and still try to find a handler rather
            # than aborting the whole lookup.
            log(' short url(%s) could not be resolved: %s'%(media_url,repr(e)))
        else:
            log(' short url(%s)=%s'%(media_url,repr(v.url)))
            media_url = v.url

    for subcls in sitesBase.__subclasses__():
        regex = subcls.regex
        # Subclasses with an empty/None pattern are never selected here.
        # Only match/no-match matters, so search() is enough.
        if regex and re.search( regex, media_url, re.I ):
            return subcls( media_url )

    # No handler recognised this URL.
    return None
# (web-page residue, not part of the code: "comment list" / "table of contents")