def keep_redirecting(r, my_pub):
    """Look inside a response for a redirect the HTTP layer did not follow.

    Two cases are checked, in order:

    * a tiny payload (``content-length`` header below 500) that contains a
      ``<script>location.href='...'</script>`` JavaScript redirect;
    * when ``my_pub`` reports the publisher
      "Ovid Technologies (Wolters Kluwer Health)", a body containing an
      ``OvidAN = '...';`` assignment, which is turned into a
      content.wkhealth.com linkback URL.

    Args:
        r: response-like object exposing ``headers``, ``content`` and ``url``.
        my_pub: object exposing ``is_same_publisher(name)``, or a falsy value
            to skip the publisher-specific check.

    Returns:
        The URL to follow next, or ``None`` when no further redirect is found.
    """
    # Don't read r.content unless we have to, because it will cause us to
    # download the whole thing instead of just the headers.

    # example: 10.5762/kais.2016.17.5.316
    if "content-length" in r.headers:
        try:
            file_size = int(r.headers["content-length"])
        except (TypeError, ValueError):
            # Malformed header: we cannot tell that the payload is small, so
            # skip the JavaScript check instead of crashing.
            file_size = None

        # Manually follow javascript if that's all that's in the payload.
        # The explicit None test matters: on Python 2, ``None < 500`` is True.
        if file_size is not None and file_size < 500:
            # Non-greedy capture so that a later "'</script>" on the same
            # line is not swallowed into the URL.
            matches = re.findall(
                r"<script>location.href='(.*?)'</script>",
                _response_body_text(r),
                re.IGNORECASE
            )
            if matches:
                redirect_url = matches[0]
                if redirect_url.startswith("/"):
                    # NOTE(review): get_link_target is defined elsewhere;
                    # presumably it resolves the path against r.url - confirm.
                    redirect_url = get_link_target(redirect_url, r.url)
                return redirect_url

    # example: 10.1097/00003643-201406001-00238
    if my_pub and my_pub.is_same_publisher("Ovid Technologies (Wolters Kluwer Health)"):
        matches = re.findall(r"OvidAN = '(.*?)';", _response_body_text(r), re.IGNORECASE)
        if matches:
            an_number = matches[0]
            return "http://content.wkhealth.com/linkback/openurl?an={}".format(an_number)

    return None


def _response_body_text(r):
    """Return ``r.content`` in a form the text regexes above can search.

    On Python 3 a ``bytes`` body is decoded (undecodable bytes are replaced)
    so it can be matched with ``str`` patterns and yields ``str`` results.
    On Python 2, where ``bytes`` is ``str``, the body is returned untouched.
    """
    content = r.content
    if isinstance(content, bytes) and not isinstance(content, str):
        return content.decode("utf-8", "replace")
    return content
评论列表
文章目录