def get_url(betamax_session):
    """Build a URL fetcher bound to ``betamax_session``.

    Args:
        betamax_session: Session object whose ``get`` method performs the
            HTTP request. The same session is reused by every call of the
            returned function.

    Returns:
        The inner ``_get_url(url, request_kwargs=None)`` callable.
    """

    def _get_url(url, request_kwargs=None):
        """Return a scrapy.http.HtmlResponse with the contents of ``url``.

        Note that the session is kept intact among multiple calls to this
        function (i.e. cookies are passed over).

        We also don't verify SSL certificates, because Takeda's certificate
        is invalid. If they become valid, we can resume verifying the
        certificates.

        Args:
            url: Address to fetch.
            request_kwargs: Optional mapping of extra keyword arguments
                forwarded to the ``Request`` attached to the response.
                ``None`` (the default) means no extra arguments.

        Returns:
            An ``HtmlResponse`` built from the fetched URL and body, with
            its ``request`` attribute set.
        """
        # Use a None sentinel rather than a shared mutable ``{}`` default.
        if request_kwargs is None:
            request_kwargs = {}

        # verify=False is deliberate; see the docstring above.
        response = betamax_session.get(url, verify=False)
        scrapy_response = HtmlResponse(
            url=str(response.url),
            body=response.content,
        )
        # The attached request carries the URL as passed in, while the
        # response itself carries the URL reported by the session.
        scrapy_response.request = Request(url, **request_kwargs)
        return scrapy_response

    return _get_url
评论列表
文章目录