def __init__(self, conf=None, conn=None, date_from=None, date_to=None):
    """Set up the spider's start URLs and crawl rules.

    Args:
        conf: Stored unchanged on ``self.conf``.
        conn: Stored unchanged on ``self.conn``.
        date_from: Forwarded to ``_make_start_urls`` as ``date_from``.
        date_to: Forwarded to ``_make_start_urls`` as ``date_to``.
    """

    def force_https(value):
        # Upgrade only a leading plain-http scheme. A blind
        # ``value.replace('http', 'https', 1)`` would turn an already
        # secure ``https://`` link into ``httpss://`` and could also
        # rewrite an ``http`` substring in the middle of the URL.
        insecure = 'http://'
        if value.startswith(insecure):
            return 'https://' + value[len(insecure):]
        return value

    # Save conf/conn
    self.conf = conf
    self.conn = conn

    # Make start urls
    self.start_urls = _make_start_urls(
        prefix='http://www.anzctr.org.au/TrialSearch.aspx',
        date_from=date_from, date_to=date_to)

    # Make rules
    self.rules = [
        # Trial review pages: links are rewritten to https and handed to
        # parse_record.
        Rule(LinkExtractor(
            allow=r'Trial/Registration/TrialReview.aspx',
            process_value=force_https,
        ), callback=parse_record),
        # Links matching the page=<digits> pattern: extracted without a
        # callback.
        Rule(LinkExtractor(
            allow=r'page=\d+',
        )),
    ]

    # Inherit parent
    # NOTE(review): called last, after start_urls/rules are assigned;
    # presumably the parent initializer reads them — confirm before
    # reordering.
    super(Spider, self).__init__()
# Internal
# [web page residue, not code] Comment list
# [web page residue, not code] Article table of contents