def __init__(self, conf=None, conn=None):
    """Store the given conf/conn and set up the start URL and crawl rules.

    Args:
        conf: Kept unchanged on ``self.conf``; defaults to ``None``.
        conn: Kept unchanged on ``self.conn``; defaults to ``None``.
    """
    # Keep the injected objects on the instance
    self.conf, self.conn = conf, conn

    # Single start URL for the crawl
    listing_url = 'http://www.pfizer.com/research/clinical_trials/find_a_trial?recr=0'
    self.start_urls = [listing_url]

    # Links matching an NCT trial id are handed to parse_record;
    # the page-number rule is declared without a callback
    record_rule = Rule(
        LinkExtractor(allow=r'find_a_trial/NCT\d+'),
        callback=parse_record,
    )
    paging_rule = Rule(LinkExtractor(allow=r'page=\d+'))
    self.rules = [record_rule, paging_rule]

    # Parent initializer runs last, after the attributes above are set
    # NOTE(review): presumably the parent reads self.rules during init - confirm
    super(Spider, self).__init__()
评论列表
文章目录