def __init__(self, conf=None, conn=None, page_from=None, page_to=None):
    """Prepare the spider's start URLs and link-following rules.

    Args:
        conf: Stored unchanged on ``self.conf``.
        conn: Stored unchanged on ``self.conn``.
        page_from: Forwarded to ``_make_start_urls`` and ``_process_url``;
            ``None`` is replaced by ``'1'``. Presumably the first listing
            page to crawl -- confirm against ``_process_url``.
        page_to: Forwarded to ``_process_url``; ``None`` is replaced by
            ``'1'``. Presumably the last listing page to crawl -- confirm
            against ``_process_url``.
    """
    # Keep the configuration and connection handles on the instance.
    self.conf, self.conn = conf, conn

    # Missing page bounds fall back to the string '1'.
    page_from = '1' if page_from is None else page_from
    page_to = '1' if page_to is None else page_to

    # Start URLs are derived from the listing index and the first page.
    self.start_urls = _make_start_urls(
        prefix='https://upload.umin.ac.jp/cgi-open-bin/ctr_e/index.cgi',
        page_from=page_from,
    )

    # Links matching the record-view URL are handed to parse_record.
    record_rule = Rule(
        LinkExtractor(allow=r'cgi-open-bin/ctr_e/ctr_view.cgi'),
        callback=parse_record,
    )
    # Links carrying a page number have their URL passed through
    # _process_url (bound to the page bounds) and get no callback.
    paging_rule = Rule(
        LinkExtractor(
            allow=r'page=\d+',
            process_value=partial(_process_url, page_from, page_to),
        ),
    )
    self.rules = [record_rule, paging_rule]

    # Parent initialisation runs last, once start_urls and rules are set.
    super(Spider, self).__init__()
# Internal
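# NOTE(review): stray web-page text was left here ("评论列表" = "comment list",
# "文章目录" = "table of contents"); it is not part of the module -- confirm and remove.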