def parse_list(self, response):
    """Handle one listing page: queue the next page, then every item on it.

    The URL of the page being handled is read from the Splash arguments
    stored under ``response.meta['splash']['args']['url']``.

    Yields:
        First a ``SplashRequest`` for the next listing page (handled by
        ``parse_list`` again), then one ``Request`` per item-detail URL
        found in the response body (handled by ``self.parse_item``).
    """
    current_url = response.meta['splash']['args']['url']

    # Group 1 is the listing URL up to and including the slash before the
    # page number; group 2 is the page number itself, which may be absent.
    # The dots in the host name are escaped so they only match a literal '.'.
    listing = re.match(
        r'(http://www\.mogujie\.com/book/\w+/\d+/)(\d*)', current_url)
    if listing:
        base_url, page_text = listing.groups()
        # A listing URL that stops at the trailing slash is treated as page 1
        # instead of crashing on int('').
        page = int(page_text) if page_text else 1
        next_url = base_url + str(page + 1)
    else:
        # No page segment yet: treat the current page as page 1, go to page 2.
        next_url = current_url + '/2'

    # Single-argument print call: same output under Python 2 and Python 3.
    print('+++++++++++++++++++++++++ Next url: %s' % next_url)
    yield SplashRequest(url=next_url, callback=self.parse_list)

    body = response.body
    if not isinstance(body, str):
        # On Python 3 the body is bytes; decode so the text pattern applies.
        # On Python 2 the body is already ``str`` and is used unchanged.
        body = body.decode('utf-8', 'ignore')

    # An item link is the detail prefix followed by exactly seven characters.
    for item_url in re.findall(r'http://shop\.mogujie\.com/detail/.{7}', body):
        yield Request(url=item_url, callback=self.parse_item)
评论列表
文章目录