def parse_list(self, response):
    """Yield a follow-up request for each new topic link on a listing page.

    Every ``<div class="xgt_topic">`` in ``response`` is inspected; the first
    ``href`` found under it is turned into an absolute URL by prefixing
    ``constant.PROTOCOL_HTTP`` and ``self.start_url_domain``.  URLs that
    ``self.design_topic_service.is_duplicate_url`` reports as duplicates are
    skipped; the rest are requested with ``self.parse_content`` as callback.

    Args:
        response: The downloaded listing page.

    Yields:
        scrapy.Request: One request per non-duplicate topic URL.
    """
    selector = Selector(response)
    for item_selector in selector.xpath('//div[@class="xgt_topic"]'):
        hrefs = item_selector.xpath('div//a/@href').extract()
        if not hrefs:
            # A topic block without a link used to raise IndexError and abort
            # the whole page; skip just this block instead.
            continue
        # e.g. /topic/7334.html
        href = hrefs[0].strip()
        # e.g. http://xiaoguotu.to8to.com/topic/7334.html
        next_url = constant.PROTOCOL_HTTP + self.start_url_domain + href
        if self.design_topic_service.is_duplicate_url(next_url):
            continue
        yield scrapy.Request(next_url, self.parse_content)
design_topic_spider.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录