def process_request(self, request, spider):
    """Reject a request whose URL is already recorded for this spider.

    Queries ``self.col`` for a single document whose ``host`` equals
    ``spider.name``, whose ``url`` equals ``request.url`` and whose
    ``download`` field is anything other than ``-1``.

    NOTE(review): the signature and ``IgnoreRequest`` suggest this is a
    Scrapy downloader-middleware hook, and ``self.col`` looks like a
    MongoDB collection -- confirm against the enclosing class.

    Args:
        request: Object exposing the target address as ``request.url``.
        spider: Object exposing its identifier as ``spider.name``.

    Returns:
        None when no matching document exists.

    Raises:
        IgnoreRequest: When a matching document is found.
    """
    already_crawled = {'$and': [
        {'host': spider.name},
        {'url': request.url},
        # NOTE(review): -1 presumably marks a failed download that should
        # be fetched again; every other status blocks the request -- confirm.
        {'download': {'$ne': -1}},
    ]}

    if self.col.find_one(already_crawled):
        # Lazy %-style args: the message is only built if the record is emitted.
        logging.warning('the page is crawled, url is %s', request.url)
        raise IgnoreRequest()
    return None
评论列表
文章目录