def parse(self, response):
    """Parse the index page and schedule one request per listed article.

    First yields a form POST to ``self.start_url`` with ``page=1`` whose
    response is handled by ``self.parse_newest``. Then every ``<li>`` under
    the element with class ``index-first-list`` is turned into a
    ``scrapy.Request`` handled by ``self.parse_news``; a partially filled
    ``NewsItem`` (title, abstract, URL, catalogue) travels along in
    ``request.meta["item"]``.

    A page without the list container produces no article requests, and
    entries whose first ``<a>`` is missing or has no/empty ``href`` are
    skipped, because a request cannot be built without a URL.

    Args:
        response: The downloaded index page; only ``response.body`` is read.

    Yields:
        The ``scrapy.FormRequest`` for the first page, followed by one
        ``scrapy.Request`` per article entry that has a link.
    """
    yield scrapy.FormRequest(
        self.start_url,
        formdata={'page': '1'},
        callback=self.parse_newest,
    )

    soup = BeautifulSoup(response.body, "lxml")
    container = soup.find(class_="index-first-list")
    # Look the container up once and fall back to an empty list so the loop
    # below is a no-op instead of failing on ``None``.
    entries = container("li") if container is not None else []

    for news in entries:
        link = news.a
        href = link.get("href") if link is not None else None
        if not href:
            # No usable URL: neither the item nor the request makes sense.
            continue

        heading = news.h2
        # Guard both levels; an entry without <h2> must not abort the crawl.
        if heading is not None and heading.a is not None:
            title = heading.a.string
        else:
            title = None
        abstract = news.p.string if news.p is not None else None
        news_url = self.domain + href

        item = NewsItem(
            title=title,
            abstract=abstract,
            news_url=news_url,
            catalogue=u"????",
        )
        # dont_filter=True keeps the request from being dropped by the
        # scheduler's duplicate filter.
        request = scrapy.Request(news_url, self.parse_news, dont_filter=True)
        request.meta["item"] = item
        yield request
#???????
# Comment list
# Article table of contents