def parse_article(self, response):
    # Extract author, abstract, content, news_no and crawl_date for an article page.
    item = response.meta.get("item", NewsItem())
    # Optional cut-off: stop the spider once articles are older than the target day.
    # news_date = item.get("news_date")
    # if news_date:
    #     struct_date = datetime.datetime.strptime(news_date, "%Y-%m-%d %H:%M:%S")
    #     delta = self.end_now - struct_date
    #     if delta.days == self.end_day:
    #         raise CloseSpider("reached the cut-off date, stopping the crawl")
    soup = BeautifulSoup(response.body, "lxml")  # lxml is already a Scrapy dependency
    author_tag = soup.find("span", class_="name")
    abstract_tag = soup.find("p", class_="excerpt")
    content_tag = soup.find("div", class_="detail")
    item["author"] = author_tag.text if author_tag else None
    item["abstract"] = abstract_tag.text if abstract_tag else None
    item["content"] = content_tag.text if content_tag else None
    # The news id is the last path segment of the URL with its ".html" suffix stripped.
    item["news_no"] = response.url.split("/")[-1][:-5]
    item["crawl_date"] = NOW
    yield item
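
# For reference, a minimal sketch of how a partially-filled item can reach
# parse_article through response.meta. The callback name parse_news_list and the
# listing-page selector are assumptions for illustration, not part of the original spider.
def parse_news_list(self, response):
    # Hypothetical listing callback: build a NewsItem per article link and hand it
    # to parse_article via the request's meta dict.
    for link in response.xpath('//a[@class="news-link"]/@href').extract():
        item = NewsItem()
        # news_date could be filled in here from the listing page if it is available.
        yield scrapy.Request(response.urljoin(link),
                             callback=self.parse_article,
                             meta={"item": item})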