def parse_news(self, response):
    """Attach the article text to the item carried in ``response.meta``.

    Reads the partially built item from ``response.meta["item"]``, takes the
    text of the ``<div class="entry-content group">`` container (with the
    nested ``<div class="related_posts">`` removed when it exists), stores
    the stripped text under ``item["content"]``, sets ``item["catalogue"]``
    and yields the item.

    Nothing is yielded, and a warning is logged, when the request carried no
    item or the page has no content container.

    :param response: response object exposing ``meta``, ``body`` and ``url``.
    :returns: generator yielding the completed item, or nothing at all.
    """
    # Function-scope import: the file's import block is not visible from this
    # snippet, so the dependency is brought in locally.
    import logging

    log = logging.getLogger(__name__)

    item = response.meta.get("item")
    if item is None:
        # Without an item there is nothing to fill in; bail out before
        # paying for an HTML parse.
        log.warning("parse_news: no item in response.meta for %s", response.url)
        return

    # NOTE: a date-based early stop (raise CloseSpider once the item's
    # "news_date" was self.end_day days older than self.end_now) used to sit
    # here as commented-out code; it is currently disabled.

    # Name the parser explicitly so results do not depend on what happens to
    # be installed. NOTE(review): "lxml" is what bs4 auto-selects when it is
    # available -- confirm it is installed in the deployment environment.
    soup = BeautifulSoup(response.body, "lxml")

    article = soup.find("div", class_="entry-content group")
    if article is None:
        log.warning("parse_news: no content container found in %s", response.url)
        return

    # The related-posts block is optional; drop it only when present so that
    # pages without it still yield their content.
    related = article.find("div", class_="related_posts")
    if related is not None:
        related.decompose()

    item["content"] = article.text.strip()
    # NOTE(review): this literal looks like encoding-damaged text; confirm the
    # intended catalogue name. Kept unchanged because it is runtime data.
    item["catalogue"] = u"????"
    yield item
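# Comment list  (web-page navigation text captured with the snippet; not code)
# Article table of contents  (web-page navigation text captured with the snippet; not code)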