qdaily_spider.py 文件源码

python
阅读 18 收藏 0 点赞 0 评论 0

项目:NewsScrapy 作者: yinzishao 项目源码 文件源码
def parse_article(self,response):
        #content,news_no,crawl_date
        item = response.meta.get("item",NewsItem())
        # news_date = item.get("news_date",None)
        # if news_date:
        #     struct_date = datetime.datetime.strptime(news_date,"%Y-%m-%d %H:%M:%S")
        #     delta = self.end_now-struct_date
        #     print delta.days
        #     if delta.days == self.end_day:
        #         raise CloseSpider('today scrapy end')
        soup =BeautifulSoup(response.body)
        author = soup.find("span",class_="name").text if soup.find("span",class_="name") else None
        abstract =  soup.find("p",class_="excerpt").text if soup.find("p",class_="excerpt") else None
        content = soup.find("div",class_="detail").text if soup.find("div",class_="detail") else None
        news_no = response.url.split("/")[-1][:-5]
        item["author"] = author
        item["abstract"] = abstract
        item["content"] = content
        item["crawl_date"] = NOW
        item["news_no"] = news_no
        yield item
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号