def parse_detail(self, response):
    """Yield one article item populated from an article detail response.

    The item is assembled through ``ArticleItemLoader``: the page URL, its
    ``get_md5`` digest and the cover image URL are added as literal values,
    while the remaining fields are registered as XPath queries evaluated
    against ``response``.

    No cleanup or type conversion of the extracted text happens in this
    method; presumably that is left to the loader / item processors --
    confirm against their definitions.

    Args:
        response: Detail-page response. ``response.url`` and
            ``response.meta`` are read here, and the object itself is
            handed to the loader.

    Yields:
        The object returned by the loader's ``load_item()`` call.
    """
    # Cover image URL passed along in the request meta; "" when absent.
    cover_url = response.meta.get("front_image_url", "")

    loader = ArticleItemLoader(item=JobBoleArticleItem(), response=response)

    # The calls below are kept in their original order on purpose: the
    # order in which fields are registered may influence the field order of
    # the loaded item.
    loader.add_value("url", response.url)
    loader.add_xpath("title", '//div[@class="entry-header"]/h1/text()')
    loader.add_value("url_object_id", get_md5(response.url))
    loader.add_xpath(
        "create_date", '//p[@class="entry-meta-hide-on-mobile"]/text()'
    )
    loader.add_xpath(
        "praise_nums", '//span[contains(@class, "vote-post-up")]/h10/text()'
    )
    # Wrapped in a list, exactly one element (possibly the empty string).
    loader.add_value("front_image_url", [cover_url])
    loader.add_xpath(
        "fav_nums", '//span[contains(@class, "bookmark-btn")]/text()'
    )
    loader.add_xpath(
        "comments_nums", '//a[@href="#article-comment"]/span/text()'
    )
    loader.add_xpath(
        "tags", '//p[@class="entry-meta-hide-on-mobile"]/a/text()'
    )
    loader.add_xpath("content", '//div[@class="entry"]')

    yield loader.load_item()
# Comment list
# Article table of contents