def generate_article_comment_sum(self, response):
    """Save the article's comment count and request its first comment page.

    The comment count is read from the text nodes selected by
    ``//html//script[1]//text()``: when more than one text fragment is
    extracted, the first run of digits in the second fragment is used.
    The result is stored as a string in ``replies``:

    * ``'0'`` when at most one fragment was extracted,
    * ``''`` when the second fragment contains no digits,
    * the matched digits otherwise.

    The item ``_id`` is the first run of digits found in ``response.url``.
    The item is handed to ``MongoClient.save_ithome_com_sum``.

    If the page contains an ``<input id="hash">`` element, a request for
    page 1 of the comment AJAX endpoint is yielded, with
    ``self.generate_article_comment`` as its callback.

    Args:
        response: The article page response. ``response.meta['article_id']``
            is concatenated into the comment URL, so it must be a string.

    Yields:
        scrapy.Request: At most one request, for the first comment page.

    Raises:
        AttributeError: If ``response.url`` contains no digits.
        KeyError: If the hash input is present but ``response.meta`` has no
            ``'article_id'`` entry.
    """
    script_texts = response.xpath("//html//script[1]//text()").extract()
    com_sum = 0
    if len(script_texts) > 1:
        # re.search returns None when there are no digits; keep the
        # historical fallback of an empty string in that case.
        count_match = re.search(r'\d+', script_texts[1])
        com_sum = count_match.group(0) if count_match is not None else ''

    ithome_item = YIthome2Item()
    ithome_item._id = re.search(r'\d+', response.url).group(0)
    ithome_item.replies = str(com_sum)
    MongoClient.save_ithome_com_sum(ithome_item)

    hash_key = response.xpath('//input[@id="hash"]/@value').extract()
    if hash_key:
        com_url = (
            "http://dyn.ithome.com/ithome/getajaxdata.aspx?newsID="
            + response.meta['article_id']
            + "&type=commentpage&order=false&hash=" + hash_key[0] + "&page="
        )
        yield scrapy.Request(
            com_url + str(1),
            # dont_filter is a boolean flag; the previous string 'true'
            # only worked because any non-empty string is truthy.
            dont_filter=True,
            callback=self.generate_article_comment,
        )
评论列表
文章目录