def gen_item_comment(self, response, is_first=False):
    """Collect the reply texts and reply times found in ``response``.

    Reply timestamps are read from ``//div[@class="authi"]/em`` and post
    bodies from the first ``table`` under each ``//div[@class="pct"]``.
    The n-th post body is paired with the n-th timestamp; if there are
    more bodies than timestamps, the last timestamp is reused.

    Args:
        response: Object exposing ``xpath(query).extract()`` (returning a
            list of HTML strings) and a ``url`` attribute -- presumably a
            Scrapy response; confirm against the caller.
        is_first: When true, the first post body on the page is skipped
            (presumably because it is the thread's opening post rather
            than a reply -- confirm against the caller).

    Returns:
        An empty list when the page has no timestamps. Otherwise a list
        holding a single dict with keys ``'url'`` (the response URL) and
        ``'comments_data'`` (a list of ``{'content', 'reply_time'}``
        dicts, possibly empty).
    """
    rep_time_list = response.xpath('//div[@class="authi"]/em').extract()
    if not rep_time_list:
        return []

    # rep_time_list is non-empty here, so this index is always valid.
    last_time_index = len(rep_time_list) - 1
    post_tables = response.xpath('//div[@class="pct"]//table[1]').extract()

    comments_data = []
    for indexi, content in enumerate(post_tables):
        if is_first and indexi == 0:
            continue

        soup = BeautifulSoup(content, 'lxml')
        # Drop <script> tags so their source does not leak into the text.
        for script in soup('script'):
            script.extract()
        text = StrClean.clean_comment(soup.get_text())

        # Clamp the index: surplus posts fall back to the last timestamp.
        raw_time = rep_time_list[min(indexi, last_time_index)]
        comments_data.append({
            'content': text,
            'reply_time': self.format_rep_date(raw_time),
        })

    # One record per page, assembled once after every post is processed.
    return [{'url': response.url, 'comments_data': comments_data}]
评论列表
文章目录