def generate_vocabulary(self, review_summary_file):
    """
    Read review/summary pairs from a CSV file and feed their tokens to the vocabulary.

    The CSV is parsed with its first row treated as the header, and the
    remaining rows are stored on ``self.rev_sum_pair``. Every row must unpack
    into exactly two values (review, summary). Both texts are split with
    ``wordpunct_tokenize`` and the resulting token lists are handed to
    ``self.__add_list_to_dict`` (defined elsewhere; presumably it registers
    the tokens in ``self.map`` / ``self.revmap`` -- confirm against the helper).
    Finally the empty string ``""`` is added to ``self.map`` and ``self.revmap``.

    :param review_summary_file: CSV source passed straight to ``pd.read_csv``
    :return: None
    """
    frame = pd.read_csv(review_summary_file, header=0)
    self.rev_sum_pair = frame.values

    for row in self.rev_sum_pair:
        review_text, summary_text = row
        # Tokenize both texts before registering either one, so a failure on
        # the summary leaves the vocabulary untouched for this row.
        review_tokens, summary_tokens = (
            wordpunct_tokenize(review_text),
            wordpunct_tokenize(summary_text),
        )
        for tokens in (review_tokens, summary_tokens):
            self.__add_list_to_dict(tokens)

    # Register the empty string as the final vocabulary entry.
    blank = ""
    self.map[blank] = len(self.map)
    # NOTE(review): len(self.map) is evaluated again here, after the insertion
    # above, so when "" was not already a key the revmap key is one greater
    # than self.map[""]. Confirm this offset matches how __add_list_to_dict
    # fills revmap before changing it.
    self.revmap[len(self.map)] = blank
评论列表
文章目录