def get_review_sentences():
    """
    Read the Yelp reviews and return them as tokenized sentences.

    Each non-blank line of the file at ``FULL_YELP_REVIEW_PATH`` is parsed as
    one JSON object. Its ``"text"`` field has newline characters removed and
    is lower-cased, then it is split into sentences with ``sent_tokenize``;
    every non-blank sentence is split into tokens with ``word_tokenize``.
    Blank lines and records without any text are skipped.

    :return: list of sentences, where each sentence is the token list
        returned by ``word_tokenize``
    """
    sentences = []
    # The context manager closes the file even if parsing a line raises.
    with io.open(FULL_YELP_REVIEW_PATH, 'r', encoding='utf-8') as review_file:
        for line in review_file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not JSON records.
                continue
            text = json.loads(line).get("text")
            if not text:
                # Missing or empty review text: nothing to tokenize.
                continue
            # NOTE(review): newlines are removed rather than replaced by a
            # space, so the words on either side of a line break get joined.
            # Kept as-is to preserve existing output -- confirm it is intended.
            text = text.replace('\n', '').lower()
            for raw_sentence in sent_tokenize(text):
                if raw_sentence.strip():
                    sentences.append(word_tokenize(raw_sentence))
    return sentences
评论列表
文章目录