def get_sequences(raw_file, word_count):
    """Read a tab-separated label/sentence file and tokenize every sentence.

    Each non-blank line is expected to look like ``label<TAB>sentence``.
    Only the first two tab-separated fields are used; any further fields
    are ignored. Blank (whitespace-only) lines are skipped.

    Args:
        raw_file: Path of the file to read.
        word_count: Mutable mapping of word -> occurrence count. It is
            updated in place, so counts accumulate when the same mapping
            is passed to several calls.

    Returns:
        A tuple ``(label_list, raw_sequences, word_count)`` where
        ``label_list[i]`` is the label of the i-th non-blank line,
        ``raw_sequences[i]`` is the word sequence produced for that line's
        sentence, and ``word_count`` is the same mapping object that was
        passed in.

    Raises:
        IndexError: If a non-blank line has no second tab-separated field.
            Because the line is stripped before splitting, this includes
            a line whose sentence is empty (``"label\\t"``).
    """
    label_list = []
    raw_sequences = []
    # The context manager closes the file even when parsing or tokenizing
    # raises part-way through.
    with open(raw_file) as input_file:
        for line in input_file:
            stripped = line.strip()
            if not stripped:
                # A blank line (e.g. an empty last line) carries no sample.
                continue
            line_parts = stripped.split('\t')
            label = line_parts[0]
            sentence = line_parts[1]
            # `text` is resolved from the enclosing module; presumably
            # keras.preprocessing.text -- confirm against the file's imports.
            word_seq = text.text_to_word_sequence(sentence)
            label_list.append(label)
            raw_sequences.append(word_seq)
            for w in word_seq:
                word_count[w] = word_count.get(w, 0) + 1
    return label_list, raw_sequences, word_count
# Indices start from 1.
评论列表
文章目录