def process_train_data(self, input_file, onto_aware):
    """Read labelled training sentences, shuffle them and index them.

    Every non-blank line of ``input_file`` must have the form
    ``label<TAB>tagged_sentence``. Labels that are not yet present in
    ``self.label_map`` are added to it, each receiving the next free
    integer index (the current size of the map).

    Args:
        input_file: Path of the tab-separated training file.
        onto_aware: Forwarded unchanged to
            ``self.data_processor.prepare_paired_input``.

    Returns:
        A ``(train_inputs, train_labels)`` pair, where ``train_inputs`` is
        the result of ``self.data_processor.prepare_paired_input`` on the
        shuffled sentences and ``train_labels`` is the result of
        ``self.data_processor.make_one_hot`` on the matching label indices.

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            tab-separated fields, or if the file holds no examples.
    """
    # sys.stderr.write emits the same bytes as the Python 2 print statement
    # while also working on Python 3.
    sys.stderr.write("Reading training data\n")
    label_ind = []
    tagged_sentences = []
    # The context manager guarantees the file is closed, even on error.
    with open(input_file) as train_file:
        for line_number, line in enumerate(train_file, 1):
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = stripped.split("\t")
            if len(fields) != 2:
                raise ValueError(
                    "%s, line %d: expected 'label<TAB>tagged_sentence', "
                    "found %d tab-separated field(s)"
                    % (input_file, line_number, len(fields)))
            label, tagged_sentence = fields
            if label not in self.label_map:
                self.label_map[label] = len(self.label_map)
            label_ind.append(self.label_map[label])
            tagged_sentences.append(tagged_sentence)
    if not tagged_sentences:
        raise ValueError("No training examples found in %s" % input_file)
    # Shuffling so that when Keras does validation split, it is not always
    # at the end. Sentences and labels are shuffled together so each
    # sentence keeps its own label. The list() call is required because
    # random.shuffle needs a mutable sequence and zip() is lazy on Python 3.
    sentences_and_labels = list(zip(tagged_sentences, label_ind))
    random.shuffle(sentences_and_labels)
    tagged_sentences, label_ind = zip(*sentences_and_labels)
    sys.stderr.write("Indexing training data\n")
    train_inputs = self.data_processor.prepare_paired_input(
        tagged_sentences, onto_aware=onto_aware, for_test=False,
        remove_singletons=True)
    train_labels = self.data_processor.make_one_hot(label_ind)
    return train_inputs, train_labels
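# Comment list (appears to be web-page residue, not part of the code)
# Article table of contents (appears to be web-page residue, not part of the code)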