def generate_training_data(self, options):
    """
    Build the train/test feature and label sets from the input table.

    The dictionary and the list of topic labels are derived from the
    training split only and are then reused to encode the test split,
    so both splits are formatted against the same dictionary and the
    same topic list.  Nothing is stored on ``self``; the dictionary and
    the topic list are handed back in the return value.

    Args:
        options: mapping read with the keys ``'threashold'`` (passed to
            ``InputTable``), ``'ratio_test'`` and ``'seed'`` (both
            passed to ``InputTable.fetch_data``).

    Returns:
        The tuple ``(x_train, y_train, x_test, y_test, dictionary,
        all_topics)``.
    """
    # NOTE: 'threashold' (sic) is the key this method reads; the spelling
    # must stay as-is to match what callers put into ``options``.
    input_table = InputTable(options['threashold'])
    training, test = input_table.fetch_data(options['ratio_test'],
                                            options['seed'])

    word_vecs_train = self.convert_to_word_vecs(training)
    topic_vecs_train = self.convert_to_topic_vecs(training)
    word_vecs_test = self.convert_to_word_vecs(test)
    topic_vecs_test = self.convert_to_topic_vecs(test)

    # Use the dictionary and topic types of the training set only.
    dictionary = corpora.Dictionary(word_vecs_train)
    # De-duplicate in first-seen order.  list(set(...)) yields an order
    # that can differ between interpreter runs when string hashing is
    # randomized, which would reshuffle the topic list for identical
    # input; dict.fromkeys keeps the same unique items in a stable order.
    # Presumably adjust_y_format relies on positions in this list --
    # verify against its implementation.
    all_topics = list(dict.fromkeys(topic_vecs_train))

    x_train = self.adjust_x_format(dictionary, word_vecs_train)
    y_train = self.adjust_y_format(all_topics, topic_vecs_train)
    x_test = self.adjust_x_format(dictionary, word_vecs_test)
    y_test = self.adjust_y_format(all_topics, topic_vecs_test)
    return (x_train, y_train, x_test, y_test, dictionary, all_topics)
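# Comment list        (scraped page navigation text, not part of the code)
# Table of contents   (scraped page navigation text, not part of the code)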