def train_feature_finder(self, training_db, clf):
    """Fit a bag-of-words vectorizer and a classifier on labelled sentences.

    Side effects: replaces ``self.class_names`` with the keys of
    ``training_db`` (in iteration order) and ``self.vectorizer`` with a
    freshly fitted ``CountVectorizer`` limited to 500 features.

    Args:
        training_db: Mapping of class name -> sized collection of training
            sentences. Every sentence under a key is labelled with that
            key's position in the mapping's iteration order, so
            ``self.class_names[label]`` recovers the class name.
        clf: Object exposing ``fit(features, labels)``; presumably a
            scikit-learn style estimator -- confirm against callers.

    Returns:
        Whatever ``clf.fit`` returns (for scikit-learn estimators this is
        the fitted estimator itself).
    """
    self.class_names = []
    self.vectorizer = CountVectorizer(
        analyzer="word",
        tokenizer=None,
        preprocessor=None,
        stop_words=None,
        max_features=500,
    )

    training_sentences = []
    training_classes = []
    # dict.iteritems() only exists on Python 2; items() works on both
    # Python 2 and Python 3.
    for class_index, (class_name, sentences) in enumerate(training_db.items()):
        training_sentences.extend(sentences)
        # One integer label per sentence belonging to this class.
        training_classes.extend([class_index] * len(sentences))
        self.class_names.append(class_name)

    train_data_features = self.vectorizer.fit_transform(training_sentences)
    # The classifier is handed a dense array rather than the sparse matrix
    # produced by the vectorizer, matching the original behaviour.
    train_data_features = train_data_features.toarray()
    return clf.fit(train_data_features, training_classes)
评论列表
文章目录