def fit_voting(self):
    """Build a weighted voting ensemble and tune its weights on validation data.

    Every entry of ``names`` is wrapped in an ``ExternalModel`` that is given
    the paths of that model's validation and test result files under
    ``self.data_dir`` (presumably precomputed predictions -- confirm against
    ``ExternalModel``).  The per-model vote weights are then searched with
    ``basinhopping`` so that the number of correctly predicted validation
    labels is as large as possible.

    Returns:
        A ``(name, estimator)`` tuple.  ``name`` is ``'vote(...)'`` with the
        model names joined by commas; ``estimator`` is a ``VotingClassifier``
        whose ``le_`` and ``estimators_`` attributes are assigned here
        directly rather than through ``fit``.
    """
    voting = 'soft'
    names = [
        # Currently disabled ensemble members:
        # 'svm(word_n_grams,char_n_grams,all_caps,hashtags,punctuations,punctuation_last,emoticons,emoticon_last,'
        # 'elongated,negation_count)',
        # 'logreg(w2v_doc)',
        # 'logreg(w2v_word_avg_google)',
        'word2vec_bayes',
        'cnn_word(embedding=google)',
        'rnn_word(embedding=google)',
    ]
    classifiers = [ExternalModel({
        self.val_docs: os.path.join(self.data_dir, 'results/val/{}.json'.format(name)),
        self.test_docs: os.path.join(self.data_dir, 'results/test/{}.json'.format(name)),
    }) for name in names]

    # Collect each model's validation scores, stacked along a leading axis.
    all_scores = []
    for classifier in classifiers:
        scores = classifier.predict_proba(self.val_docs)
        if voting == 'hard':
            # `threshold` must be passed by keyword: it is keyword-only in
            # current scikit-learn releases, and the keyword form also works
            # on older ones.
            scores = Binarizer(threshold=1 / 3).transform(scores)
        all_scores.append(scores)
    all_scores = np.array(all_scores)
    all_scores_first, all_scores_rest = all_scores[0], all_scores[1:]

    le = LabelEncoder().fit(self.classes_)
    val_label_indexes = le.transform(self.val_labels())

    # Only the weights of the models after the first one are optimised.
    n_free_weights = len(classifiers) - 1

    def negative_correct_count(w_):
        """Return minus the number of validation labels predicted correctly."""
        # One weight per remaining model, broadcast over that model's scores.
        weighted = all_scores_first + all_scores_rest * w_.reshape((len(w_), 1, 1))
        predicted = np.argmax(weighted.sum(axis=0), axis=1)
        return -(val_label_indexes == predicted).sum()

    # assume w_0=1 as w is invariant to scaling
    w = basinhopping(
        negative_correct_count,
        np.ones(n_free_weights),
        niter=1000,
        minimizer_kwargs=dict(method='L-BFGS-B', bounds=[(0, None)] * n_free_weights),
    ).x
    # Put the fixed first weight back and normalise so the weights sum to 1.
    w = np.hstack([[1], w])
    w /= w.sum()
    logging.info('w: {}'.format(w))

    estimator = VotingClassifier(list(zip(names, classifiers)), voting=voting, weights=w)
    # Set the fitted-state attributes by hand instead of calling fit().
    estimator.le_ = le
    estimator.estimators_ = classifiers
    return 'vote({})'.format(','.join(names)), estimator
评论列表
文章目录