senti_models.py 文件源码-python代码片段

def fit_voting(self):
        voting = 'soft'
        names = [
            # 'svm(word_n_grams,char_n_grams,all_caps,hashtags,punctuations,punctuation_last,emoticons,emoticon_last,'
            # 'elongated,negation_count)',
            # 'logreg(w2v_doc)',
            # 'logreg(w2v_word_avg_google)',
            'word2vec_bayes',
            'cnn_word(embedding=google)',
            'rnn_word(embedding=google)',
        ]
        classifiers = [ExternalModel({
            self.val_docs: os.path.join(self.data_dir, 'results/val/{}.json'.format(name)),
            self.test_docs: os.path.join(self.data_dir, 'results/test/{}.json'.format(name)),
        }) for name in names]
        all_scores = []
        for classifier in classifiers:
            scores = classifier.predict_proba(self.val_docs)
            if voting == 'hard':
                scores = Binarizer(1 / 3).transform(scores)
            all_scores.append(scores)
        all_scores = np.array(all_scores)
        all_scores_first, all_scores_rest = all_scores[0], all_scores[1:]
        le = LabelEncoder().fit(self.classes_)
        val_label_indexes = le.transform(self.val_labels())
        # assume w_0=1 as w is invariant to scaling
        w = basinhopping(
            lambda w_: -(val_label_indexes == np.argmax((
                all_scores_first + all_scores_rest * w_.reshape((len(w_), 1, 1))
            ).sum(axis=0), axis=1)).sum(), np.ones(len(classifiers) - 1), niter=1000,
            minimizer_kwargs=dict(method='L-BFGS-B', bounds=[(0, None)] * (len(classifiers) - 1))
        ).x
        w = np.hstack([[1], w])
        w /= w.sum()
        logging.info('w: {}'.format(w))
        estimator = VotingClassifier(list(zip(names, classifiers)), voting=voting, weights=w)
        estimator.le_ = le
        estimator.estimators_ = classifiers
        return 'vote({})'.format(','.join(names)), estimator