model_base_2.py 文件源码

python
阅读 38 收藏 0 点赞 0 评论 0

项目:mlbootcamp_5 作者: ivan-filonov 项目源码 文件源码
def greedy_select_features(self):
        """Pick a feature subset by greedy forward selection and apply it.

        Columns of ``self.train_`` and ``self.test_`` are first renamed to the
        strings ``'0' .. str(n - 1)``.  Each round then tries adding every
        not-yet-selected column to the current set, scores the candidate
        subset with ``self.ccv`` and keeps the candidate with the smallest
        score (lower is treated as better).  The best subset seen over all
        rounds is remembered; the search stops early once the current subset
        has grown more than five features beyond that best subset.

        Progress is persisted under the key ``'chosen_features'`` as a tuple
        ``(features, score, finished)`` after every round, and previously
        saved progress is reused unless ``self.debug_`` is truthy.  On
        return, ``self.train_`` and ``self.test_`` are replaced by their
        restriction to the best subset.

        NOTE(review): ``self.ccv`` is assumed to return a 2-item result whose
        first item is the score -- confirm against its definition.

        Raises:
            ValueError: if no candidate in a round scores below the initial
                sentinel (for example because every score is NaN).  The
                original code failed here with an opaque ``TypeError``.
        """
        # Sentinel "worse than anything" score, and the number of rounds the
        # search may continue growing the subset without a new global best.
        worst_score = 1e9
        max_extra_features = 5

        print('initial shapes:', self.train_.shape, self.test_.shape)
        # Debug runs always start from scratch instead of reusing saved state.
        saved = None if self.debug_ else self.load('chosen_features')

        if saved is None:
            g_best_score = worst_score
            g_best_features = []
            current = set()
            finished = False
        else:
            g_best_features, g_best_score, finished = saved
            # Only the best subset is persisted, so a resumed search restarts
            # from it rather than from the (possibly larger) last subset.
            current = set(g_best_features)
            print('SFS REUSE:', g_best_score, g_best_features, self.now())

        # Use positional string names so saved feature lists stay valid
        # regardless of the original column labels.
        num_columns = self.train_.shape[1]
        col_names = [str(c) for c in range(num_columns)]
        self.train_.columns = col_names
        self.test_.columns = col_names

        if not finished:
            y = self.y_.ravel()
            scorer = metrics.make_scorer(metrics.log_loss)
            all_columns = set(col_names)
            loop_count = len(col_names) - len(g_best_features)
            for _ in range(loop_count):
                best_score = worst_score
                best_features = None
                # Try extending the current subset by each unused column.
                for f in all_columns - current:
                    newf = list(current | {f})
                    score, _unused = self.ccv(
                        linear_model.BayesianRidge(), self.train_[newf], y, scorer)
                    if best_score > score:
                        best_score = score
                        best_features = newf
                if best_features is None:
                    # Every comparison above failed (NaN or huge scores);
                    # continuing would silently corrupt the selection.
                    raise ValueError(
                        'greedy_select_features: no candidate feature produced '
                        'a score below %r (scores may be NaN)' % worst_score)
                current = set(best_features)
                if g_best_score > best_score:
                    g_best_score = best_score
                    g_best_features = best_features
                    print('new best:', g_best_score, g_best_features, self.now())
                # Give up once the subset has grown well past the best one
                # without improving on it.
                if len(best_features) - len(g_best_features) > max_extra_features:
                    break
                self.save('chosen_features', (g_best_features, g_best_score, False))
            # Mark the search as finished so later runs skip it entirely.
            self.save('chosen_features', (g_best_features, g_best_score, True))

        print('feature selection complete.', self.now())
        self.train_ = self.train_[g_best_features]
        self.test_ = self.test_[g_best_features]
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号