automl.py 文件源码

python
阅读 30 收藏 0 点赞 0 评论 0

项目:AutoML-Challenge 作者: postech-mlg-exbrain 项目源码 文件源码
def _split_and_dump(self, X, y, valid_X, valid_y):
        if not hasattr(self, '_dm'):
            raise ValueError("It should be called after the dumpmanager _dm is set")

        if self.resampling == 'cv':
            pass
        elif self.resampling == 'holdout':
            if not self._has_valid_data:
                data_size = y.shape[0]
                if data_size >= 100000:
                    valid_ratio = 0.3
                elif 15000 <= data_size < 100000:
                    valid_ratio = 0.2
                else:
                    valid_ratio = 0.15
                valid_size = int(data_size * valid_ratio)
                X, valid_X = X[valid_size:], X[:valid_size]
                y, valid_y = y[valid_size:], y[:valid_size]
        else:
            raise NotImplementedError()

        pkl = {"resampling": self.resampling,
               "X": X, "y": y,
               "valid_X": valid_X, "valid_y": valid_y}

        datafile = os.path.join(self._dm.dir, "data.pkl")
        joblib.dump(pkl, datafile, protocol=-1)

        self._datafile = datafile
        return datafile
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号