# model_base_keras.py -- Python source code snippet

def model(self):
        """Train the Keras model with stratified K-fold CV and record predictions.

        Steps, as performed by this method:

        1. Drop the ``'id'`` column from ``self.train_`` and ``self.test_``
           (in place) and run both through a mean-imputation + ``RobustScaler``
           pipeline that is fitted on the training data only.
        2. For each of ``self.num_splits_`` stratified folds, build a fresh
           model via ``self.build_keras_model()``, fit it, reload the weights
           stored at the temporary weights path, and predict the held-out fold
           and the test set.
        3. Write out-of-fold predictions to ``self.v_['keras']`` and the
           fold-averaged test predictions to ``self.z_['keras']`` and
           ``self.z_['y']``.

        Returns:
            tuple: ``(cv, None)`` where ``cv`` is the mean log loss over folds.

        Raises:
            Exception: if the log loss of any fold is NaN.
        """
        cname = 'keras'
        train, y, test = self.train_, self.y_, self.test_

        np.random.seed(1234)
        # NOTE(review): these drops mutate the frames stored on ``self``, so a
        # second call on the same data would fail with a missing 'id' column.
        # Left in place because other code may rely on it -- confirm.
        train.drop('id', axis=1, inplace=True)
        test.drop('id', axis=1, inplace=True)

        from sklearn import pipeline
        # ``preprocessing.Imputer`` was removed in scikit-learn 0.22; fall back
        # to ``SimpleImputer`` (same default mean strategy) only when the old
        # class is unavailable, so older installations behave exactly as before.
        imputer_cls = getattr(preprocessing, 'Imputer', None)
        if imputer_cls is None:
            from sklearn.impute import SimpleImputer as imputer_cls
        pipe = pipeline.make_pipeline(imputer_cls(),
                                      preprocessing.RobustScaler())

        # Fit the preprocessing on train only and reuse it for test.
        train = pipe.fit_transform(train)
        test = pipe.transform(test)

        # Set before any model is built; presumably read by
        # ``build_keras_model`` to size the input layer -- TODO confirm.
        self.input_dims_ = train.shape[1]
        batch_size = self.batch_size_
        # Throw-away model, built only to print the architecture once.
        self.build_keras_model().summary(line_length=120)

        ss = model_selection.StratifiedKFold(n_splits=self.num_splits_,
                                             random_state=11,
                                             shuffle=True)
        scores = []
        model_path = self.temp_name('keras_mlp_weights')
        v, z = self.v_, self.z_
        # Float initialisation: the columns accumulate float predictions, and
        # writing floats into an integer column via ``.loc`` is deprecated
        # dtype-upcasting behaviour in recent pandas.
        v[cname] = 0.0
        z[cname] = 0.0
        try:
            for n, (itrain, ival) in enumerate(ss.split(train, y)):
                xtrain, xval = train[itrain], train[ival]
                # NOTE(review): ``itrain``/``ival`` are positional indices;
                # ``y[...]`` and ``v.loc[...]`` below match them only if
                # ``y``/``v`` are positionally indexed (ndarray or default
                # RangeIndex) -- confirm.
                ytrain, yval = y[itrain], y[ival]
                model = self.build_keras_model()
                model.fit(
                    xtrain, ytrain,
                    batch_size=batch_size,
                    # Large upper bound; stopping is presumably decided by the
                    # callbacks -- TODO confirm.
                    epochs=10000,
                    validation_data=(xval, yval),
                    verbose=0,
                    callbacks=build_keras_fit_callbacks(model_path),
                    shuffle=True
                )
                # Predict with the weights stored at ``model_path`` rather than
                # the state left by the last epoch.
                model.load_weights(model_path)
                p = model.predict(xval)
                v.loc[ival, cname] += p.ravel()
                score = metrics.log_loss(yval, p)
                if np.isnan(score):
                    raise Exception('NaN score!!!')
                print(cname, 'fold %d: ' % (n + 1), score, self.now())
                scores.append(score)
                z[cname] += model.predict(test).ravel()
                # Release the fold's model before the next one is built.
                del model
                for generation in range(3):
                    gc.collect(generation)
            cv = np.mean(scores)
            print('scores:', scores, cv, np.std(scores))
        finally:
            # Always remove the temporary weights, including when a fold fails.
            self.drop_temp(model_path)

        # Average the summed per-fold test predictions.
        z[cname] /= self.num_splits_
        z['y'] = z[cname]

        return cv, None