Example source code for Python's RobustScaler() class
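
RobustScaler (from sklearn.preprocessing) centers each feature on its median and scales it by the interquartile range, which makes it far less sensitive to outliers than StandardScaler. The snippets below all follow the same pattern: fit the scaler on the training data only, then reuse the learned statistics on the test data. A minimal sketch (the array values are illustrative):

import numpy as np
from sklearn.preprocessing import RobustScaler

X_train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 500.0]])  # note the outlier in column 2
X_test = np.array([[2.5, 15.0]])

scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)  # learn median/IQR from the training data
X_test_scaled = scaler.transform(X_test)        # apply the same statistics to the test data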

predict_2017_07_04_5.py (project: mlbootcamp_5, author: ivan-filonov)
def keras_mlp1(train2, y, test2, v, z):
    import sys
    from keras import layers
    from keras import models
    from keras import optimizers
    from sklearn import preprocessing
    cname = sys._getframe().f_code.co_name
    num_splits = 9
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(256, kernel_initializer='Orthogonal')(input_)
        #model = layers.BatchNormalization()(model)
        #model = layers.advanced_activations.PReLU()(model)
        model = layers.Activation('selu')(model)
        #model = layers.Dropout(0.7)(model)

        model = layers.Dense(64, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)
        #model = layers.Dropout(0.9)(model)

        model = layers.Dense(16, kernel_initializer='Orthogonal')(model)
        #model = layers.BatchNormalization()(model)
        model = layers.Activation('selu')(model)
        #model = layers.advanced_activations.PReLU()(model)

        model = layers.Dense(1, activation='sigmoid')(model)

        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy', optimizer=optimizers.Nadam())
        #print(model.summary(line_length=120))
        return model
    keras_common(train3, y, test3, v, z, num_splits, cname, build_model)  # shared CV training helper defined elsewhere in this project
predict_2017_07_04_5.py (project: mlbootcamp_5, author: ivan-filonov)
def keras_mlp2(train2, y, test2, v, z):
    import sys
    from keras import layers
    from keras import models
    from keras import optimizers
    from sklearn import preprocessing
    cname = sys._getframe().f_code.co_name
    num_splits = 9
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(1024, kernel_initializer='Orthogonal')(input_)
        model = layers.Activation('selu')(model)

        model = layers.Dense(128, kernel_initializer='Orthogonal')(model)
        model = layers.Activation('selu')(model)

        model = layers.Dense(16, kernel_initializer='Orthogonal')(model)
        model = layers.Activation('selu')(model)

        model = layers.Dense(1, activation='sigmoid')(model)

        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy', optimizer=optimizers.SGD())
        #print(model.summary(line_length=120))
        return model
    keras_common(train3, y, test3, v, z, num_splits, cname, build_model)
predict_2017_07_04_5.py (project: mlbootcamp_5, author: ivan-filonov)
def keras_mlp3(train2, y, test2, v, z):
    import sys
    from keras import layers
    from keras import models
    from keras import optimizers
    from sklearn import preprocessing
    cname = sys._getframe().f_code.co_name
    num_splits = 9
    scaler = preprocessing.RobustScaler()
    train3 = scaler.fit_transform(train2)
    test3 = scaler.transform(test2)
    input_dims = train3.shape[1]
    def build_model():
        input_ = layers.Input(shape=(input_dims,))
        model = layers.Dense(512, kernel_initializer='Orthogonal')(input_)
        model = layers.Activation('selu')(model)

        model = layers.Dense(256, kernel_initializer='Orthogonal')(model)
        model = layers.Activation('selu')(model)

        model = layers.Dense(32, kernel_initializer='Orthogonal')(model)
        model = layers.Activation('selu')(model)

        model = layers.Dense(1, activation='sigmoid')(model)

        model = models.Model(input_, model)
        model.compile(loss='binary_crossentropy', optimizer=optimizers.Adam())
        #print(model.summary(line_length=120))
        return model
    keras_common(train3, y, test3, v, z, num_splits, cname, build_model)
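
All three MLPs pair 'selu' activations with 'Orthogonal' initialization. For reference, the self-normalizing-network recipe that 'selu' comes from usually pairs it with 'lecun_normal' initialization and AlphaDropout; a hedged sketch of such a block (selu_block is a hypothetical helper, not part of this project):

from keras import layers

def selu_block(x, units, dropout=0.1):
    # canonical SNN pairing: selu activation + lecun_normal init + AlphaDropout
    x = layers.Dense(units, kernel_initializer='lecun_normal', activation='selu')(x)
    return layers.AlphaDropout(dropout)(x)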
lazzy_decomposion.py (project: Power-Consumption-Prediction, author: YoungGod)
def choose_best_lag(seq, pre_period, lags=range(1, 30), Kmax=200):
    """
    Select the best input lag (and neighbour count k) for the lazzy model
    by scoring each candidate lag on held-out windows of the sequence.
    Returns (models, best_lag, best_k).
    """
    models = []
    # standardize the sequence (a RobustScaler alternative is kept below, commented out)
    std_sca = StandardScaler().fit(np.array(seq).reshape(-1, 1))
#    rob_sca = RobustScaler().fit(np.array(seq).reshape(-1,1))
    seq = std_sca.transform(np.array(seq).reshape(-1, 1))

    # for each candidate lag, build a supervised dataset and evaluate it on a held-out split
    from sklearn.model_selection import train_test_split
    for input_lag in lags:
#        window = input_lag + pre_period
        X, Y = create_dataset(seq.flatten(), input_lag, pre_period)
#        lazzy_models = lazzy_loo(X[-1], X[0:-1], Y[:-1], Kmax)
#        y_pred = lazzy_prediction(X[-1], X[0:-1], Y[:-1], lazzy_models)
#        err = err_evaluation(y_pred.flatten(), Y[-1])
#
#        lazzy_models.sort()
#        models.append((err, input_lag, lazzy_models[0][1]))
        # do more cv
#        for state in range(0,3):
        err = 0.0
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.01, random_state=0)
        for x_q,y_q in zip(X_test,y_test):
            lazzy_models = lazzy_loo(x_q, X_train, y_train, Kmax)
            y_pred = lazzy_prediction(x_q, X_train, y_train, lazzy_models)
            err += err_evaluation(y_pred.flatten(), y_q)
        lazzy_models.sort()  # note: the chosen k reflects only the last test window
        models.append((err / len(X_test), input_lag, lazzy_models[0][1]))
    models.sort()
    best_lag = models[0][1]
    best_k = models[0][2]
#    fig, ax = plt.subplots()
#    ax.plot(y_pred.flatten(),label='prediction')
#    ax.plot(Y[-1],label='real')
#    ax.set_title('best cv lags')
    return models, best_lag, best_k
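
The commented-out rob_sca line above shows the scaler swap the author experimented with; on a 1-D series either scaler needs the reshape(-1, 1) step, as in this self-contained sketch (values are illustrative):

import numpy as np
from sklearn.preprocessing import RobustScaler, StandardScaler

seq = np.array([3.0, 4.0, 5.0, 4.5, 100.0])  # series with one outlier

std_scaled = StandardScaler().fit_transform(seq.reshape(-1, 1))
rob_scaled = RobustScaler().fit_transform(seq.reshape(-1, 1))  # outlier-resistant alternative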
test_pipe.py (project: skutil, author: tgsmith61591)
def test_random_grid():
    # build a pipeline (the skutil transformers, scipy.stats distributions, and the
    # X_train/y_train fixtures are imported at module level in skutil's test suite)
    pipe = Pipeline([
        ('retainer',       FeatureRetainer()),  # will retain all
        ('dropper',        FeatureDropper()),  # won't drop any
        ('mapper',         FunctionMapper()),  # pass through
        ('encoder',        OneHotCategoricalEncoder()),  # no object dtypes, so will pass through
        ('collinearity',   MulticollinearityFilterer(threshold=0.85)),
        ('imputer',        SelectiveImputer()),  # pass through
        ('scaler',         SelectiveScaler()),
        ('boxcox',         BoxCoxTransformer()),
        ('nzv',            NearZeroVarianceFilterer(threshold=1e-4)),
        ('pca',            SelectivePCA(n_components=0.9)),
        ('model',          RandomForestClassifier(n_jobs=1))
    ])

    # let's define a set of hyper-parameters over which to search
    hp = {
        'collinearity__threshold':    uniform(loc=.8, scale=.15),
        'collinearity__method':       ['pearson', 'kendall', 'spearman'],
        'scaler__scaler':             [StandardScaler(), RobustScaler()],
        'pca__n_components':          uniform(loc=.75, scale=.2),
        'pca__whiten':                [True, False],
        'model__n_estimators':        randint(5, 10),
        'model__max_depth':           randint(2, 5),
        'model__min_samples_leaf':    randint(1, 5),
        'model__max_features':        uniform(loc=.5, scale=.5),
        'model__max_leaf_nodes':      randint(10, 15)
    }

    # define the gridsearch
    search = RandomizedSearchCV(pipe, hp,
                                n_iter=2,  # just to test it even works
                                scoring='accuracy',
                                cv=2,
                                random_state=42)

    # fit the search
    search.fit(X_train, y_train)

    # test the report
    report_grid_score_detail(search, charts=False)
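
The same scaler-as-hyperparameter idea works in plain scikit-learn without skutil, because a whole pipeline step can be swapped through the parameter grid. A minimal sketch (dataset and parameter ranges are illustrative):

from scipy.stats import randint
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, StandardScaler

X, y = make_classification(n_samples=200, n_features=10, random_state=0)

pipe = Pipeline([('scaler', StandardScaler()),
                 ('model', RandomForestClassifier(n_jobs=1))])

hp = {
    'scaler': [StandardScaler(), RobustScaler()],  # search over the scaler itself
    'model__n_estimators': randint(5, 10),
}

search = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy', cv=2, random_state=42)
search.fit(X, y)
print(search.best_params_)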
transforms.py (project: wtte-rnn, author: ragulpr)
def normalize_padded(padded, means=None, stds=None):
    """Normalize by last dim of padded with means/stds or calculate them.

        .. TODO::
           * consider importing instead ex:

                from sklearn.preprocessing import StandardScaler, RobustScaler
                robust_scaler = RobustScaler()
                x_train = robust_scaler.fit_transform(x_train)
                x_test  = robust_scaler.transform(x_test)
                ValueError: Found array with dim 3. RobustScaler expected <= 2.

           * Don't normalize binary features
           * If events are sparse then this may lead to huge values.
    """
    # TODO: the epsilon threshold below is an arbitrary choice
    epsilon = 1e-6
    original_dtype = padded.dtype

    is_flat = len(padded.shape) == 2
    if is_flat:
        padded = np.expand_dims(padded, axis=-1)

    n_features = padded.shape[2]
    n_obs = padded.shape[0] * padded.shape[1]

    if means is None:
        means = np.nanmean(np.float128(
            padded.reshape(n_obs, n_features)), axis=0)

    means = means.reshape([1, 1, n_features])
    padded = padded - means

    if stds is None:
        stds = np.nanstd(np.float128(
            padded.reshape(n_obs, n_features)), axis=0)

    stds = stds.reshape([1, 1, n_features])
    if (stds < epsilon).any():
        print('warning. Constant cols: ', np.where((stds < epsilon).flatten()))
        stds[stds < epsilon] = 1.0
        # should be (small number)/1.0 as mean is subtracted.
        # Possible prob depending on machine err

    # cast back to the original dtype (the mean/std arithmetic was done in float128)
    padded = (padded / stds).astype(original_dtype)

    if is_flat:
        # Return to flat
        padded = np.squeeze(padded)
    return padded, means, stds
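
The docstring's TODO notes that RobustScaler raises "Found array with dim 3" on padded tensors. One workaround sketch is to fold the time axis into the sample axis before scaling (shapes are illustrative; padding values such as NaN may need separate handling depending on your scikit-learn version):

import numpy as np
from sklearn.preprocessing import RobustScaler

x_train = np.random.rand(32, 100, 4)   # (n_samples, n_timesteps, n_features)
n, t, f = x_train.shape

# collapse samples and timesteps so each feature becomes a single column
scaler = RobustScaler()
x_train_scaled = scaler.fit_transform(x_train.reshape(n * t, f)).reshape(n, t, f)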
model_base_keras.py (project: mlbootcamp_5, author: ivan-filonov)
def model(self):
        #cname = sys._getframe().f_code.co_name
        cname = 'keras'
        train, y, test = self.train_, self.y_, self.test_

        np.random.seed(1234)
        train.drop('id', axis=1, inplace=True)
        test.drop('id', axis=1, inplace=True)

        from sklearn import pipeline
        pipe = pipeline.make_pipeline(preprocessing.Imputer(),
                                      preprocessing.RobustScaler())

        train = pipe.fit_transform(train)
        test = pipe.transform(test)

        self.input_dims_ = train.shape[1]
        def build_model():
            return self.build_keras_model()
        batch_size = self.batch_size_
        build_model().summary(line_length=120)
        ss = model_selection.StratifiedKFold(n_splits = self.num_splits_,
                                             random_state = 11,
                                             shuffle = True)
        scores = list()
        model_path = self.temp_name('keras_mlp_weights')
        v, z = self.v_, self.z_
        v[cname] = 0
        z[cname] = 0
        for n, (itrain, ival) in enumerate(ss.split(train, y)):
            xtrain, xval = train[itrain], train[ival]
            ytrain, yval = y[itrain], y[ival]
            model = build_model()
            model.fit(
                    xtrain, ytrain,
                    batch_size = batch_size,
                    epochs = 10000,
                    validation_data = (xval, yval),
                    verbose = 0,
                    callbacks = build_keras_fit_callbacks(model_path),
                    shuffle = True
                )
            model.load_weights(model_path)
            p = model.predict(xval)
            v.loc[ival, cname] += p.ravel()
            score = metrics.log_loss(y[ival], p)
            if score != score:  # NaN is the only value not equal to itself
                raise Exception('NaN score!!!')
            print(cname, 'fold %d: '%(n+1), score, self.now())
            scores.append(score)
            z[cname] += model.predict(test).ravel()
            del model
            for i in range(3): gc.collect(i)
        print('scores:', scores, np.mean(scores), np.std(scores))
        self.drop_temp(model_path)
        cv=np.mean(scores)
        z[cname] /= self.num_splits_
        z['y'] = z[cname]

        return cv, None
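
Note that preprocessing.Imputer was removed in scikit-learn 0.22; on current versions the equivalent imputer-plus-scaler pipeline would be built roughly like this (toy arrays for illustration):

import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler

train = np.array([[1.0, np.nan], [2.0, 20.0], [3.0, 500.0]])
test = np.array([[np.nan, 15.0]])

pipe = make_pipeline(SimpleImputer(), RobustScaler())
train = pipe.fit_transform(train)
test = pipe.transform(test)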

