Example source code for Python's MinMaxScaler() class
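
Before the project samples below, here is a minimal self-contained sketch (mine, not from any of the projects) of what MinMaxScaler() does: each feature is rescaled to a target range, by default (0, 1), via X_std = (X - data_min_) / (data_max_ - data_min_) and X_scaled = X_std * (range_max - range_min) + range_min.

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# toy data, for illustration only
X = np.array([[1.0, 10.0],
              [2.0,  7.0],
              [4.0,  1.0]])

scaler = MinMaxScaler()               # default feature_range=(0, 1)
X_scaled = scaler.fit_transform(X)    # each column now spans [0, 1]
print(X_scaled)
print(scaler.inverse_transform(X_scaled))  # recovers the original values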

standardize.py (project: ML-note, author: JasonK93)
from sklearn.preprocessing import MinMaxScaler  # import needed by this excerpt

def test_MinMaxScaler():
    '''
    test the MinMaxScaler method
    :return: None
    '''
    X = [[1, 5, 1, 2, 10],
         [2, 6, 3, 2, 7],
         [3, 7, 5, 6, 4],
         [4, 8, 7, 8, 1]]
    print("before transform:", X)
    scaler = MinMaxScaler(feature_range=(0, 2))
    scaler.fit(X)
    print("min_ is :", scaler.min_)
    print("scale_ is :", scaler.scale_)
    print("data_max_ is :", scaler.data_max_)
    print("data_min_ is :", scaler.data_min_)
    print("data_range_ is :", scaler.data_range_)
    print("after transform:", scaler.transform(X))
test_visualization.py (project: cartographer, author: pablodecm)
def test_graph_simple():
    # Mapper, json_graph and html_graph come from the cartographer package
    # under test; make_circles and MinMaxScaler are from scikit-learn
    data, labels = make_circles(n_samples=2000, noise=0.03, factor=0.3)
    params = {'coverer__intervals': 10,
              'coverer__overlap': 0.1,
              'clusterer__min_samples': 3,
              'clusterer__eps': 0.5}
    m = Mapper(params=params)
    scaled_data = MinMaxScaler().fit_transform(data)
    m.fit(data, scaled_data)
    categories = {"labels": labels}
    scales = {"y[0]": scaled_data[:, 0],
              "y[1]": scaled_data[:, 1]}

    # parse the JSON output back to validate it
    json_graph_str = json_graph(m, categories, scales)
    json_graph_dict = json.loads(json_graph_str)
    # the HTML output is generated but not validated here
    html_graph_str = html_graph(m, categories, scales)
plot_mlp_training_curves.py (project: Parallel-SGD, author: angadgill)
def plot_on_dataset(X, y, ax, name):
    # for each dataset, plot learning for each learning strategy
    print("\nlearning on dataset %s" % name)
    ax.set_title(name)
    X = MinMaxScaler().fit_transform(X)
    mlps = []
    if name == "digits":
        # digits is larger but converges fairly quickly
        max_iter = 15
    else:
        max_iter = 400

    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(verbose=0, random_state=0,
                            max_iter=max_iter, **param)
        mlp.fit(X, y)
        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        ax.plot(mlp.loss_curve_, label=label, **args)
git_authorship.py (project: saapy, author: ashapochka)
def normalized_usage_by_package(self, package_usage_frame: pd.DataFrame,
                                    drop_package_prefix: str = None):
        scaler = MinMaxScaler()
        df = package_usage_frame.drop(columns='package')
        df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
        if drop_package_prefix:
            # strip the prefix when present: `startswith(...) and len(...)`
            # evaluates to 0 (keep the whole string) or len(prefix) (skip it)
            df_scaled['package'] = package_usage_frame['package'].apply(
                lambda text: text[text.startswith(drop_package_prefix)
                                  and len(drop_package_prefix):])
        else:
            df_scaled['package'] = package_usage_frame['package']
        df_sorted = df_scaled.sort_values('user_count').reset_index(drop=True)
        return df_sorted
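A hypothetical call, assuming the method lives on some analyzer object and the frame has a 'package' column plus numeric columns such as 'user_count' (the sort key above); the object, prefix, and second column here are illustrative, not from saapy:

import pandas as pd

frame = pd.DataFrame({'package': ['org.example.core', 'org.example.util'],
                      'user_count': [120, 45],
                      'loc': [3000, 800]})
# `analyzer` is a placeholder for whatever object defines the method
scaled = analyzer.normalized_usage_by_package(frame, drop_package_prefix='org.example.')
print(scaled)   # numeric columns scaled to [0, 1], prefix stripped from 'package'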
predict.py (project: golden_touch, author: at553)
def predict_new(self, input):
        # `input` should be a list of 5 raw values (the name shadows the
        # built-in input(), kept here as in the original project); validate
        # it before the expensive training step
        assert isinstance(input, list) and len(input) == 5
        model = self.train_model()
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaler.fit(self.data)
        inp = scaler.transform([input])
        # rescale the prediction back to the original units
        print(scaler.inverse_transform(model.predict(numpy.array(inp).reshape(1, 1, 5))))


# x = Predict()
# x.predict_new([1243.068, 1298.713, 1336.560, 1299.175, 1288.913])
setup_ifruitfly.py (project: iFruitFly, author: AdnanMuhib)
def v_demo(dir, prefix, pre_prefix, file_name, _dir):
    _val = []
    _coords = []
    file_dir_fix = dir + "\\output_INFLO.csv"
    with open(file_dir_fix, 'r') as inp:
        rd = csv.reader(inp)
        for row in rd:
            _val.append([row[1], row[2], row[0]])

    _val = np.asarray(_val)
    # keep an unscaled numeric copy for later reporting; wrap map() in list()
    # so this also works on Python 3, where map() returns a lazy iterator
    _val_original = list(map(myFloat, _val))
    _val_original = list(map(myInt, _val_original))
    _val_original = np.asarray(_val_original)
    _val = preprocessing.StandardScaler().fit_transform(_val)
    # earlier MinMaxScaler experiment, left commented out in the project:
    #_center = preprocessing.MinMaxScaler()
    #_center.fit_transform(_val)
    #_arr = StandardScaler().inverse_transform(_center)
    new_file = prefix + file_name + ".png"
    dbFun(_val, _val_original, new_file)
    return
iFruitFly_v2.0.py (project: iFruitFly, author: AdnanMuhib)
The v_demo function in this file is identical, line for line, to the one in setup_ifruitfly.py above, so it is not repeated here.
iFruitFly_Testing_weka.py (project: iFruitFly, author: AdnanMuhib)
The v_demo function in this file is likewise identical to the one in setup_ifruitfly.py above (only a path comment is wrapped differently), so it is not repeated here.

autoencoder.py (project: website-fingerprinting, author: AxelGoetz)
def next_batch(self, batches, in_memory):
        """
        Returns the next batch in some fixed-length representation,
        currently Panchenko et al.'s cumulative traces.

        @param batches an iterator over all of the batches:
            if in_memory == True, each batch is in batch-major form without padding;
            otherwise, each batch is a list of paths to the trace files
        @param in_memory is a boolean value

        @return if in_memory is False, a tuple (dict, paths) where paths is the
            list of file paths for the batch; else a feed dict for training
        """
        batch = next(batches)
        data_batch = batch

        if not in_memory:
            data_batch = [helpers.read_cell_file(path) for path in batch]

        data_batch = [self._process_trace(trace, self.layers[0]) for trace in data_batch]

        # NB: a new scaler is fit on every batch, so the scaling is not
        # consistent across batches
        min_max_scaler = MinMaxScaler()
        data_batch = min_max_scaler.fit_transform(data_batch)

        encoder_inputs_ = data_batch
        decoder_targets_ = data_batch

        train_dict = {
            self.encoder_inputs: encoder_inputs_,
            self.decoder_targets: decoder_targets_,
        }

        if not in_memory:
            return (train_dict, batch)
        return train_dict
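A hypothetical usage sketch for the disk-backed path; the session object and the loss/train ops are assumptions, not part of the excerpt:

# `model` is an instance of the autoencoder class above, `batches` an
# iterator over lists of file paths, `sess` a tf.Session (all assumed)
train_dict, paths = model.next_batch(batches, in_memory=False)
loss_value, _ = sess.run([model.loss, model.train_op], feed_dict=train_dict)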
utilsnn.py (project: rbm-ae-tf, author: Cospel)
def min_max_scale(X_train, X_test):
    # NB: the scaler is fit on train and test combined, so information
    # from the test set leaks into the scaling
    preprocessor = prep.MinMaxScaler().fit(np.concatenate((X_train, X_test), axis=0))
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test
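For comparison, a leak-free variant (my sketch, not from rbm-ae-tf): fit the scaler on the training split only, so test statistics never influence the scaling. Note that test values outside the training range will then map outside [0, 1].

from sklearn import preprocessing as prep

def min_max_scale_train_only(X_train, X_test):
    # fit on the training data only, then reuse the fitted scaler
    preprocessor = prep.MinMaxScaler().fit(X_train)
    return preprocessor.transform(X_train), preprocessor.transform(X_test)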
classifiers.py (project: triage, author: dssg)
def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0,
                 fit_intercept=True, intercept_scaling=1, class_weight=None,
                 random_state=None, solver='liblinear', max_iter=100,
                 multi_class='ovr', verbose=0, warm_start=False, n_jobs=1):
        self.penalty = penalty
        self.dual = dual
        self.tol = tol
        self.C = C
        self.fit_intercept = fit_intercept
        self.intercept_scaling = intercept_scaling
        self.class_weight = class_weight
        self.random_state = random_state
        self.solver = solver
        self.max_iter = max_iter
        self.multi_class = multi_class
        self.verbose = verbose
        self.warm_start = warm_start
        self.n_jobs = n_jobs

        self.minmax_scaler = MinMaxScaler()
        self.dsapp_cutoff = CutOff()
        self.lr = LogisticRegression(penalty=penalty,
                                     dual=dual,
                                     tol=tol,
                                     C=C,
                                     fit_intercept=fit_intercept,
                                     intercept_scaling=intercept_scaling,
                                     class_weight=class_weight,
                                     random_state=random_state,
                                     solver=solver,
                                     max_iter=max_iter,
                                     multi_class=multi_class,
                                     verbose=verbose,
                                     warm_start=warm_start,
                                     n_jobs=n_jobs)

        self.pipeline = Pipeline([
            ('minmax_scaler', self.minmax_scaler),
            ('dsapp_cutoff', self.dsapp_cutoff),
            ('lr', self.lr)
        ])
knnmodel.py (project: Supply-demand-forecasting, author: LevinJ)
def setClf(self):
        # KNN is distance-based, so features are rescaled to a common range first
        clf = KNeighborsClassifier(n_neighbors=33)
        min_max_scaler = preprocessing.MinMaxScaler()
        self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
        return
linearregressionmodel.py (project: Supply-demand-forecasting, author: LevinJ)
def setClf(self):
#         self.clf = Ridge(alpha=0.0000001, tol=0.0000001)
        clf = LinearRegression()
        min_max_scaler = preprocessing.MinMaxScaler()
        self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
        return
svmregressionmodel.py (project: Supply-demand-forecasting, author: LevinJ)
def setClf(self):
        clf = SVR(C=100, epsilon=0.1, gamma=0.0001, cache_size=10240)
        min_max_scaler = preprocessing.MinMaxScaler()
        self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
        return
pairwise_classification.py (project: AutoFolio, author: mlindauer)
def __init__(self, classifier_class):
        '''
            Constructor
        '''
        self.classifiers = []
        self.logger = logging.getLogger("PairwiseClassifier")
        self.classifier_class = classifier_class
        self.normalizer = MinMaxScaler()
test_big.py (project: skutil, author: tgsmith61591)
def test_large_grid():
        """In this test, we purposely overfit a RandomForest to completely random data
        in order to assert that the test error will far supercede the train error.
        """

        if not SK18:
            custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)
        else:
            custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)

        # define the pipe
        pipe = Pipeline([
            ('scaler', SelectiveScaler()),
            ('pca', SelectivePCA(weight=True)),
            ('rf', RandomForestClassifier(random_state=42))
        ])

        # define hyper parameters
        hp = {
            'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
            'pca__whiten': [True, False],
            'pca__weight': [True, False],
            'pca__n_components': uniform(0.75, 0.15),
            'rf__n_estimators': randint(5, 10),
            'rf__max_depth': randint(5, 15)
        }

        # define the grid
        grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy', n_jobs=1, cv=custom_cv, random_state=42)

        # this will fail because we haven't fit yet
        assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

        # fit the grid
        grid.fit(X_train, y_train)

        # score for coverage -- this might warn...
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            grid.score(X_train, y_train)

        # coverage:
        assert grid._estimator_type == 'classifier'

        # get predictions
        tr_pred, te_pred = grid.predict(X_train), grid.predict(X_test)

        # evaluate score (SHOULD be better than random...)
        accuracy_score(y_train, tr_pred), accuracy_score(y_test, te_pred)

        # grid score reports:
        # assert fails for bad percentile
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 0.0})
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 1.0})

        # assert fails for bad y_axis
        assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'y_axis': 'bad_axis'})

        # assert passes otherwise
        report_grid_score_detail(grid, charts=True, percentile=0.95)  # just ensure percentile works
RNN.py (project: finance-ml, author: Omarkhursheed)
def scale(train, test):
    # fit the scaler on the training data only, then apply it to both splits
    scale_f = MinMaxScaler(feature_range=(-1, 1))
    scale_f = scale_f.fit(train)
    train = train.reshape(train.shape[0], train.shape[1])
    train_s = scale_f.transform(train)
    test = test.reshape(test.shape[0], test.shape[1])
    test_s = scale_f.transform(test)
    return scale_f, train_s, test_s
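The fitted scaler is returned so predictions can later be mapped back to the original units; a short sketch (the model here is a placeholder, not part of the original file):

scale_f, train_s, test_s = scale(train, test)
# suppose `model` was fit on train_s; its scaled predictions can be mapped
# back, assuming they keep the (n_samples, n_features) layout the scaler saw
preds_scaled = model.predict(test_s)
preds_original = scale_f.inverse_transform(preds_scaled)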
preprocess.py (project: tianchi_power, author: lvniqi)
def get_scaled_user():
    dataset = get_dataset()
    new_df = pd.DataFrame(index=set(dataset.index))
    new_df = new_df.sort_index()
    for user_id in get_user_id_list():
        if not check_empty(user_id):
            new_df[user_id] = dataset[dataset.user_id == user_id].power_consumption
    # log-transform, drop the first 60 rows and users with missing data,
    # then scale every column to [0, 1]
    new_df_log = new_df.apply(np.log)
    new_df_log_scaled = preprocessing.MinMaxScaler().fit_transform(new_df_log.iloc[60:, :].dropna())
    return pd.DataFrame(new_df_log_scaled, columns=new_df_log.columns)
activ_cnn.py (project: Kutils, author: ishank26)
def scale_features(data):
    # compile a Theano function exposing the activations of layer 32,
    # then min-max scale the extracted features
    extract_features = theano.function([model.layers[0].input],
                                       model.layers[32].output,
                                       allow_input_downcast=True)
    features = extract_features(data)
    scale = MinMaxScaler()
    scale_feat = scale.fit_transform(features)
    return scale_feat
modelData.py (project: rdocChallenge, author: Elyne)
def get_bootstrapped_trainset(trainSet, y_train, bootstrap_data, es, estimator, th_bs):
    new_train_set = list(trainSet)
    new_y_train = list(y_train)

    trainAndBSData = trainSet + bootstrap_data

    generateDataDrivenFeats(trainSet, trainAndBSData, es)

    featurized = featurize(trainAndBSData)

    train_feats = [featurized[idx] for idx in range(len(trainSet))]
    test_feats = [featurized[idx] for idx in range(len(trainSet), len(trainAndBSData))]

    # do feature selection on the train data
    train_feats = fs.runFeatureSelection(train_feats, y_train, es)
    train_feats, y_train, train_bucket = ss.runSampleSelection(
        train_feats, y_train, list(range(len(trainSet))), es)

    # calculate inter-annotator weighting
    weights_train = getWeights(trainAndBSData, train_bucket, es.weighInterAnnot)

    vectorizer = DictVectorizer()
    x_train = vectorizer.fit_transform(train_feats)
    x_test = vectorizer.transform(test_feats)

    if es.scaleData:
        min_max_scaler = MinMaxScaler()
        x_train = min_max_scaler.fit_transform(x_train.toarray())
        x_test = min_max_scaler.transform(x_test.toarray())

    model = train(estimator, x_train, y_train, weights_train, model=None)

    # add bootstrap examples to the train set when the model is confident enough
    y_pred_prob = model.predict_proba(x_test)
    for i, cur_y in enumerate(y_pred_prob):
        if np.max(cur_y) > th_bs:
            new_train_set.append(bootstrap_data[i])
            new_y_train.append(np.argmax(cur_y))

    return (new_train_set, new_y_train)  # update none to confidence vector

