Python scale() usage examples

neural_data.py (project: neural-finance, author: Metnew)
def sk_min_max(X):
    min_max_scaler = MinMaxScaler()
    # X = scale(X, axis=0, with_mean=True, with_std=True, copy=True)
    return min_max_scaler.fit_transform(X)
neural_data.py (project: neural-finance, author: Metnew)
def sk_scale(X):
    return scale(X, axis=0, with_mean=True, with_std=True, copy=True)
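For reference, a quick check of what these two helpers produce, assuming only NumPy and scikit-learn: scale() gives each column zero mean and unit variance, while MinMaxScaler maps each column into [0, 1].

import numpy as np
from sklearn.preprocessing import MinMaxScaler, scale

X = np.random.rand(100, 3) * 50
Xs = scale(X, axis=0, with_mean=True, with_std=True, copy=True)
print(Xs.mean(axis=0))  # ~[0. 0. 0.]
print(Xs.std(axis=0))   # ~[1. 1. 1.]
Xm = MinMaxScaler().fit_transform(X)
print(Xm.min(axis=0), Xm.max(axis=0))  # [0. 0. 0.] [1. 1. 1.]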
RegressionBase.py (project: AirTicketPredicting, author: junlulocky)
def Standardization(self):
        # feature 10: minimum price so far; feature 11: maximum price so far
        # feature 12: current price
        scaled = preprocessing.scale(self.X_train[:, 10:13])
        self.X_train[:, 10:13] = scaled

        scaled = preprocessing.scale(self.X_test[:, 10:13])
        self.X_test[:, 10:13] = scaled
ClassficationBase.py (project: AirTicketPredicting, author: junlulocky)
def Standardization(self):
        scaled = preprocessing.scale(self.X_train[:, 10:12])
        self.X_train[:, 10:12] = scaled

        scaled = preprocessing.scale(self.X_test[:, 10:12])
        self.X_test[:, 10:12] = scaled
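Both Standardization methods above scale the train and test sets independently, each with its own mean and standard deviation. If the test set should instead be transformed with the training statistics, the usual pattern is a StandardScaler fit on the training slice only; a minimal sketch (the arrays here are illustrative):

import numpy as np
from sklearn.preprocessing import StandardScaler

X_train = np.random.rand(100, 13)
X_test = np.random.rand(20, 13)

scaler = StandardScaler()
X_train[:, 10:13] = scaler.fit_transform(X_train[:, 10:13])  # fit on training data only
X_test[:, 10:13] = scaler.transform(X_test[:, 10:13])        # reuse the training mean/std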
Classification.py (project: UVA, author: chiachun)
def prepare(self):
        with open('%s' % self.cfg.pca_pkl, 'rb') as pklfile:  # pickle files must be read in binary mode
            self.pca = pickle.load(pklfile)
        try:
            self.df = self.df.query('face == 1')
        except Exception:
            print('Face column not found in the dataframe; '
                  'treated as not being processed by skin_filter.')

        x = self.df[self.ftcols].values  # as_matrix() was removed in recent pandas
        x = preprocessing.scale(x)
        xp = self.pca.transform(x)
        self.dfp = pd.DataFrame(xp)
        self.dfp[['number','time']] = self.df[['number','time']]
upca.py (project: UVA, author: chiachun)
def fit(self, X, STANDARDIZE=True, n=10):
      if not isinstance(X, np.ndarray):
          X = to_array(X)
      assert X.ndim == 2, "Input array must have two dimensions."
      if not check_standardized(X):
          if STANDARDIZE:
              X = preprocessing.scale(X)
              print("Standardized input data for fit.")
          else:
              print("WARNING: data is not standardized and the STANDARDIZE "
                    "option is switched off. Make sure this is what you intended.")
      self.model = PCA(n_components=n)
      self.model.fit(X)
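The helpers to_array and check_standardized are not shown in this snippet. A minimal sketch of what check_standardized might look like, assuming it simply tests for near-zero column means and near-unit column standard deviations:

import numpy as np

def check_standardized(X, tol=1e-3):
    # Assumed behavior: True when every column already has (approximately)
    # zero mean and unit standard deviation.
    means_ok = np.all(np.abs(X.mean(axis=0)) < tol)
    stds_ok = np.all(np.abs(X.std(axis=0) - 1.0) < tol)
    return means_ok and stds_ok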
model.py (project: 5th_place_solution_facebook_check_ins, author: aikinogard)
def svc_rbf_xyat(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"]
        df_new["accuracy"] = df["accuracy"].apply(np.log10)
        return preprocessing.scale(df_new.values)

    logging.info("train svc_rbf_xyat model")
    clf = SVC(kernel='rbf', probability=True, cache_size=3000)
    clf.fit(prepare_feats(df_cell_train_feats), y_train)
    y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
    return y_test_pred
model.py (project: 5th_place_solution_facebook_check_ins, author: aikinogard)
def svc_lin_xyat(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"]
        df_new["accuracy"] = df["accuracy"].apply(np.log10)
        return preprocessing.scale(df_new.values)

    logging.info("train svc_lin_xyat model")
    clf = SVC(kernel='linear', probability=True, cache_size=3000)
    clf.fit(prepare_feats(df_cell_train_feats), y_train)
    y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
    return y_test_pred
model.py (project: 5th_place_solution_facebook_check_ins, author: aikinogard)
def svc_rbf_xyatu(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"]
        df_new["accuracy"] = df["accuracy"]
        return preprocessing.scale(df_new.values)

    logging.info("train svc_rbf_xyatu model")
    clf = SVC(kernel='rbf', probability=True, cache_size=3000)
    clf.fit(prepare_feats(df_cell_train_feats), y_train)
    y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
    return y_test_pred
model.py (project: 5th_place_solution_facebook_check_ins, author: aikinogard)
def svc_lin_xyatu(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"]
        df_new["accuracy"] = df["accuracy"]
        return preprocessing.scale(df_new.values)

    logging.info("train svc_lin_xyatu model")
    clf = SVC(kernel='linear', probability=True, cache_size=3000)
    clf.fit(prepare_feats(df_cell_train_feats), y_train)
    y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
    return y_test_pred
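The four model functions above differ only in the kernel and in whether accuracy is log-transformed, and each standardizes the train and test cells separately. A sketch of the same idea with a scikit-learn Pipeline, which fits the scaler on the training cell only (the function name here is illustrative):

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

def svc_rbf_xyat_pipe(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        out = df[["x", "y", "hour", "weekday"]].copy()
        out["accuracy"] = df["accuracy"].apply(np.log10)
        return out.values

    clf = make_pipeline(StandardScaler(),
                        SVC(kernel='rbf', probability=True, cache_size=3000))
    clf.fit(prepare_feats(df_cell_train_feats), y_train)
    return clf.predict_proba(prepare_feats(df_cell_test_feats))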
classifier.py (project: Clustering, author: Ram81)
def scaleDataset(data):
    '''
        Standardize the dataset to zero mean and unit variance
        (scale() does not actually bound values to a [-1, 1] range)
    '''
    data = scale(data)

    return data
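If the [-1, 1] range mentioned in the original docstring is actually wanted, minmax_scale is the closer fit; a minimal sketch:

import numpy as np
from sklearn.preprocessing import minmax_scale

data = np.random.rand(50, 4) * 100
data = minmax_scale(data, feature_range=(-1, 1))  # each column mapped into [-1, 1]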
recommender.py (project: Episodes, author: guptachetan1997)
def get_recommendations():
    module_dir = os.path.dirname(__file__)

    train_df = build_training_set()
    if train_df is None:
        return []
    x_train = train_df.iloc[:, 5:]
    try:
        x_train = scale(x_train)
    except Exception:  # nothing to scale yet, e.g. before the first migrations
        print("First migrations")
    y_train = train_df.iloc[:, 3]
    x_train_labels = train_df.iloc[:, 0]

    target_df = pd.read_csv(os.path.join(module_dir, 'data.csv'))
    # Append the training rows twice, then drop all duplicates:
    # this removes every series already seen in training from the targets.
    target_df = target_df.append(train_df)
    target_df = target_df.append(train_df)
    target_df = target_df.drop_duplicates('SeriesName', keep=False)

    x_target = scale(target_df.iloc[:, 5:])
    x_target_labels = target_df.iloc[:, 0]

    clf = RandomForestClassifier()
    clf.fit(x_train,y_train)

    y_target = clf.predict(x_target)

    new_df = pd.DataFrame()
    new_df['seriesName'] = x_target_labels
    new_df['tvdbID'] = target_df.iloc[:, 1]
    new_df['PredictedRating'] = y_target
    new_df['indicator'] = (target_df.iloc[:, 4]/target_df.iloc[:, 3])*new_df['PredictedRating']

    new_df = new_df.sort_values(['indicator'], ascending=False)
    initial_list = list(new_df.iloc[:4, 1])
    latter_list =  list(new_df.iloc[5:15, 1])
    shuffle(latter_list)
    return list(initial_list + latter_list[:5])
main.py (project: xplore, author: fahd09)
def scale_feature(self, col=None, scaling=None, scaling_parms=None):
        '''
        Scales a given set of numerical columns. This only works for columns
        with numerical values.

        Parameters
        ----------
        col : a string of a column name, or a list of many columns names or
                None (default). If col is None, all numerical columns will
                be used.
        scaling : {'zscore', 'minmax_scale' (default), 'scale', 'maxabs_scale',
                    'robust_scale'}
            User-defined scaling functions can also be used through self.transform_feature
        scaling_parms : dictionary
            any additional parameters to be used for sklearn's scaling functions.

        '''
        self._validate_params(params_list   = {'col':col,'scaling':scaling},
                              expected_types= {'col':[str,list,type(None)], 'scaling':[str,type(None)]})

        if scaling is None: scaling = 'minmax_scale'

        if scaling == 'zscore':
            scaling = 'lambda x: (x - x.mean()) / x.std()'
        elif scaling == 'minmax_scale':
            if scaling_parms is None:
                scaling_parms = {'feature_range': (0, 1), 'axis': 0}
        elif scaling == 'scale':
            if scaling_parms is None:
                scaling_parms = {'with_mean': True, 'with_std': True, 'axis': 0}
        elif scaling == 'maxabs_scale':
            if scaling_parms is None:
                scaling_parms = {'axis': 0}
        elif scaling == 'robust_scale':
            if scaling_parms is None:
                scaling_parms = {'with_centering': True, 'with_scaling': True, 'axis': 0}  # 'quantile_range': (25.0, 75.0),
        else:
            raise TypeError('Unsupported scaling type: %r' % scaling)

        self.transform_feature(col=col, func_str=scaling, addtional_params=scaling_parms)
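A hypothetical call, assuming an instance xp of the class this method belongs to (the instance and column names are placeholders, not part of the xplore API shown above):

xp.scale_feature(col='LotArea', scaling='robust_scale')   # sklearn defaults filled in
xp.scale_feature(col=['x', 'y'], scaling='minmax_scale',
                 scaling_parms={'feature_range': (-1, 1), 'axis': 0})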
results_analysis.py (project: syracuse_public, author: dssg)
def gen_feature_imp_matrix(model_id_list, features_df):

    # Start from the first model's importance vector, then stack the rest row-wise
    feature_imp_matrix = features_df[features_df.model_id == model_id_list[0]].sort_values("feature", inplace=False).importance.values
    for model_id in model_id_list[1:]:
        b = features_df[features_df.model_id == model_id].sort_values("feature", inplace=False).importance.values
        feature_imp_matrix = np.vstack((feature_imp_matrix, b))
    feature_imp_matrix_normd = scale(np.transpose(feature_imp_matrix), axis=0, with_mean=True, with_std=True, copy=True)

    return feature_imp_matrix_normd
spectrogram.py (project: Automatic_Speech_Recognition, author: zzw922cn)
def spectrogramPower(audio, window_size=0.02, window_stride=0.01):
    """ Log-magnitude spectrogram via the short-time Fourier transform.

    Details:
        audio - path to the input WAV file; its time-domain signal is the one
                whose spectrogram is computed.

        win_length - the signal is decomposed into chunks, and each chunk has a
                 specified width; window_size defines that width in seconds. For a
                 discrete-time signal with sampling period Ts, the window size in
                 samples is window_samples = window_time / Ts.

        hop_length - the same as the stride in a convolutional network: the step
                 (and hence overlap) between consecutive windows.

    """
    samplingRate, samples = wav.read(audio)
    win_length = int(window_size * samplingRate)
    hop_length = int(window_stride * samplingRate)
    n_fft = win_length
    D = librosa.core.stft(samples, n_fft=n_fft,hop_length=hop_length,
                      win_length=win_length)
    mag = np.abs(D)
    log_mag = np.log1p(mag)
    # normalization
    log_mag = preprocessing.scale(log_mag)
    # size: frequency_bins*time_len
    return log_mag
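A minimal usage sketch (the file name is a placeholder). Since scale() works column-wise by default and the array is frequency_bins x time_len, each time frame ends up standardized across frequency bins:

log_mag = spectrogramPower('speech.wav', window_size=0.02, window_stride=0.01)
print(log_mag.shape)         # (frequency_bins, time_len)
print(log_mag.mean(axis=0))  # ~0 per time frame after preprocessing.scale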
datapreprocessing.py (project: House-Pricing, author: playing-kaggle)
def standardize(data):
    # Standardizes the numeric columns in place; the caller's DataFrame is mutated.
    numeric_list = ['BsmtFullBath', 'LotArea', 'YearRemodAdd', 'GrLivArea', 'BsmtHalfBath', 'MiscVal', 'YearBuilt',
                    'WoodDeckSF', 'KitchenAbvGr', 'TotalBsmtSF', 'GarageArea', 'GarageCars', 'OpenPorchSF', 'MoSold',
                    'LowQualFinSF', 'BedroomAbvGr', 'Fireplaces', '1stFlrSF', 'FullBath', 'BsmtFinSF1', 'BsmtFinSF2',
                    'HalfBath',
                    'Porch', '2ndFlrSF', 'MasVnrArea', 'YrSold', 'BsmtUnfSF', 'LotFrontage', 'TotRmsAbvGrd']

    data.loc[:, numeric_list] = preprocessing.scale(data.loc[:, numeric_list])
model_single.py (project: Tencent_Social_Advertising_Algorithm_Competition, author: guicunbin)
def get_concat_data(label_csv, label_col, other_csvs, is_rate, important_feats):
    print('important_feats : ', len(important_feats))
    rank_feats  = [f for f in get_csv_header(dataset1_csv) if 'click' in f]
    rank_feats  = [f for f in rank_feats if f in important_feats] if important_feats else rank_feats
    X           = pd.read_csv(label_csv, usecols = rank_feats+[label_col]).apply(small_dtype)
    X           = X[:1000000] if is_tiny else X
    print('concat csvs ......')
    X           = pd.concat([X, get_need_feats(other_csvs, is_rate, is_tiny, important_feats)], axis=1)
    #if label_csv.split('/')[-1] == 'dataset2.csv':
    #    for c in X.columns:
    #        if c.endswith('_fset_total_cnt'):
    #            X = X.drop(X[X[c]==0].index, axis=0)
    feat_cols   = [f for f in X.columns if f != label_col]
    if is_to_csv:
        save_file = label_csv.split('.csv')[0]+'_concat.csv'
        if os.path.exists(save_file):
            print(save_file + " already exists")
        else:
            print('to csv ........')
            X            = X.replace(np.nan, -1)
            X            = X.replace(np.inf, -2)
            X[feat_cols] = scale(X[feat_cols]).astype('float16')
            X.to_csv(save_file, index=False, chunksize = 50000)
    print X.shape
    # TODO cate_feats = [f for f in X.columns if 'click' in f] 
    X,      = change_to_category([X], cate_feats)
    y       = X[label_col].values
    X       = X[feat_cols]
    if label_col == 'label':
        print('positive percent ', y.mean())
    return X, y
getFeature.py (project: Data-Mining-Project, author: mrsan22)
def windowCharacter(x):
    tmp = np.zeros((x.shape[0]))
    n=0
    for row in x.iterrows():
        tmp[n] = signalMag(row[1]['X'],row[1]['Y'],row[1]['Z'])
        n=n+1

    # if np.std(tmp) > 5:
    #     return None
    # else:

    p_25 = np.percentile(tmp, 25)
    p_75 = np.percentile(tmp, 75)
    tmp_25 = [each for each in tmp if each < p_25]  # values below the 25th percentile
    tmp_75 = [each for each in tmp if each < p_75]  # values below the 75th percentile

    data_dm = scale(tmp, with_mean=True, with_std=False)  # demean data (mean removed, variance kept)

    (freq_1,power_1) = butterFilter(data_dm,lowcut_1,highcut_1)
    idx_1 = np.argmax(power_1)
    freq_1_sec = np.delete(freq_1,idx_1)
    power_1_sec = np.delete(power_1,idx_1)
    idx_1_sec = np.argmax(power_1_sec)

    (freq_2,power_2) = butterFilter(data_dm,lowcut_2,highcut_2)
    idx_2 = np.argmax(power_2)

    return np.mean(tmp), np.std(tmp), np.median(tmp), np.linalg.norm(tmp_25), np.linalg.norm(tmp_75),np.sum(power_1), freq_1[idx_1],power_1[idx_1], freq_1_sec[idx_1_sec], power_1_sec[idx_1_sec], freq_2[idx_2],power_2[idx_2],freq_1[idx_1]/np.sum(power_1)
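For reference, with with_std=False scale() only removes the mean; a quick check:

import numpy as np
from sklearn.preprocessing import scale

v = np.array([1.0, 2.0, 6.0])
print(scale(v, with_mean=True, with_std=False))  # [-2. -1.  3.]: mean removed, spread unchanged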
io.py (project: countae, author: gokceneraslan)
def normalize(x, sf, logtrans=True, sfnorm=True, zeromean=True):
    if sfnorm:
        assert len(sf.shape) == 1
        x = x / (sf[:, None]+1e-8)  # colwise div

    if logtrans:
        x = np.log1p(x)

    if zeromean:
        x = scale(x)

    return x
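A toy usage sketch (the values and the size-factor definition are illustrative): per-cell size-factor division, log1p, then column-wise standardization.

import numpy as np

counts = np.array([[10., 0., 5.],
                   [20., 2., 8.]])
sf = counts.sum(axis=1) / counts.sum(axis=1).mean()  # simple library-size factors
x_norm = normalize(counts, sf)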
cluster.py (project: betasqaud, author: AJacobs15)
def test_scale():
    matrix = [[0,30], [1, 27], [3, 24]]

    scaled = pre.scale(matrix)
    print(scaled)
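For reference, the expected output, given that scale() uses the population standard deviation (ddof=0):

# [[-1.06904497  1.22474487]
#  [-0.26726124  0.        ]
#  [ 1.33630621 -1.22474487]]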

