from sklearn.preprocessing import MinMaxScaler

def sk_min_max(X):
    # Rescale each feature to the [0, 1] range.
    min_max_scaler = MinMaxScaler()
    # X = scale(X, axis=0, with_mean=True, with_std=True, copy=True)
    return min_max_scaler.fit_transform(X)
Python scale() example source code
from sklearn.preprocessing import scale

def sk_scale(X):
    # Standardize each column to zero mean and unit variance.
    return scale(X, axis=0, with_mean=True, with_std=True, copy=True)
def Standardization(self):
# feature 10: minimum price so far; feature 11: maximum price so far
# feature 12: current price
scaled = preprocessing.scale(self.X_train[:, 10:13])
self.X_train[:, 10:13] = scaled
scaled = preprocessing.scale(self.X_test[:, 10:13])
self.X_test[:, 10:13] = scaled
def Standardization(self):
scaled = preprocessing.scale(self.X_train[:, 10:12])
self.X_train[:, 10:12] = scaled
scaled = preprocessing.scale(self.X_test[:, 10:12])
self.X_test[:, 10:12] = scaled
def prepare(self):
with open('%s' % self.cfg.pca_pkl, 'r') as pklfile:
self.pca = pickle.load(pklfile)
try:
self.df = self.df.query('face == 1')
except:
print 'Face column not found in the dataframe',
print 'Treated as not being processed by skin_filter.'
x = self.df[self.ftcols].as_matrix()
x = preprocessing.scale(x)
xp = self.pca.transform(x)
self.dfp = pd.DataFrame(xp)
self.dfp[['number','time']] = self.df[['number','time']]
def fit(self, X, STANDARDIZE=True, n=10):
if not isinstance(X, np.ndarray):
X = to_array(X)
assert(X.ndim == 2), "Input array must have two dimensions."
if not check_standardized(X):
if STANDARDIZE:
X = preprocessing.scale(X)
print "Standardize input data for fit."
else:
print "WARNING: data is not standardized and you switch off STANDARDIZE option.",
print "Make sure this is what you intended."
self.model = PCA(n_components=n)
self.model.fit(X)
model.py source — project: 5th_place_solution_facebook_check_ins, author: aikinogard
def svc_rbf_xyat(df_cell_train_feats, y_train, df_cell_test_feats):
def prepare_feats(df):
df_new = pd.DataFrame()
df_new["x"] = df["x"]
df_new["y"] = df["y"]
df_new["hour"] = df["hour"]
df_new["weekday"] = df["weekday"]
df_new["accuracy"] = df["accuracy"].apply(np.log10)
return preprocessing.scale(df_new.values)
logging.info("train svc_rbf_xyat model")
clf = SVC(kernel='rbf', probability=True, cache_size=3000)
clf.fit(prepare_feats(df_cell_train_feats), y_train)
y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
return y_test_pred
model.py source — project: 5th_place_solution_facebook_check_ins, author: aikinogard
def svc_lin_xyat(df_cell_train_feats, y_train, df_cell_test_feats):
def prepare_feats(df):
df_new = pd.DataFrame()
df_new["x"] = df["x"]
df_new["y"] = df["y"]
df_new["hour"] = df["hour"]
df_new["weekday"] = df["weekday"]
df_new["accuracy"] = df["accuracy"].apply(np.log10)
return preprocessing.scale(df_new.values)
logging.info("train svc_lin_xyat model")
clf = SVC(kernel='linear', probability=True, cache_size=3000)
clf.fit(prepare_feats(df_cell_train_feats), y_train)
y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
return y_test_pred
model.py source — project: 5th_place_solution_facebook_check_ins, author: aikinogard
def svc_rbf_xyatu(df_cell_train_feats, y_train, df_cell_test_feats):
def prepare_feats(df):
df_new = pd.DataFrame()
df_new["x"] = df["x"]
df_new["y"] = df["y"]
df_new["hour"] = df["hour"]
df_new["weekday"] = df["weekday"]
df_new["accuracy"] = df["accuracy"]
return preprocessing.scale(df_new.values)
logging.info("train svc_rbf_xyatu model")
clf = SVC(kernel='rbf', probability=True, cache_size=3000)
clf.fit(prepare_feats(df_cell_train_feats), y_train)
y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
return y_test_pred
model.py source — project: 5th_place_solution_facebook_check_ins, author: aikinogard
def svc_lin_xyatu(df_cell_train_feats, y_train, df_cell_test_feats):
def prepare_feats(df):
df_new = pd.DataFrame()
df_new["x"] = df["x"]
df_new["y"] = df["y"]
df_new["hour"] = df["hour"]
df_new["weekday"] = df["weekday"]
df_new["accuracy"] = df["accuracy"]
return preprocessing.scale(df_new.values)
logging.info("train svc_lin_xyatu model")
clf = SVC(kernel='linear', probability=True, cache_size=3000)
clf.fit(prepare_feats(df_cell_train_feats), y_train)
y_test_pred = clf.predict_proba(prepare_feats(df_cell_test_feats))
return y_test_pred
from sklearn.preprocessing import scale

def scaleDataset(data):
    '''
    Standardize the dataset: each column is transformed to zero mean and unit variance.
    '''
    data = scale(data)
    return data
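A quick check of what scale() returns; a minimal standalone sketch (the toy array below is made up for illustration):

import numpy as np

toy = np.array([[1.0, 200.0],
                [2.0, 300.0],
                [4.0, 700.0]])
scaled = scaleDataset(toy)
print(scaled.mean(axis=0))  # approximately [0. 0.]: each column now has zero mean
print(scaled.std(axis=0))   # approximately [1. 1.]: and unit variance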
def get_recommendations():
module_dir = os.path.dirname(__file__)
train_df = build_training_set()
if train_df is None:
return []
x_train = train_df.iloc[:, 5:]
try:
x_train = scale(x_train)
except:
print("First migrations")
y_train = train_df.iloc[:, 3]
x_train_labels = train_df.iloc[:, 0]
target_df = pd.read_csv(os.path.join(module_dir,'data.csv'))
target_df = pd.DataFrame(target_df)
    # Append the training set twice so that every series already seen in training
    # appears more than once and is removed by drop_duplicates(keep=False),
    # leaving only series the model has not been trained on.
    target_df = target_df.append(train_df)
    target_df = target_df.append(train_df)
    target_df = target_df.drop_duplicates('SeriesName', keep=False)
x_target = scale(target_df.iloc[:, 5:])
x_target_labels = target_df.iloc[:, 0]
clf = RandomForestClassifier()
clf.fit(x_train,y_train)
y_target = clf.predict(x_target)
new_df = pd.DataFrame()
new_df['seriesName'] = x_target_labels
new_df['tvdbID'] = target_df.iloc[:, 1]
new_df['PredictedRating'] = y_target
new_df['indicator'] = (target_df.iloc[:, 4]/target_df.iloc[:, 3])*new_df['PredictedRating']
new_df = new_df.sort_values(['indicator'], ascending=False)
initial_list = list(new_df.iloc[:4, 1])
latter_list = list(new_df.iloc[5:15, 1])
shuffle(latter_list)
return list(initial_list + latter_list[:5])
def scale_feature(self, col=None, scaling=None, scaling_parms=None):
    '''
    Scales a given set of numerical columns. This only works for columns
    with numerical values.

    Parameters
    ----------
    col : a string with a single column name, a list of column names, or
        None (default). If col is None, all numerical columns are used.
    scaling : {'zscore', 'minmax_scale' (default), 'scale', 'maxabs_scale',
        'robust_scale'}
        User-defined scaling functions can also be applied through
        self.transform_feature.
    scaling_parms : dictionary
        Any additional parameters to be passed to sklearn's scaling functions.
    '''
    self._validate_params(params_list={'col': col, 'scaling': scaling},
                          expected_types={'col': [str, list, type(None)],
                                          'scaling': [str, type(None)]})
    if scaling is None:
        scaling = 'minmax_scale'
    if scaling == 'zscore':
        scaling = 'lambda x: (x - x.mean()) / x.std()'
    elif scaling == 'minmax_scale':
        if scaling_parms is None:
            scaling_parms = {'feature_range': (0, 1), 'axis': 0}
    elif scaling == 'scale':
        if scaling_parms is None:
            scaling_parms = {'with_mean': True, 'with_std': True, 'axis': 0}
    elif scaling == 'maxabs_scale':
        if scaling_parms is None:
            scaling_parms = {'axis': 0}
    elif scaling == 'robust_scale':
        if scaling_parms is None:
            scaling_parms = {'with_centering': True, 'with_scaling': True, 'axis': 0}  # 'quantile_range': (25.0, 75.0)
    else:
        raise TypeError('Unsupported scaling type: %s' % scaling)
    self.transform_feature(col=col, func_str=scaling, addtional_params=scaling_parms)
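For reference, the option names above map onto sklearn.preprocessing functions that can also be called directly; a minimal standalone sketch with made-up data:

import numpy as np
from sklearn.preprocessing import minmax_scale, scale, maxabs_scale, robust_scale

X = np.array([[1.0, -10.0],
              [2.0,   0.0],
              [4.0,  30.0]])
print(minmax_scale(X, feature_range=(0, 1), axis=0))  # each column mapped to [0, 1]
print(scale(X, with_mean=True, with_std=True, axis=0))  # zero mean, unit variance per column
print(maxabs_scale(X, axis=0))  # each column divided by its maximum absolute value
print(robust_scale(X, with_centering=True, with_scaling=True, axis=0))  # median/IQR based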
def gen_feature_imp_matrix(model_id_list, features_df):
    # Build a matrix with one row of feature importances per model, then
    # standardize each model's importances after transposing (features become rows).
    feature_imp_matrix = None  # placeholder; filled from the first model below
    for model_id in model_id_list[:1]:
        feature_imp_matrix = features_df[features_df.model_id == model_id].sort_values("feature", inplace=False).importance.values
    for model_id in model_id_list[1:]:
        b = features_df[features_df.model_id == model_id].sort_values("feature", inplace=False).importance.values
        feature_imp_matrix = np.vstack((feature_imp_matrix, b))
    feature_imp_matrix_normd = scale(np.transpose(feature_imp_matrix), axis=0, with_mean=True, with_std=True, copy=True)
    return feature_imp_matrix_normd
def spectrogramPower(audio, window_size=0.02, window_stride=0.01):
    """Short-time Fourier transform (log-power spectrogram).

    Details:
        audio - path to the wav file containing the time-domain signal whose
            spectrogram is computed.
        win_length - the signal is decomposed into chunks, and each chunk has a
            specified width; win_length is that width in samples. For a
            discrete-time signal with sampling period Ts, the window size in
            samples is window_samples = window_time / Ts.
        hop_length - analogous to the stride in a convolutional network; it
            controls how much consecutive windows overlap.
    """
    samplingRate, samples = wav.read(audio)
    win_length = int(window_size * samplingRate)
    hop_length = int(window_stride * samplingRate)
    n_fft = win_length
    D = librosa.core.stft(samples, n_fft=n_fft, hop_length=hop_length,
                          win_length=win_length)
    mag = np.abs(D)
    log_mag = np.log1p(mag)
    # normalization
    log_mag = preprocessing.scale(log_mag)
    # size: frequency_bins * time_len
    return log_mag
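To make the window arithmetic concrete: with the default 0.02 s window and 0.01 s stride, and a 16 kHz sampling rate assumed purely for illustration, the STFT parameters work out as follows:

samplingRate = 16000  # assumed sampling rate for this example
window_size = 0.02    # seconds
window_stride = 0.01  # seconds
win_length = int(window_size * samplingRate)    # 320 samples per window
hop_length = int(window_stride * samplingRate)  # 160 samples, i.e. 50% overlap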
def standardize(data):
numeric_list = ['BsmtFullBath', 'LotArea', 'YearRemodAdd', 'GrLivArea', 'BsmtHalfBath', 'MiscVal', 'YearBuilt',
'WoodDeckSF', 'KitchenAbvGr', 'TotalBsmtSF', 'GarageArea', 'GarageCars', 'OpenPorchSF', 'MoSold',
'LowQualFinSF', 'BedroomAbvGr', 'Fireplaces', '1stFlrSF', 'FullBath', 'BsmtFinSF1', 'BsmtFinSF2',
'HalfBath',
'Porch', '2ndFlrSF', 'MasVnrArea', 'YrSold', 'BsmtUnfSF', 'LotFrontage', 'TotRmsAbvGrd']
data.loc[:, numeric_list] = preprocessing.scale(data.loc[:, numeric_list])
model_single.py source — project: Tencent_Social_Advertising_Algorithm_Competition, author: guicunbin
def get_concat_data(label_csv, label_col, other_csvs, is_rate, important_feats):
print 'important_feats : ',len(important_feats)
rank_feats = [f for f in get_csv_header(dataset1_csv) if 'click' in f]
rank_feats = [f for f in rank_feats if f in important_feats] if important_feats else rank_feats
X = pd.read_csv(label_csv, usecols = rank_feats+[label_col]).apply(small_dtype)
X = X[:1000000] if is_tiny else X
print 'concat csvs ......'
X = pd.concat([X, get_need_feats(other_csvs, is_rate, is_tiny, important_feats)], axis=1)
#if label_csv.split('/')[-1] == 'dataset2.csv':
# for c in X.columns:
# if c.endswith('_fset_total_cnt'):
# X = X.drop(X[X[c]==0].index, axis=0)
feat_cols = [f for f in X.columns if f != label_col]
if is_to_csv:
save_file = label_csv.split('.csv')[0]+'_concat.csv'
if os.path.exists(save_file):
print save_file + " has exists"
else:
print 'to csv ........'
X = X.replace(np.nan, -1)
X = X.replace(np.inf, -2)
X[feat_cols] = scale(X[feat_cols]).astype('float16')
X.to_csv(save_file, index=False, chunksize = 50000)
print X.shape
    cate_feats = [f for f in X.columns if 'click' in f]  # TODO: confirm the categorical feature set
    X, = change_to_category([X], cate_feats)
y = X[label_col].values
X = X[feat_cols]
if label_col == 'label':
print 'positive percent ',y.mean()
return X, y
def windowCharacter(x):
tmp = np.zeros((x.shape[0]))
n=0
for row in x.iterrows():
tmp[n] = signalMag(row[1]['X'],row[1]['Y'],row[1]['Z'])
n=n+1
# if np.std(tmp) > 5:
# return None
# else:
p_25 = np.percentile(tmp,25)
p_75 = np.percentile(tmp,75)
tmp_25 = [each for each in tmp if each < p_25]
tmp_75 = [each for each in tmp if each < p_75]
data_dm = scale(tmp,with_mean=True, with_std=False) # demean data
(freq_1,power_1) = butterFilter(data_dm,lowcut_1,highcut_1)
idx_1 = np.argmax(power_1)
freq_1_sec = np.delete(freq_1,idx_1)
power_1_sec = np.delete(power_1,idx_1)
idx_1_sec = np.argmax(power_1_sec)
(freq_2,power_2) = butterFilter(data_dm,lowcut_2,highcut_2)
idx_2 = np.argmax(power_2)
return np.mean(tmp), np.std(tmp), np.median(tmp), np.linalg.norm(tmp_25), np.linalg.norm(tmp_75),np.sum(power_1), freq_1[idx_1],power_1[idx_1], freq_1_sec[idx_1_sec], power_1_sec[idx_1_sec], freq_2[idx_2],power_2[idx_2],freq_1[idx_1]/np.sum(power_1)
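The demeaning step above relies on scale() with with_std=False, which subtracts the mean but leaves the spread untouched; a minimal standalone illustration (toy values made up):

import numpy as np
from sklearn.preprocessing import scale

sig = np.array([3.0, 5.0, 7.0, 9.0])
demeaned = scale(sig, with_mean=True, with_std=False)
print(demeaned)                   # [-3. -1.  1.  3.]: the mean (6.0) is removed
print(sig.std(), demeaned.std())  # both ~2.236: the standard deviation is unchanged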
def normalize(x, sf, logtrans=True, sfnorm=True, zeromean=True):
if sfnorm:
assert len(sf.shape) == 1
        x = x / (sf[:, None] + 1e-8)  # divide each row (cell) by its size factor
if logtrans:
x = np.log1p(x)
if zeromean:
x = scale(x)
return x
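A minimal usage sketch of normalize() (the count matrix is made up; library-size-derived size factors are one common choice and are assumed here):

import numpy as np

counts = np.array([[10.0, 0.0, 5.0],
                   [30.0, 2.0, 8.0]])
size_factors = counts.sum(axis=1) / counts.sum(axis=1).mean()  # per-row library-size factors
normed = normalize(counts, size_factors, logtrans=True, sfnorm=True, zeromean=True)
print(normed.shape)  # (2, 3); with zeromean=True each column ends up with zero mean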
from sklearn import preprocessing as pre

def test_scale():
    matrix = [[0, 30], [1, 27], [3, 24]]
    scaled = pre.scale(matrix)
    print(scaled)
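Roughly what this prints: each column is standardized using its own mean and population standard deviation, so the first column (mean 4/3, std ≈ 1.247) becomes approximately [-1.069, -0.267, 1.336] and the second column (mean 27, std ≈ 2.449) becomes approximately [1.225, 0.0, -1.225].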