def mungeskewed(train, test, numeric_feats):
    ntrain = train.shape[0]
    test['loss'] = 0
    train_test = pd.concat((train, test)).reset_index(drop=True)
    skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna()))
    skewed_feats = skewed_feats[skewed_feats > 0.25]
    skewed_feats = skewed_feats.index
    for feats in skewed_feats:
        train_test[feats] = train_test[feats] + 1
        train_test[feats], lam = boxcox(train_test[feats])
    return train_test, ntrain
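A minimal usage sketch for mungeskewed, assuming pandas plus scipy.stats.skew and scipy.stats.boxcox are imported as in the snippet above; the toy frames and the cont1 column are placeholders, not part of the original code.

import pandas as pd
from scipy.stats import skew, boxcox

# hypothetical train/test frames sharing one numeric column; train carries the 'loss' target
train = pd.DataFrame({'cont1': [0.1, 0.2, 5.0, 0.3], 'loss': [10, 20, 30, 40]})
test = pd.DataFrame({'cont1': [0.15, 0.25, 4.0]})

train_test, ntrain = mungeskewed(train, test, ['cont1'])
train_part = train_test.iloc[:ntrain]   # rows from the original train set
test_part = train_test.iloc[ntrain:]    # rows from the original test set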
def features(self, q1, q2):
    q1 = str(q1).lower().split()
    q2 = str(q2).lower().split()
    q1 = [w for w in q1 if w not in stopwords]
    q2 = [w for w in q2 if w not in stopwords]
    wmd = min(self.model.wmdistance(q1, q2), 10)
    q1vec = self.sent2vec(q1)
    q2vec = self.sent2vec(q2)
    if q1vec is not None and q2vec is not None:
        cos = cosine(q1vec, q2vec)
        city = cityblock(q1vec, q2vec)
        jacc = jaccard(q1vec, q2vec)
        canb = canberra(q1vec, q2vec)
        eucl = euclidean(q1vec, q2vec)
        mink = minkowski(q1vec, q2vec, 3)
        bray = braycurtis(q1vec, q2vec)
        q1_skew = skew(q1vec)
        q2_skew = skew(q2vec)
        q1_kurt = kurtosis(q1vec)
        q2_kurt = kurtosis(q2vec)
    else:
        cos = -1
        city = -1
        jacc = -1
        canb = -1
        eucl = -1
        mink = -1
        bray = -1
        q1_skew = 0
        q2_skew = 0
        q1_kurt = 0
        q2_kurt = 0
    return wmd, cos, city, jacc, canb, eucl, mink, bray, q1_skew, q2_skew, q1_kurt, q2_kurt
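The distance block above depends on self.model and self.sent2vec; a self-contained sketch of the same feature set, assuming only NumPy/SciPy and two stand-in sentence vectors, might look like this (the 300-dimensional random vectors are placeholders).

import numpy as np
from scipy.spatial.distance import (cosine, cityblock, jaccard, canberra,
                                    euclidean, minkowski, braycurtis)
from scipy.stats import skew, kurtosis

rng = np.random.RandomState(0)
q1vec = rng.rand(300)   # stand-in for an averaged word-vector representation
q2vec = rng.rand(300)

feats = [cosine(q1vec, q2vec), cityblock(q1vec, q2vec), jaccard(q1vec, q2vec),
         canberra(q1vec, q2vec), euclidean(q1vec, q2vec), minkowski(q1vec, q2vec, 3),
         braycurtis(q1vec, q2vec),
         skew(q1vec), skew(q2vec), kurtosis(q1vec), kurtosis(q2vec)]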
def features(self, q1, q2):
    q1 = str(q1).lower().split()
    q2 = str(q2).lower().split()
    q1 = [w for w in q1 if w not in stopwords]
    q2 = [w for w in q2 if w not in stopwords]
    wmd = min(self.model.wmdistance(q1, q2), 10)
    wmd_norm = min(self.model_norm.wmdistance(q1, q2), 10)
    q1vec = self.sent2vec(q1)
    q2vec = self.sent2vec(q2)
    if q1vec is not None and q2vec is not None:
        cos = cosine(q1vec, q2vec)
        city = cityblock(q1vec, q2vec)
        jacc = jaccard(q1vec, q2vec)
        canb = canberra(q1vec, q2vec)
        eucl = euclidean(q1vec, q2vec)
        mink = minkowski(q1vec, q2vec, 3)
        bray = braycurtis(q1vec, q2vec)
        q1_skew = skew(q1vec)
        q2_skew = skew(q2vec)
        q1_kurt = kurtosis(q1vec)
        q2_kurt = kurtosis(q2vec)
    else:
        cos = -1
        city = -1
        jacc = -1
        canb = -1
        eucl = -1
        mink = -1
        bray = -1
        q1_skew = 0
        q2_skew = 0
        q1_kurt = 0
        q2_kurt = 0
    return wmd, wmd_norm, cos, city, jacc, canb, eucl, mink, bray, q1_skew, q2_skew, q1_kurt, q2_kurt
def lightcurve_moments(ftimes, fmags, ferrs):
    '''This calculates the weighted mean, stdev, median, MAD, percentiles, skew,
    kurtosis, fraction of LC beyond 1-stdev, and IQR.
    '''
    ndet = len(fmags)
    if ndet > 9:
        # now calculate the various things we need
        series_median = npmedian(fmags)
        series_wmean = (
            npsum(fmags*(1.0/(ferrs*ferrs)))/npsum(1.0/(ferrs*ferrs))
        )
        series_mad = npmedian(npabs(fmags - series_median))
        series_stdev = 1.483*series_mad
        series_skew = spskew(fmags)
        series_kurtosis = spkurtosis(fmags)
        # get the beyond1std fraction
        series_above1std = len(fmags[fmags > (series_median + series_stdev)])
        series_below1std = len(fmags[fmags < (series_median - series_stdev)])
        # this is the fraction beyond 1 stdev
        series_beyond1std = (series_above1std + series_below1std)/float(ndet)
        # get the magnitude percentiles
        series_mag_percentiles = nppercentile(
            fmags,
            [5.0, 10, 17.5, 25, 32.5, 40, 60, 67.5, 75, 82.5, 90, 95]
        )
        return {
            'median': series_median,
            'wmean': series_wmean,
            'mad': series_mad,
            'stdev': series_stdev,
            'skew': series_skew,
            'kurtosis': series_kurtosis,
            'beyond1std': series_beyond1std,
            'mag_percentiles': series_mag_percentiles,
            'mag_iqr': series_mag_percentiles[8] - series_mag_percentiles[3],
        }
    else:
        LOGERROR('not enough detections in this magseries '
                 'to calculate light curve moments')
        return None
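The np*/sp* names above are module-level aliases for numpy and scipy.stats functions; a short sketch of the same moments on a synthetic magnitude series, written directly against numpy and scipy.stats:

import numpy as np
from scipy.stats import skew, kurtosis

rng = np.random.RandomState(42)
fmags = rng.normal(12.0, 0.05, size=200)   # synthetic magnitudes
ferrs = np.full_like(fmags, 0.01)          # synthetic per-point errors

median = np.median(fmags)
wmean = np.sum(fmags / ferrs**2) / np.sum(1.0 / ferrs**2)
mad = np.median(np.abs(fmags - median))
stdev = 1.483 * mad                        # MAD-based robust standard deviation
beyond1std = np.mean(np.abs(fmags - median) > stdev)
print(median, wmean, mad, stdev, skew(fmags), kurtosis(fmags), beyond1std)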
def create_scipy_features(base_features, sentinel):
    r"""Calculate the skew, kurtosis, and other statistical features
    for each row.

    Parameters
    ----------
    base_features : numpy array
        The feature dataframe.
    sentinel : float
        The number to be imputed for NaN values.

    Returns
    -------
    sp_features : numpy array
        The calculated SciPy features.
    """
    logger.info("Creating SciPy Features")
    # Generate scipy features
    logger.info("SciPy Feature: geometric mean")
    row_gmean = sps.gmean(base_features, axis=1)
    logger.info("SciPy Feature: kurtosis")
    row_kurtosis = sps.kurtosis(base_features, axis=1)
    logger.info("SciPy Feature: kurtosis test")
    row_ktest, pvalue = sps.kurtosistest(base_features, axis=1)
    logger.info("SciPy Feature: normal test")
    row_normal, pvalue = sps.normaltest(base_features, axis=1)
    logger.info("SciPy Feature: skew")
    row_skew = sps.skew(base_features, axis=1)
    logger.info("SciPy Feature: skew test")
    row_stest, pvalue = sps.skewtest(base_features, axis=1)
    logger.info("SciPy Feature: variation")
    row_var = sps.variation(base_features, axis=1)
    logger.info("SciPy Feature: signal-to-noise ratio")
    # note: scipy.stats.signaltonoise was removed in SciPy 1.0, so this call requires an older SciPy
    row_stn = sps.signaltonoise(base_features, axis=1)
    logger.info("SciPy Feature: standard error of mean")
    row_sem = sps.sem(base_features, axis=1)
    sp_features = np.column_stack((row_gmean, row_kurtosis, row_ktest,
                                   row_normal, row_skew, row_stest,
                                   row_var, row_stn, row_sem))
    sp_features = impute_values(sp_features, 'float64', sentinel)
    sp_features = StandardScaler().fit_transform(sp_features)
    # Return new SciPy features
    logger.info("SciPy Feature Count : %d", sp_features.shape[1])
    return sp_features
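A trimmed sketch of the same row-wise statistics without the project-specific logger and impute_values helpers; since scipy.stats.signaltonoise is gone from current SciPy, the sketch substitutes a plain mean/std ratio (an assumption, not the original call).

import numpy as np
import scipy.stats as sps
from sklearn.preprocessing import StandardScaler

X = np.random.rand(100, 20)              # stand-in for base_features

sp_features = np.column_stack((
    sps.gmean(X, axis=1),
    sps.kurtosis(X, axis=1),
    sps.skew(X, axis=1),
    sps.variation(X, axis=1),
    X.mean(axis=1) / X.std(axis=1),      # mean/std ratio in place of the removed signaltonoise
    sps.sem(X, axis=1),
))
sp_features = StandardScaler().fit_transform(sp_features)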
#
# Function create_clusters
#
def create_images_for_labeling(pars):
    # note: Python 2 code (print statements, old-style "except Exception, e")
    import scipy.stats as st
    import os
    import numpy as np
    import calblitz as cb
    from glob import glob
    try:
        f_name = pars
        cdir = os.path.dirname(f_name)
        print 'loading'
        m = cb.load(f_name)
        print 'corr image'
        img = m.local_correlations(eight_neighbours=True)
        im = cb.movie(img, fr=1)
        im.save(os.path.join(cdir, 'correlation_image.tif'))
        print 'std image'
        img = np.std(m, 0)
        im = cb.movie(np.array(img), fr=1)
        im.save(os.path.join(cdir, 'std_projection.tif'))
        m1 = m.resize(1, 1, 1. / m.fr)
        print 'median image'
        img = np.median(m1, 0)
        im = cb.movie(np.array(img), fr=1)
        im.save(os.path.join(cdir, 'median_projection.tif'))
        print 'save BL'
        m1 = m1 - img
        m1.save(os.path.join(cdir, 'MOV_BL.tif'))
        m1 = m1.bilateral_blur_2D()
        m1.save(os.path.join(cdir, 'MOV_BL_BIL.tif'))
        m = np.array(m1)
        print 'max image'
        img = np.max(m, 0)
        im = cb.movie(np.array(img), fr=1)
        im.save(os.path.join(cdir, 'max_projection.tif'))
        print 'skew image'
        img = st.skew(m, 0)
        im = cb.movie(img, fr=1)
        im.save(os.path.join(cdir, 'skew_projection.tif'))
        del m
        del m1
    except Exception, e:
        return e
    return f_name
Source file: test_window.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_rolling_functions_window_non_shrinkage(self):
    # GH 7764
    s = Series(range(4))
    s_expected = Series(np.nan, index=s.index)
    df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
    df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
    df_expected_panel = Panel(items=df.index, major_axis=df.columns,
                              minor_axis=df.columns)

    functions = [lambda x: (x.rolling(window=10, min_periods=5)
                            .cov(x, pairwise=False)),
                 lambda x: (x.rolling(window=10, min_periods=5)
                            .corr(x, pairwise=False)),
                 lambda x: x.rolling(window=10, min_periods=5).max(),
                 lambda x: x.rolling(window=10, min_periods=5).min(),
                 lambda x: x.rolling(window=10, min_periods=5).sum(),
                 lambda x: x.rolling(window=10, min_periods=5).mean(),
                 lambda x: x.rolling(window=10, min_periods=5).std(),
                 lambda x: x.rolling(window=10, min_periods=5).var(),
                 lambda x: x.rolling(window=10, min_periods=5).skew(),
                 lambda x: x.rolling(window=10, min_periods=5).kurt(),
                 lambda x: x.rolling(
                     window=10, min_periods=5).quantile(quantile=0.5),
                 lambda x: x.rolling(window=10, min_periods=5).median(),
                 lambda x: x.rolling(window=10, min_periods=5).apply(sum),
                 lambda x: x.rolling(win_type='boxcar',
                                     window=10, min_periods=5).mean()]
    for f in functions:
        try:
            s_result = f(s)
            assert_series_equal(s_result, s_expected)

            df_result = f(df)
            assert_frame_equal(df_result, df_expected)
        except (ImportError):
            # scipy needed for rolling_window
            continue

    functions = [lambda x: (x.rolling(window=10, min_periods=5)
                            .cov(x, pairwise=True)),
                 lambda x: (x.rolling(window=10, min_periods=5)
                            .corr(x, pairwise=True))]
    for f in functions:
        df_result_panel = f(df)
        assert_panel_equal(df_result_panel, df_expected_panel)
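For reference, the rolling .skew() accessor exercised by this test can be tried in isolation; a quick sketch on a short random Series (min_periods leaves the first rows as NaN):

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(20))
print(s.rolling(window=10, min_periods=5).skew().tail())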
def pre_process(df):
    # LotFrontage's N/A is assigned zero, will it cause problem?
    df.fillna(value={'MasVnrType': 'None', 'MasVnrArea': 0, 'Electrical': 'SBrkr', 'FireplaceQu': 'NoFP', 'GarageType': 'Noga',
                     'GarageFinish': 'Noga', 'GarageQual': 'Noga', 'Fence': 'NoFence',
                     'BsmtFinSF1': 0, 'BsmtFinSF2': 0, 'BsmtUnfSF': 0, 'TotalBsmtSF': 0, 'BsmtFullBath': 0, 'BsmtHalfBath': 0,
                     'LotFrontage': 0},
              inplace=True)
    df.loc[:, 'YrSold'] = 2016 - df.loc[:, 'YrSold']
    df.loc[df.loc[:, 'PoolArea'] != 0, 'PoolArea'] = 1
    df.loc[:, 'Porch'] = np.sum(df.loc[:, ['EnclosedPorch', '3SsnPorch', 'ScreenPorch']], axis=1)
    df.drop(['EnclosedPorch', '3SsnPorch', 'ScreenPorch'], axis=1, inplace=True)
    df.replace({'BsmtFullBath': {3: 2}, 'LotShape': {'IR3': 'IR2'}}, inplace=True)

    # fill missing values in bsmt
    df = fill_bsmt_missing(df)

    def fill_na(df, col_name, value=None):
        if value is None:
            value = df[col_name].mean()
        df.loc[df[col_name].isnull(), col_name] = value

    fill_na(df, 'Fence', 'WD')
    fill_na(df, 'GarageArea')
    fill_na(df, 'GarageCars')
    fill_na(df, 'SaleType', df['SaleType'].mode().values[0])
    fill_na(df, 'KitchenQual', df['KitchenQual'].mode().values[0])
    fill_na(df, 'Functional', df['Functional'].mode().values[0])
    fill_na(df, 'Exterior1st', df['Exterior1st'].mode().values[0])
    fill_na(df, 'Exterior2nd', df['Exterior2nd'].mode().values[0])
    fill_na(df, 'MSZoning', 'RL')

    bool_cols = np.array([df[col_name].isnull() for col_name in df.columns])
    print('rows containing na:', np.sum(bool_cols.any(axis=0)))
    print('rows all na:', np.sum(bool_cols.all(axis=0)))

    # log1p-transform the skewed numeric features
    numeric_feats = df.dtypes[df.dtypes != "object"].index
    skewed_feats = df[numeric_feats].apply(lambda x: skew(x.dropna()))  # compute skewness
    skewed_feats = skewed_feats[skewed_feats > 0.75]
    skewed_feats = skewed_feats.index
    df[skewed_feats] = np.log1p(df[skewed_feats])
    return df
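A standalone sketch of just the skew-threshold plus log1p step from the end of pre_process, on a toy frame (the column names and values are placeholders):

import numpy as np
import pandas as pd
from scipy.stats import skew

df = pd.DataFrame({'LotArea': [8450, 9600, 11250, 215000],   # heavily right-skewed toy column
                   'OverallQual': [7, 6, 7, 8]})

numeric_feats = df.dtypes[df.dtypes != "object"].index
skewed_feats = df[numeric_feats].apply(lambda x: skew(x.dropna()))
skewed_feats = skewed_feats[skewed_feats > 0.75].index
df[skewed_feats] = np.log1p(df[skewed_feats])   # only LotArea gets transformed here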
#%%
#log transform the target: ignore for test data
#
#train_data = pre_process(train_df.copy())
#test_data = pre_process(test_df.copy())
def fit(self, X):
    # keep only non-constant columns
    self.data["cols"] = list(set(range(X.shape[1])).difference(
        np.where(np.all(X == X[0, :], axis=0))[0]))
    tX = X[:, self.data["cols"]]
    if(self.algo == "min-max"):
        self.data['min'] = np.min(tX, axis=0)
        self.data['max'] = np.max(tX, axis=0)
    elif(self.algo == "normal"):
        self.data['mu'] = np.mean(tX, axis=0)
        self.data['std'] = np.std(tX, axis=0)
    elif(self.algo == "inv-normal"):
        self.data['mu'] = np.mean(tX, axis=0)
        self.data['std'] = np.std(tX, axis=0)
    elif(self.algo == "auto-normal"):
        self.data['min'] = np.min(tX, axis=0)
        self.data['max'] = np.max(tX, axis=0)
        tX = (tX-self.data["min"])/(self.data["max"]-self.data["min"])
        boxcox = lambda x, lm: (np.sign(x)*np.abs(x)**lm-1)/lm
        self.data['boxcox'] = np.zeros(tX.shape[1])
        for d in range(tX.shape[1]):
            Xd = tX[:, d]
            if(np.unique(tX[:, d]).shape[0] < 10):
                self.data['boxcox'][d] = 1
                continue
            # choose the Box-Cox lambda that minimizes squared sample skewness
            skewness = lambda x: skew(x, bias=False)**2
            t_lm = lambda lm: np.log(np.exp(lm[0])+1)
            boxcox_Xd = lambda lm: boxcox(Xd, t_lm(lm))
            obj = lambda lm: skewness(boxcox_Xd(lm))
            bounds = [(-5, 5)]
            lm = minimize(obj, [0.], method='SLSQP', bounds=bounds,
                          options={'ftol': 1e-8, 'maxiter': 100, 'disp': False})['x']
            self.data['boxcox'][d] = t_lm(lm)
        lm = self.data['boxcox'][None, :]
        tX = boxcox(tX, lm)
        self.data['mu'] = np.mean(tX, axis=0)
        self.data['std'] = np.std(tX, axis=0)
    elif(self.algo == "auto-inv-normal"):
        self.data['min'] = np.min(tX, axis=0)
        self.data['max'] = np.max(tX, axis=0)
        tX = (tX-self.data["min"])/(self.data["max"]-self.data["min"])
        boxcox = lambda x, lm: (np.sign(x)*np.abs(x)**lm-1)/lm
        self.data['boxcox'] = np.zeros(tX.shape[1])
        for d in range(tX.shape[1]):
            Xd = tX[:, d]
            if(np.unique(tX[:, d]).shape[0] < 10):
                self.data['boxcox'][d] = 1
                continue
            skewness = lambda x: skew(x, bias=False)**2
            t_lm = lambda lm: np.log(np.exp(lm[0])+1)
            boxcox_Xd = lambda lm: boxcox(Xd, t_lm(lm))
            obj = lambda lm: skewness(boxcox_Xd(lm))
            bounds = [(-5, 5)]
            lm = minimize(obj, [0.], method='SLSQP', bounds=bounds,
                          options={'ftol': 1e-8, 'maxiter': 100, 'disp': False})['x']
            self.data['boxcox'][d] = t_lm(lm)
        lm = self.data['boxcox'][None, :]
        tX = boxcox(tX, lm)
        self.data['mu'] = np.mean(tX, axis=0)
        self.data['std'] = np.std(tX, axis=0)
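The auto-normal branch above searches for a Box-Cox lambda that minimizes the squared sample skewness; a self-contained sketch of that search on a single skewed column (the lognormal data is a placeholder):

import numpy as np
from scipy.stats import skew
from scipy.optimize import minimize

rng = np.random.RandomState(0)
x = rng.lognormal(size=500)
x = (x - x.min()) / (x.max() - x.min()) + 1e-6   # scale into (0, 1], mirroring the min-max step

boxcox = lambda v, lam: (np.sign(v) * np.abs(v) ** lam - 1) / lam
t_lm = lambda lm: np.log(np.exp(lm[0]) + 1)      # softplus keeps lambda positive
obj = lambda lm: skew(boxcox(x, t_lm(lm)), bias=False) ** 2

res = minimize(obj, [0.], method='SLSQP', bounds=[(-5, 5)],
               options={'ftol': 1e-8, 'maxiter': 100})
best_lambda = t_lm(res['x'])
print(best_lambda, skew(boxcox(x, best_lambda), bias=False))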
def get_features(df_features):
    # note: Python 2 code (mixed print statements)
    print('use w2v to document presentation')
    now = datetime.datetime.now()
    print now.strftime('%Y-%m-%d %H:%M:%S')
    #df_features['z_document_dis'] = df_features.apply(lambda x: getDiff_averge(x['question1'], x['question2']), axis = 1)
    print('get_w2v')
    now = datetime.datetime.now()
    print now.strftime('%Y-%m-%d %H:%M:%S')
    df_features['q1_unique'] = df_features.apply(lambda x: getdiffwords(x['question1'], x['question2']), axis = 1)
    df_features['q2_unique'] = df_features.apply(lambda x: getdiffwords(x['question2'], x['question1']), axis = 1)
    df_features['q1_unique_w2v_weight'] = df_features.q1_unique.map(lambda x: get_vector(" ".join(x)))
    df_features['q2_unique_w2v_weight'] = df_features.q2_unique.map(lambda x: get_vector(" ".join(x)))
    df_features['q1_unique_w2v'] = df_features.q1_unique.map(lambda x: get_weight_vector(" ".join(x)))
    df_features['q2_unique_w2v'] = df_features.q2_unique.map(lambda x: get_weight_vector(" ".join(x)))
    print('z_dist')
    now = datetime.datetime.now()
    print now.strftime('%Y-%m-%d %H:%M:%S')
    #df_features['z_dist'] = df_features.apply(lambda x: Levenshtein.ratio(x['question1'], x['question2']), axis=1)
    now = datetime.datetime.now()
    print('z_tfidf_cos_sim')
    print now.strftime('%Y-%m-%d %H:%M:%S')
    #df_features['z_tfidf_cos_sim'] = df_features.apply(lambda x: cos_sim(x['question1'], x['question2']), axis=1)
    now = datetime.datetime.now()
    print('z_w2v_calc')
    print now.strftime('%Y-%m-%d %H:%M:%S')
    #df_features['z_w2v_unique'] = df_features.apply(lambda x: w2v_cos_sim(x['q1_unique'], x['q2_unique']), axis=1)
    df_features['z_w2v_unique_dis_e_weight'] = df_features.apply(lambda x: spatial.distance.euclidean(x['q1_unique_w2v_weight'], x['q2_unique_w2v_weight']), axis=1)
    df_features['z_w2v_unique_dis_e'] = df_features.apply(lambda x: spatial.distance.euclidean(x['q1_unique_w2v'], x['q2_unique_w2v']), axis=1)
    df_features['z_w2v_unique_dis_mink_w'] = df_features.apply(lambda x: spatial.distance.minkowski(x['q1_unique_w2v_weight'], x['q2_unique_w2v_weight'], 3), axis=1)
    df_features['z_w2v_unique_dis_cityblock_w'] = df_features.apply(lambda x: spatial.distance.cityblock(x['q1_unique_w2v_weight'], x['q2_unique_w2v_weight']), axis=1)
    df_features['z_w2v_unique_dis_canberra_w'] = df_features.apply(lambda x: spatial.distance.canberra(x['q1_unique_w2v_weight'], x['q2_unique_w2v_weight']), axis=1)
    df_features['z_w2v_unique_dis_mink'] = df_features.apply(lambda x: spatial.distance.minkowski(x['q1_unique_w2v'], x['q2_unique_w2v'], 3), axis=1)
    df_features['z_w2v_unique_dis_cityblock'] = df_features.apply(lambda x: spatial.distance.cityblock(x['q1_unique_w2v'], x['q2_unique_w2v']), axis=1)
    df_features['z_w2v_unique_dis_canberra'] = df_features.apply(lambda x: spatial.distance.canberra(x['q1_unique_w2v'], x['q2_unique_w2v']), axis=1)
    df_features['z_q1_unique_skew_w'] = df_features.q1_unique_w2v_weight.map(lambda x: skew(x))
    df_features['z_q2_unique_skew_w'] = df_features.q2_unique_w2v_weight.map(lambda x: skew(x))
    df_features['z_q1_unique_kur_w'] = df_features.q1_unique_w2v_weight.map(lambda x: kurtosis(x))
    df_features['z_q2_unique_kur_w'] = df_features.q2_unique_w2v_weight.map(lambda x: kurtosis(x))
    df_features['z_q1_unique_skew'] = df_features.q1_unique_w2v.map(lambda x: skew(x))
    df_features['z_q2_unique_skew'] = df_features.q2_unique_w2v.map(lambda x: skew(x))
    df_features['z_q1_unique_kur'] = df_features.q1_unique_w2v.map(lambda x: kurtosis(x))
    df_features['z_q2_unique_kur'] = df_features.q2_unique_w2v.map(lambda x: kurtosis(x))
    del df_features['q1_unique_w2v_weight']
    del df_features['q2_unique_w2v_weight']
    del df_features['q1_unique_w2v']
    del df_features['q2_unique_w2v']
    print('all done')
    print now.strftime('%Y-%m-%d %H:%M:%S')
    # fillna() returns a new frame, so assign the result (the original call discarded it)
    df_features = df_features.fillna(0.0)
    return df_features
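The per-row skew/kurtosis columns above use Series.map over cells that each hold a whole vector; a tiny sketch of that pattern with placeholder vectors:

import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis

df = pd.DataFrame({'q1_unique_w2v': [np.random.rand(300) for _ in range(5)]})
df['z_q1_unique_skew'] = df.q1_unique_w2v.map(lambda x: skew(x))
df['z_q1_unique_kur'] = df.q1_unique_w2v.map(lambda x: kurtosis(x))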
def _detect_artifacts(ica, raw, start_find, stop_find, ecg_ch, ecg_score_func,
                      ecg_criterion, eog_ch, eog_score_func, eog_criterion,
                      skew_criterion, kurt_criterion, var_criterion,
                      add_nodes):
    """Aux Function"""
    from scipy import stats

    nodes = []
    if ecg_ch is not None:
        nodes += [_ica_node('ECG', ecg_ch, ecg_score_func, ecg_criterion)]

    if eog_ch not in [None, []]:
        if not isinstance(eog_ch, list):
            eog_ch = [eog_ch]
        for idx, ch in enumerate(eog_ch):
            nodes += [_ica_node('EOG %02d' % idx, ch, eog_score_func,
                                eog_criterion)]

    if skew_criterion is not None:
        nodes += [_ica_node('skewness', None, stats.skew, skew_criterion)]

    if kurt_criterion is not None:
        nodes += [_ica_node('kurtosis', None, stats.kurtosis, kurt_criterion)]

    if var_criterion is not None:
        nodes += [_ica_node('variance', None, np.var, var_criterion)]

    if add_nodes is not None:
        nodes.extend(add_nodes)

    for node in nodes:
        scores = ica.score_sources(raw, start=start_find, stop=stop_find,
                                   target=node.target,
                                   score_func=node.score_func)
        if isinstance(node.criterion, float):
            found = list(np.where(np.abs(scores) > node.criterion)[0])
        else:
            found = list(np.atleast_1d(abs(scores).argsort()[node.criterion]))

        case = (len(found), 's' if len(found) > 1 else '', node.name)
        logger.info(' found %s artifact%s by %s' % case)
        ica.exclude += found

    logger.info('Artifact indices found:\n ' + str(ica.exclude).strip('[]'))
    if len(set(ica.exclude)) != len(ica.exclude):
        logger.info(' Removing duplicate indices...')
        ica.exclude = list(set(ica.exclude))

    logger.info('Ready.')
Source file: default_predictor.py (project: Default-Credit-Card-Prediction, author: AlexPnt)
def get_feature_stats(self):
    # get input feature
    feature_input = self.feature_input.currentText()
    try:
        if feature_input[0] == 'X':
            try:
                feature_index = int("".join(feature_input[1:]))
                feature_index -= 1
            except:
                QtWidgets.QMessageBox.information(self, "Wrong Format", "Please enter a feature name in the format: X%d.")
                return
        elif "".join(feature_input[0]+feature_input[1]) == 'LD' or "".join(feature_input[0]+feature_input[1]) == 'PC':
            try:
                feature_index = int("".join(feature_input[2:]))
                feature_index -= 1
            except:
                QtWidgets.QMessageBox.information(self, "Wrong Format", "Please enter a feature name in the format: X||LD||PC%d.")
                return
        else:
            QtWidgets.QMessageBox.information(self, "Wrong Format", "Feature names must be in the format: X%d.")
            return
    except:
        QtWidgets.QMessageBox.information(self, "Data Not Found", "Please load a dataset first.")
        return

    try:
        max_value = self.X[:, feature_index].max()
        min_value = self.X[:, feature_index].min()
        mean_value = self.X[:, feature_index].mean()
        std_value = self.X[:, feature_index].std()
        var_value = self.X[:, feature_index].var()
        skewness = stats.skew(self.X[:, feature_index])
        kurtosis = stats.kurtosis(self.X[:, feature_index], fisher=True)
        chi2, chi_p_val = chi2_feature_test(self.X, self.y, int(feature_index))
        H_kw, kw_p_val = kw_feature_test(self.X, self.y, int(feature_index))
        info_gain = information_gain(self.X, self.y, int(feature_index))
        gain_rt = gain_ratio(self.X, self.y, int(feature_index))
    except:
        QtWidgets.QMessageBox.information(self, "Wrong Index", "Feature Index Out Of Bounds.")
        return

    feature_stats = """Statistics:\n\nMinimum Value: """ + str(min_value)\
        + """\n\nMaximum Value: """ + str(max_value)\
        + """\n\nMean: """ + str(mean_value)\
        + """\n\nStandard Deviation: """ + str(std_value)\
        + """\n\nVariance: """ + str(var_value)\
        + """\n\nSkewness: """ + str(skewness)\
        + """\n\nKurtosis: """ + str(kurtosis)\
        + """\n\nChi Squared Test: """ + str(chi2[0])\
        + """\n\nKruskal-Wallis Test: """ + str(H_kw)\
        + """\n\nInformation Gain: """ + str(info_gain)\
        + """\n\nGain Ratio: """ + str(gain_rt)

    self.feature_stats.setText(feature_stats)
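The statistics panel above boils down to column-wise calls into scipy.stats; a minimal sketch of the same skew/kurtosis computation on one column of a random matrix (the data and column index are placeholders):

import numpy as np
from scipy import stats

X = np.random.rand(200, 5)
j = 2
print("skew:", stats.skew(X[:, j]), "kurtosis:", stats.kurtosis(X[:, j], fisher=True))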