python类FastICA()的实例源码-面圈网

spectral_data.py 文件源码项目：PySAT 作者: USGS-Astrogeology 项目源码文件源码阅读 56 收藏 0 点赞 0 评论 0

def dim_red(self, col, method, params, kws, load_fit=None):
    """Reduce the dimensionality of ``self.df[col]`` and store the result.

    Each output component is written back to ``self.df`` under the key
    ``(method, str(i))`` with ``i`` counted from 1.

    Parameters
    ----------
    col : key used to select the input data from ``self.df``.
    method : one of 'PCA', 'FastICA', 't-SNE', 'LLE' or 'JADE-ICA'; also
        used as the first element of the output column keys.
    params, kws : positional and keyword arguments forwarded to the
        estimator constructor (ignored when ``load_fit`` is given).
    load_fit : optional previously fitted estimator. When truthy it is
        used as-is (no refit) and only ``transform`` is called.

    Returns
    -------
    The estimator stored in ``self.do_dim_red``.

    Raises
    ------
    ValueError
        If no ``load_fit`` is given and ``method`` is not recognised.
    """
    data = self.df[col]

    if load_fit:
        # Reuse the supplied fit. Previously this branch never computed
        # dim_red_result, so the column loop below failed.
        # NOTE(review): this needs the loaded object to expose transform();
        # confirm whether loaded t-SNE fits are ever passed here.
        self.do_dim_red = load_fit
        dim_red_result = self.do_dim_red.transform(data)
    else:
        if method == 'PCA':
            self.do_dim_red = PCA(*params, **kws)
        elif method == 'FastICA':
            self.do_dim_red = FastICA(*params, **kws)
        elif method == 't-SNE':
            self.do_dim_red = TSNE(*params, **kws)
        elif method == 'LLE':
            self.do_dim_red = LocallyLinearEmbedding(*params, **kws)
        elif method == 'JADE-ICA':
            self.do_dim_red = JADE(*params, **kws)
        else:
            # Avoid silently refitting whatever estimator was left over
            # from an earlier call.
            raise ValueError('Unknown dimensionality reduction method: '
                             + repr(method))

        if method == 't-SNE':
            # The t-SNE branch uses the combined fit_transform call.
            dim_red_result = self.do_dim_red.fit_transform(data)
        else:
            self.do_dim_red.fit(data)
            dim_red_result = self.do_dim_red.transform(data)

    # Column names are derived from the result width; revisit for methods
    # that do not use n_components so the names still make sense.
    for i in range(1, dim_red_result.shape[1] + 1):
        self.df[(method, str(i))] = dim_red_result[:, i - 1]

    return self.do_dim_red

test_algorithms.py 文件源码项目：thunder-factorization 作者: thunder-project 项目源码文件源码阅读 39 收藏 0 点赞 0 评论 0

def test_ica(eng):
    """Compare the project's ``ICA`` against scikit-learn's ``FastICA``.

    Three mixed signals (a sine, a squared-off sine and their sum) with a
    small amount of seeded noise are decomposed by both implementations;
    the normalised sources and mixing matrices must agree up to sign and
    permutation within ``atol=1e-1``.
    """
    def normalize_ICA(s, aT):
        # Rescale by the column sums of the mixing matrix so that both
        # implementations are compared on the same scale.
        col_sums = aT.T.sum(axis=0)
        return s*col_sums, (aT.T/col_sums).T

    t = linspace(0, 10, 100)
    sine = sin(t)
    squared = square(sin(2*t))
    x = c_[sine, squared, sine+squared]
    random.seed(0)
    x += 0.001*random.randn(*x.shape)
    x = fromarray(x, engine=eng)

    # Reference decomposition from scikit-learn.
    from sklearn.decomposition import FastICA
    reference = FastICA(n_components=2, fun='cube', random_state=0)
    ref_sources = reference.fit_transform(x.toarray())
    ref_sources, ref_mixing = normalize_ICA(ref_sources, reference.mixing_.T)

    # Decomposition under test.
    own_sources, own_mixing = ICA(k=2, svd_method='direct', max_iter=200, seed=0).fit(x)
    own_sources, own_mixing = normalize_ICA(own_sources, own_mixing)

    tol = 1e-1
    assert allclose_sign_permute(ref_sources, own_sources, atol=tol)
    assert allclose_sign_permute(ref_mixing, own_mixing, atol=tol)

regression.py 文件源码项目：PySAT 作者: USGS-Astrogeology 项目源码文件源码阅读 49 收藏 0 点赞 0 评论 0

def fit(self, x, y, i=0):
    """Fit ``self.model`` to ``x`` and ``y`` and record whether it worked.

    When ``self.method[i]`` is 'GP', ``x`` is first reduced with FastICA or
    PCA (chosen by ``self.reduce_dim``) using ``self.n_components``
    components; the fitted reducer is kept in ``self.do_reduce_dim``.

    Side effects
    ------------
    ``self.goodfit`` is set to True on success and False on failure.
    When ``self.ransac`` is set and the fit succeeded, ``self.outliers`` is
    set to the logical negation of ``self.model.inlier_mask_``.

    Fitting errors are reported on stdout/stderr instead of being raised.
    """
    # If Gaussian processes are being used, data dimensionality needs to be
    # reduced before fitting.
    if self.method[i] == 'GP':
        if self.reduce_dim == 'FastICA':
            print('Reducing dimensionality with ICA')
            do_ica = FastICA(n_components=self.n_components)
            self.do_reduce_dim = do_ica.fit(x)
        if self.reduce_dim == 'PCA':
            print('Reducing dimensionality with PCA')
            do_pca = PCA(n_components=self.n_components)
            self.do_reduce_dim = do_pca.fit(x)

        x = self.do_reduce_dim.transform(x)

    # Announce training for every method, not only for GP.
    print('Training model...')
    try:
        self.model.fit(x, y)
        self.goodfit = True
        print(self.model)
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        self.goodfit = False
        if self.method[i] == 'GP':
            print('Model failed to train! (For GP this does not always indicate a problem, especially for low numbers of components.)')
        else:
            print('Model failed to train!')
            # print_exc reports the exception that was just caught;
            # print_stack would only show the current call stack.
            traceback.print_exc()

    # Only read the inlier mask after a successful fit; after a failure it
    # is either missing or left over from an earlier fit.
    if self.ransac and self.goodfit:
        self.outliers = np.logical_not(self.model.inlier_mask_)
        print(str(np.sum(self.outliers)) + ' outliers removed with RANSAC')

spectral_data.py 文件源码项目：PySAT 作者: USGS-Astrogeology 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def ica(self, col, nc=None, load_fit=None):
    """Project ``self.df[col]`` onto ICA components and store them.

    Parameters
    ----------
    col : key used to select the input data from ``self.df``.
    nc : number of components for a new ``FastICA`` fit on the data.
    load_fit : previously fitted estimator to use instead of fitting; it
        takes precedence over ``nc``.

    If neither ``nc`` nor ``load_fit`` is given, the estimator already held
    in ``self.do_ica`` is reused.

    Each component is written to ``self.df[('ICA', i)]`` with ``i`` counted
    from 1.
    """
    if load_fit:
        # Use the previous fit as-is; fitting first would be wasted work
        # because the result was discarded anyway.
        self.do_ica = load_fit
    elif nc:
        self.do_ica = FastICA(n_components=nc)
        self.do_ica.fit(self.df[col])

    ica_result = self.do_ica.transform(self.df[col])

    # Take the component count from the result itself: the estimator's
    # n_components attribute may be None on a loaded fit.
    for i in range(1, ica_result.shape[1] + 1):
        self.df[('ICA', i)] = ica_result[:, i - 1]

preprocessing.py 文件源码项目：image-text-matching 作者: llltttppp 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def generate_icamodel(train_vocabulary='./vocabulary/vocabulary_nv_4w.txt',model_path='./model/ICA/ica_ourword2vec.model'):
    """Fit a 300-component FastICA model on word vectors and save it.

    Parameters
    ----------
    train_vocabulary : path to a text file with one entry per line; the
        word is the text before the first space on each line.
    model_path : destination passed to ``joblib.dump`` for the fitted model.

    Words without a vector in ``word2vec_model`` are printed and their row
    in the training matrix stays all zeros.
    """
    # Close the vocabulary file deterministically.
    with open(train_vocabulary, 'r') as vocab_file:
        train_vocab = [line.strip() for line in vocab_file]

    train_sample = np.zeros([len(train_vocab), 300])
    for i, entry in enumerate(train_vocab):
        word = entry.split(' ')[0]
        try:
            train_sample[i] = word2vec_model[word]
        except KeyError:
            # Out-of-vocabulary word: report it and keep the zero row.
            # NOTE(review): assumes word2vec_model raises KeyError for
            # unknown words -- confirm against the model type in use.
            print(word)

    ica = FastICA(n_components=300, max_iter=800)
    ica.fit(train_sample)
    joblib.dump(ica, model_path)

test_component_analyzers.py 文件源码项目：MENGEL 作者: CodeSpaceHQ 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def test_independent_component_analyzer(self):
        """Run the shared ``standard_check`` helper against ``FastICA``."""
        self.standard_check(FastICA)

dimensionality_reduction.py 文件源码项目：eezzy 作者: 3Blades 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def ICA_results(data, n_comps=None):
    """Fit ``ICA`` on ``data`` and return a labelled result.

    Returns the tuple ``('ICA', out)`` where ``out['model']`` is the value
    returned by ``fit`` and ``out['reconstruction error']`` holds the
    estimator's ``components_`` attribute.
    """
    # NOTE(review): the 'reconstruction error' key stores components_, not
    # an error value; the key is kept unchanged because callers may read it.
    estimator = ICA(n_components=n_comps)
    fitted = estimator.fit(data)
    return 'ICA', {'model': fitted,
                   'reconstruction error': estimator.components_}

irisclustering.py 文件源码项目：iris-Clustering-python-PTVS 作者: mjbahmani 项目源码文件源码阅读 42 收藏 0 点赞 0 评论 0

def ReduceDimension(X):
    """Yield a single ``('ICA', first, second)`` tuple for ``X``.

    ``X`` is reduced to two components with ``FastICA``; ``first`` and
    ``second`` are columns 0 and 1 of the transformed data.
    """
    from sklearn.decomposition import FastICA
    embedded = FastICA(n_components=2).fit_transform(X)
    first_component = embedded[:, 0]
    second_component = embedded[:, 1]
    yield 'ICA', first_component, second_component

#=================================================

irisclustering.py 文件源码项目：iris-Clustering-python-PTVS 作者: mjbahmani 项目源码文件源码阅读 38 收藏 0 点赞 0 评论 0

def ReduceDimension(X):
    """Yield a single ``('ICA', first, second)`` tuple for ``X``.

    ``X`` is reduced to two components with ``FastICA``; ``first`` and
    ``second`` are columns 0 and 1 of the transformed data.
    """
    from sklearn.decomposition import FastICA
    embedded = FastICA(n_components=2).fit_transform(X)
    first_component = embedded[:, 0]
    second_component = embedded[:, 1]
    yield 'ICA', first_component, second_component

#=================================================

data_input_processing.py 文件源码项目：CryptoCurrencyTrader 作者: llens 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def preprocessing_inputs(strategy_dictionary, fitting_inputs_scaled):
    """Apply the preprocessing named in ``strategy_dictionary``.

    ``strategy_dictionary['preprocessing']`` selects the transform: 'PCA'
    calls ``pca_transform`` and 'FastICA' calls ``fast_ica_transform``
    (which may also return an updated dictionary). Any other value leaves
    the inputs untouched.

    Returns the tuple ``(fitting_inputs_scaled, strategy_dictionary)``.
    """
    mode = strategy_dictionary['preprocessing']

    if mode == 'PCA':
        fitting_inputs_scaled = pca_transform(fitting_inputs_scaled)
    elif mode == 'FastICA':
        fitting_inputs_scaled, strategy_dictionary = fast_ica_transform(
            strategy_dictionary, fitting_inputs_scaled)

    return fitting_inputs_scaled, strategy_dictionary

data_input_processing.py 文件源码项目：CryptoCurrencyTrader 作者: llens 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def fast_ica_transform(strategy_dictionary, fitting_inputs_scaled):
    """Transform the inputs with a default ``FastICA``, best effort.

    On success the transformed inputs are returned. If fitting or
    transforming raises, the inputs are returned unchanged and
    ``strategy_dictionary['preprocessing']`` is set to 'None' to record
    that no preprocessing was applied.

    Returns the tuple ``(fitting_inputs_scaled, strategy_dictionary)``.
    """
    try:
        ica = FastICA()
        ica.fit(fitting_inputs_scaled)
        fitting_inputs_scaled = ica.transform(fitting_inputs_scaled)
    except Exception:
        # Deliberate fallback, but limited to Exception so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        strategy_dictionary['preprocessing'] = 'None'

    return fitting_inputs_scaled, strategy_dictionary

ICA.py 文件源码项目：thunder-factorization 作者: thunder-project 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def _fit_local(self, data):
    """Run scikit-learn's ``FastICA`` on ``data``.

    The estimator is configured from ``self.k``, ``self.max_iter``,
    ``self.tol`` and ``self.seed``; NumPy's global random state is also
    seeded with ``self.seed`` before fitting.

    Returns the tuple ``(signals, mixing_T)``: the output of
    ``fit_transform`` and the transposed ``mixing_`` matrix.
    """
    from sklearn.decomposition import FastICA
    from numpy import random

    random.seed(self.seed)
    estimator = FastICA(
        n_components=self.k,
        fun="cube",
        max_iter=self.max_iter,
        tol=self.tol,
        random_state=self.seed,
    )
    sources = estimator.fit_transform(data)
    mixing_transposed = estimator.mixing_.T
    return sources, mixing_transposed

brsa.py 文件源码项目：brainiak 作者: brainiak 项目源码文件源码阅读 41 收藏 0 点赞 0 评论 0

def __init__(
            self, n_iter=50, rank=None,
            auto_nuisance=True, n_nureg=None, nureg_zscore=True,
            nureg_method='PCA',
            baseline_single=False, logS_range=1.0, SNR_prior='exp',
            SNR_bins=21, rho_bins=20, tol=1e-4, optimizer='BFGS',
            minimize_options={'gtol': 1e-4, 'disp': False,
                              'maxiter': 20}, random_state=None,
            anneal_speed=10):
    """Store the configuration and validate a few of the options.

    ``nureg_method`` ('FA', 'PCA', 'SPCA' or 'ICA') is turned into a
    factory taking the number of components and returning the matching
    decomposition object; any other value raises ``ValueError``.
    ``nureg_zscore`` selects whether ``preprocess_residual`` applies
    ``_zscore`` or is the identity. An ``int`` ``logS_range`` is converted
    to ``float``. ``n_nureg`` (when ``auto_nuisance`` is true) and
    ``SNR_prior`` are checked with assertions.
    """
    # NOTE(review): minimize_options has a mutable default that is stored
    # on the instance without copying, so instances share the same dict.
    self.n_iter = n_iter
    self.rank = rank
    self.auto_nuisance = auto_nuisance
    self.n_nureg = n_nureg
    self.nureg_zscore = nureg_zscore

    if auto_nuisance:
        n_nureg_ok = (n_nureg is None) \
            or (isinstance(n_nureg, int) and n_nureg > 0)
        assert n_nureg_ok, \
            'n_nureg should be a positive integer or None'\
            ' if auto_nuisance is True.'

    self.preprocess_residual = (
        (lambda x: _zscore(x)) if self.nureg_zscore else (lambda x: x))

    # Map the nuisance-regressor method name to a factory.
    if nureg_method == 'FA':
        self.nureg_method = lambda x: FactorAnalysis(n_components=x)
    elif nureg_method == 'PCA':
        self.nureg_method = lambda x: PCA(n_components=x, whiten=True)
    elif nureg_method == 'SPCA':
        self.nureg_method = lambda x: SparsePCA(n_components=x,
                                                max_iter=20, tol=tol)
    elif nureg_method == 'ICA':
        self.nureg_method = lambda x: FastICA(n_components=x,
                                              whiten=True)
    else:
        raise ValueError('nureg_method can only be FA, PCA, '
                         'SPCA(for sparse PCA) or ICA')

    self.baseline_single = baseline_single
    # Exact type check: only plain ints are converted to float.
    self.logS_range = (
        float(logS_range) if type(logS_range) is int else logS_range)

    assert SNR_prior in ['unif', 'lognorm', 'exp'], \
        'SNR_prior can only be chosen from ''unif'', ''lognorm''' \
        ' and ''exp'''
    self.SNR_prior = SNR_prior
    self.SNR_bins = SNR_bins
    self.rho_bins = rho_bins
    self.tol = tol
    self.optimizer = optimizer
    self.minimize_options = minimize_options
    self.random_state = random_state
    self.anneal_speed = anneal_speed

Jason_Liu_stack_res.py 文件源码项目：Kaggle-Mercedes-Benz-Greener-Manufacturing-33th-Solution 作者: arvidzt 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def get_additional_features(train,test,magic=False,ID=False):
    """Append decomposition features (and optionally a 'magic' feature).

    Five reducers (truncated SVD, PCA, FastICA, Gaussian and sparse random
    projection), each with 12 components and ``random_state=420``, are
    fitted on the training columns and applied to both frames. Their
    outputs are added as ``<prefix>_<i>`` columns with ``i`` from 1 to 12.
    The input columns are those of ``test``, minus 'ID' unless ``ID`` is
    True.

    When ``magic`` is True, the mean of ``y`` per ``X0`` value in ``train``
    is merged into both frames as a 'magic' column; missing values in
    ``test`` are filled with the mean of those group means.

    Returns the tuple ``(train, test)``.
    """
    col = list(test.columns)
    if ID != True:
        col.remove('ID')
    n_comp = 12

    reducers = [
        ('tsvd', TruncatedSVD(n_components=n_comp, random_state=420)),
        ('pca', PCA(n_components=n_comp, random_state=420)),
        ('ica', FastICA(n_components=n_comp, random_state=420)),
        ('grp', GaussianRandomProjection(n_components=n_comp, eps=0.1,
                                         random_state=420)),
        ('srp', SparseRandomProjection(n_components=n_comp,
                                       dense_output=True, random_state=420)),
    ]

    # Fit everything first, then add columns, so every reducer sees the
    # same input columns.
    projections = []
    for prefix, reducer in reducers:
        train_part = reducer.fit_transform(train[col])
        test_part = reducer.transform(test[col])
        projections.append((prefix, train_part, test_part))

    # Component index in the outer loop keeps the column order interleaved
    # (tsvd_1, pca_1, ica_1, grp_1, srp_1, tsvd_2, ...).
    for i in range(1, n_comp + 1):
        for prefix, train_part, test_part in projections:
            name = prefix + '_' + str(i)
            train[name] = train_part[:, i - 1]
            test[name] = test_part[:, i - 1]

    if magic == True:
        group_means = train[['ID', 'X0', 'y']].groupby(['X0'])['y'].mean()
        magic_mat = pd.DataFrame({'X0': group_means.index,
                                  'magic': list(group_means)})
        mean_magic = magic_mat['magic'].mean()
        train = train.merge(magic_mat, on='X0', how='left')
        test = test.merge(magic_mat, on='X0', how='left')
        test['magic'] = test['magic'].fillna(mean_magic)

    return train, test

## Preparing stacking functions. Each one takes the out of bag values as the Input

## xgb is not used in this case, but it is still included here.

gca.py 文件源码项目：scikit-discovery 作者: MITHaystack 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def process(self, obj_data):
    '''
    Perform component analysis on data.

    Results are added to the data wrapper as a dictionary with
    results['start_date'] = start of the selected range
    results['end_date'] = end of the selected range
    results['CA'] = fitted FastICA or PCA object (None if no data)
    results['Projection'] = output of fit_transform (None if no data)

    @param obj_data: Data wrapper containing the data
    '''

    num_components = self.ap_paramList[0]()
    component_type = self.ap_paramList[1]()
    start_time = self.ap_paramList[2]()
    end_time = self.ap_paramList[3]()

    results = dict()
    results['start_date'] = start_time
    results['end_date'] = end_time

    # The optional fifth parameter restricts the analysis to some labels.
    if len(self.ap_paramList) >= 5:
        label_names = self.ap_paramList[4]()
    else:
        label_names = None

    cut_data = []
    for label, data, err in obj_data.getIterator():
        # Identity test: "== None" is evaluated element-wise on array-like
        # label lists and then cannot be used as a condition.
        if label_names is None or label in label_names:
            cut_data.append(data[start_time:end_time])

    cut_data = np.array(cut_data)

    if len(cut_data) > 0:
        if component_type == 'ICA':
            ca = FastICA(n_components=num_components)
        else:
            ca = PCA(n_components=num_components)

        # The estimator is fitted on the transpose of the stacked data.
        time_projection = ca.fit_transform(cut_data.T)
        results['CA'] = ca
        results['Projection'] = time_projection

    else:
        results['CA'] = None
        results['Projection'] = None

    obj_data.addResult(self.str_description, results)

gca.py 文件源码项目：scikit-discovery 作者: MITHaystack 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def process(self, obj_data):
    '''
    Perform component analysis on data.

    Results are added to the data wrapper as a dictionary with
    results['start_date'] = start of the selected range
    results['end_date'] = end of the selected range
    results['CA'] = fitted FastICA or PCA object (None if no data)
    results['Projection'] = output of fit_transform (None if no data)
    results['labels'] = label of each analysed series, in order

    @param obj_data: Data wrapper
    '''

    component_type = self.ap_paramList[0]()
    start_time = self.ap_paramList[1]()
    end_time = self.ap_paramList[2]()

    num_components = self.n_components

    results = dict()
    results['start_date'] = start_time
    results['end_date'] = end_time

    # One series per (label, column) pair, cut to the requested range.
    cut_data = []
    label_list = []
    for label, data in obj_data.getIterator():
        for column in self.column_names:
            cut_data.append(data.loc[start_time:end_time, column])
            label_list.append(label)

    cut_data = np.array(cut_data)

    if len(cut_data) > 0:
        if component_type == 'ICA':
            ca = FastICA(n_components=num_components)
        else:
            ca = PCA(n_components=num_components)

        # Fit and store results for BOTH estimator types; this used to be
        # nested under the PCA branch, so the ICA path stored nothing.
        time_projection = ca.fit_transform(cut_data.T)
        results['CA'] = ca
        results['Projection'] = time_projection

    else:
        results['CA'] = None
        results['Projection'] = None

    results['labels'] = label_list

    obj_data.addResult(self.str_description, results)