import scipy as sp
from scipy import linalg

def annopred_inf(beta_hats, pr_sigi, n=1000, reference_ld_mats=None, ld_window_size=100):
    """
    Infinitesimal model with SNP-specific heritabilities derived from annotations,
    used as the initial values for the MCMC of the non-infinitesimal model.
    """
    num_betas = len(beta_hats)
    updated_betas = sp.empty(num_betas)
    for i, wi in enumerate(range(0, num_betas, ld_window_size)):
        start_i = wi
        stop_i = min(num_betas, wi + ld_window_size)
        Li = 1.0 / pr_sigi[start_i: stop_i]   # Per-SNP prior precisions
        D = reference_ld_mats[i]              # Reference LD matrix for this window
        A = n * D + sp.diag(Li)               # Scaled posterior precision matrix
        A_inv = linalg.pinv(A)
        updated_betas[start_i: stop_i] = sp.dot(A_inv * n, beta_hats[start_i: stop_i])  # Adjust the beta_hats
    return updated_betas
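A minimal smoke test with synthetic inputs (identity LD matrices, i.e. independent SNPs; the sizes and values below are made up for illustration):

import scipy as sp

sp.random.seed(42)
m, w = 200, 100
beta_hats = sp.random.normal(0.0, 0.01, m)    # Marginal effect estimates
pr_sigi = sp.repeat(1e-3, m)                  # Annotation-derived per-SNP prior variances
ld_mats = [sp.eye(w) for _ in range(m // w)]  # One LD matrix per window

betas = annopred_inf(beta_hats, pr_sigi, n=10000,
                     reference_ld_mats=ld_mats, ld_window_size=w)
print(betas[:5])  # Shrunken effect estimates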
import scipy as sp

def split_data(self, n, v=5):
    '''
    Split the data into v folds, whatever the number of samples per class.
    Input:
        n : the number of samples
        v : the number of folds
    Output: None
    '''
    step = n // v                         # Number of samples per fold
    sp.random.seed(1)                     # Set the random generator to the same initial state
    t = sp.random.permutation(n)          # Random permutation of the sample indices
    indices = []
    for i in range(v - 1):                # Group the indices into v folds
        indices.append(t[i * step:(i + 1) * step])
    indices.append(t[(v - 1) * step:n])   # The last fold absorbs the remainder
    for i in range(v):
        self.iT.append(sp.asarray(indices[i]))  # Test indices of fold i
        l = list(range(v))                # list() is needed in Python 3, where range has no remove()
        l.remove(i)
        temp = sp.empty(0, dtype=sp.int64)
        for j in l:
            temp = sp.concatenate((temp, sp.asarray(indices[j])))
        self.it.append(temp)              # Training indices of fold i
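The function is written as a method, so a minimal host object is enough to try it out; the CV class below is a hypothetical stand-in for whatever class this method belongs to:

import scipy as sp

class CV(object):
    '''Hypothetical host: only provides the iT/it lists that split_data fills.'''
    def __init__(self):
        self.iT = []  # Test indices per fold
        self.it = []  # Training indices per fold

CV.split_data = split_data  # Attach the function above as a method

cv = CV()
cv.split_data(103, v=5)
print(len(cv.iT), cv.iT[0].size, cv.it[0].size)  # 5 folds; 20 test / 83 train in fold 0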
def glmnetSet(opts = None):
    import scipy
    # default options
    options = {
        "weights" : scipy.empty([0]),
        "offset" : scipy.empty([0]),
        "alpha" : scipy.float64(1.0),
        "nlambda" : scipy.int32(100),
        "lambda_min" : scipy.empty([0]),
        "lambdau" : scipy.empty([0]),
        "standardize" : True,
        "intr" : True,
        "thresh" : scipy.float64(1e-7),
        "dfmax" : scipy.empty([0]),
        "pmax" : scipy.empty([0]),
        "exclude" : scipy.empty([0], dtype = scipy.integer),
        "penalty_factor" : scipy.empty([0]),
        "cl" : scipy.array([[scipy.float64(-scipy.inf)], [scipy.float64(scipy.inf)]]),
        "maxit" : scipy.int32(1e5),
        "gtype" : [],
        "ltype" : 'Newton',
        "standardize_resp" : False,
        "mtype" : 'ungrouped'
    }
    # quick return if no user opts
    if opts is None:
        print('glmnetSet default options:')
        print(options)
        return options
    # if options are passed in by the user, update the defaults with values from opts
    optsInOptions = set(opts.keys()) - set(options.keys())
    if len(optsInOptions) > 0:  # 'opts' keys must be a subset of 'options' keys
        print(optsInOptions, ' : unknown option for glmnetSet')
        raise ValueError('attempting to set glmnet options that are not known to glmnetSet')
    else:
        options = merge_dicts(options, opts)  # merge_dicts is a helper defined elsewhere in the package
    return options
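Typical usage passes a partial dict of overrides. Since merge_dicts lives elsewhere in the glmnet_python package, this sketch supplies a minimal stand-in so the snippet runs on its own:

import scipy

def merge_dicts(*dict_args):
    # Minimal stand-in for the package helper: later dicts take precedence.
    result = {}
    for d in dict_args:
        result.update(d)
    return result

opts = glmnetSet({'alpha': scipy.float64(0.5), 'nlambda': scipy.int32(50)})
print(opts['alpha'], opts['nlambda'], opts['maxit'])  # 0.5 50 100000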
from plinkio import plinkfile
import scipy as sp

def _parse_plink_snps_(genotype_file, snp_indices):
    plinkf = plinkfile.PlinkFile(genotype_file)
    samples = plinkf.get_samples()
    num_individs = len(samples)
    num_snps = len(snp_indices)
    raw_snps = sp.empty((num_snps, num_individs), dtype='int8')
    # If these indices are not in order, place each SNP in the right row while parsing.
    snp_order = sp.argsort(snp_indices)
    ordered_snp_indices = list(snp_indices[snp_order])
    ordered_snp_indices.reverse()
    print('Iterating over file to load SNPs')
    snp_i = 0
    next_i = ordered_snp_indices.pop()
    line_i = 0
    max_i = ordered_snp_indices[0] if ordered_snp_indices else next_i  # Guard for the single-SNP case
    while line_i <= max_i:
        if line_i < next_i:
            next(plinkf)
        elif line_i == next_i:
            line = next(plinkf)
            snp = sp.array(line, dtype='int8')
            bin_counts = line.allele_counts()
            if bin_counts[-1] > 0:
                mode_v = sp.argmax(bin_counts[:2])  # Impute missing calls (coded 3) with the most common genotype
                snp[snp == 3] = mode_v
            s_i = snp_order[snp_i]
            raw_snps[s_i] = snp
            if line_i < max_i:
                next_i = ordered_snp_indices.pop()
            snp_i += 1
        line_i += 1
    plinkf.close()
    assert snp_i == len(raw_snps), 'Failed to parse SNPs?'
    num_indivs = len(raw_snps[0])
    freqs = sp.sum(raw_snps, 1, dtype='float32') / (2 * float(num_indivs))
    return raw_snps, freqs
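A hypothetical call; 'genotypes' is a placeholder for the prefix of an existing PLINK .bed/.bim/.fam file set:

import scipy as sp

# 'genotypes' is a placeholder path; genotypes.bed/.bim/.fam must exist on disk.
snp_indices = sp.array([0, 10, 25])
raw_snps, freqs = _parse_plink_snps_('genotypes', snp_indices)
print(raw_snps.shape)  # (3, num_individuals)
print(freqs)           # Allele frequencies of the three SNPs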
import scipy as sp
from scipy import linalg

def learn(self, x, y):
    '''
    Learn the GMM with ridge regularization from training samples.
    Input:
        x : the training samples
        y : the labels
    Output:
        the mean, covariance and proportion of each class, as well as the
        spectral decomposition of the covariance matrices
    '''
    ## Get information from the data
    C = sp.unique(y).shape[0]  # Number of classes
    n = x.shape[0]             # Number of samples
    d = x.shape[1]             # Number of variables
    ## Initialization
    self.ni = sp.empty((C, 1))      # Vector of number of samples for each class
    self.prop = sp.empty((C, 1))    # Vector of proportions
    self.mean = sp.empty((C, d))    # Vector of means
    self.cov = sp.empty((C, d, d))  # Matrix of covariances
    self.Q = sp.empty((C, d, d))    # Matrix of eigenvectors
    self.L = sp.empty((C, d))       # Vector of eigenvalues
    self.classnum = sp.empty(C).astype('uint8')
    ## Learn the parameters of the model for each class
    for c, cR in enumerate(sp.unique(y)):
        j = sp.where(y == cR)[0]
        self.classnum[c] = cR       # Save the original label
        self.ni[c] = float(j.size)
        self.prop[c] = self.ni[c] / n
        self.mean[c, :] = sp.mean(x[j, :], axis=0)
        self.cov[c, :, :] = sp.cov(x[j, :], bias=1, rowvar=0)  # Normalize by ni to be consistent with the update formulae
        # Spectral decomposition, eigenvalues sorted in decreasing order
        L, Q = linalg.eigh(self.cov[c, :, :])
        idx = L.argsort()[::-1]
        self.L[c, :] = L[idx]
        self.Q[c, :, :] = Q[:, idx]
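Again a method, so a minimal host object suffices to try it on synthetic data (the GMMR class here is a hypothetical stand-in; predict() below additionally needs a compute_inverse_logdet method defined elsewhere):

import scipy as sp

class GMMR(object):
    '''Hypothetical empty host; learn() fills in the model attributes.'''
    pass

GMMR.learn = learn  # Attach the function above as a method

sp.random.seed(0)
x = sp.vstack((sp.random.normal(0.0, 1.0, (50, 3)),
               sp.random.normal(3.0, 1.0, (50, 3))))
y = sp.concatenate((sp.ones(50), 2 * sp.ones(50)))

model = GMMR()
model.learn(x, y)
print(model.classnum, model.prop.ravel())  # [1 2] [0.5 0.5]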
def predict(self, xt, tau=None, proba=None):
    '''
    Predict the labels of the samples xt using the learned model.
    Inputs:
        xt: the samples to be classified
    Outputs:
        yp: the predicted classes
        K: the decision value for each class (returned only if proba is set)
    '''
    ## Get information from the data
    nt = xt.shape[0]      # Number of testing samples
    C = self.ni.shape[0]  # Number of classes
    ## Initialization
    K = sp.empty((nt, C))
    if tau is None:
        TAU = self.tau
    else:
        TAU = tau
    for c in range(C):
        invCov, logdet = self.compute_inverse_logdet(c, TAU)
        cst = logdet - 2 * sp.log(self.prop[c])  # Precompute the constant term
        xtc = xt - self.mean[c, :]
        temp = sp.dot(invCov, xtc.T).T
        K[:, c] = sp.sum(xtc * temp, axis=1) + cst
        del temp, xtc
    ## Assign the label saved in classnum to the minimum value of K
    yp = self.classnum[sp.argmin(K, 1)]
    if proba is None:
        return yp
    else:
        return yp, K
import scipy
# glmnetPredict comes from the same glmnet_python package

def glmnetCoef(obj, s = None, exact = False):
    if s is None:
        s = obj['lambdau']
    if exact and len(s) > 0:
        raise NotImplementedError('exact = True is not implemented in glmnetCoef')
    result = glmnetPredict(obj, scipy.empty([0]), s, 'coefficients')
    return result
import scipy as sp
import scipy.io

def fake_mldata(columns_dict, dataname, matfile, ordering=None):
    """Create a fake mldata data set.

    Parameters
    ----------
    columns_dict : dict, keys=str, values=ndarray
        Contains data as columns_dict[column_name] = array of data.
    dataname : string
        Name of data set.
    matfile : string or file object
        The file name string or the file-like object of the output file.
    ordering : list, default None
        List of column_names, determines the ordering in the data set.

    Notes
    -----
    This function transposes all arrays, while fetch_mldata only transposes
    'data'; keep that in mind in the tests.
    """
    datasets = dict(columns_dict)
    # transpose all variables
    for name in datasets:
        datasets[name] = datasets[name].T
    if ordering is None:
        ordering = sorted(list(datasets.keys()))
    # NOTE: setting up this array is tricky, because of the way Matlab
    # re-packages 1D arrays
    datasets['mldata_descr_ordering'] = sp.empty((1, len(ordering)),
                                                 dtype='object')
    for i, name in enumerate(ordering):
        datasets['mldata_descr_ordering'][0, i] = name
    scipy.io.savemat(matfile, datasets, oned_as='column')
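A small round trip through an in-memory .mat buffer shows the layout the function produces:

import scipy as sp
import scipy.io
from io import BytesIO

buf = BytesIO()
fake_mldata({'data': sp.random.rand(5, 3), 'label': sp.arange(5)}, 'toy', buf)
buf.seek(0)
mat = scipy.io.loadmat(buf)
print(mat['data'].shape)             # (3, 5): transposed on write
print(mat['mldata_descr_ordering'])  # The column ordering, stored as a Matlab cell array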
import scipy as sp
import scipy.linalg as spla
from scipy.stats import multivariate_normal

def fit(self, X):
    # Constants
    max_iter = self.max_iter
    tol = self.tol
    n_samples, n_features = X.shape
    n_components = self.n_components
    # Initialize parameters
    self._init_params(X)
    # Initialize
    responsibility = sp.empty((n_samples, n_components))
    log_likelihood = self.log_likelihood(X)
    for it in range(max_iter):
        # E step: posterior probability of each component for each sample
        for n in range(n_samples):
            for k in range(n_components):
                responsibility[n][k] = self.pdf(X[n], k) / self.pdf(X[n])
        # M step
        eff = sp.sum(responsibility, axis=0)  # Effective number of samples per component
        for k in range(n_components):
            # Update mean
            mean = sp.dot(responsibility[:, k], X) / eff[k]
            # Update covariance
            cov = sp.zeros((n_features, n_features))
            for n in range(n_samples):
                cov += responsibility[n][k] * sp.outer(X[n] - mean, X[n] - mean)
            cov /= eff[k]
            # Update the k-th component
            self.comps[k] = multivariate_normal(mean, cov, allow_singular=True)
            # Update mixture coefficient
            self.coef[k] = eff[k] / n_samples
        # Convergence test
        log_likelihood_new = self.log_likelihood(X)
        diff = log_likelihood_new - log_likelihood
        print('GMM: {0:5d}: {1:10.5e} {2:10.5e}'.format(
            it, log_likelihood_new, spla.norm(diff) / spla.norm(log_likelihood)))
        if spla.norm(diff) < tol * spla.norm(log_likelihood):
            break
        log_likelihood = log_likelihood_new
    return self
import sys
from osgeo import gdal
import scipy as sp

def open_data(filename):
    '''
    Open and load the image given its name.
    The data type is read from the file and the scipy array is initialized accordingly.
    Input:
        filename: the name of the file
    Output:
        im: the data cube
        GeoTransform: the geotransform information
        Projection: the projection information
    '''
    data = gdal.Open(filename, gdal.GA_ReadOnly)
    if data is None:
        print('Impossible to open ' + filename)
        sys.exit(1)
    nc = data.RasterXSize  # Number of columns
    nl = data.RasterYSize  # Number of lines
    d = data.RasterCount   # Number of bands
    # Get the type of the data
    gdal_dt = data.GetRasterBand(1).DataType
    if gdal_dt == gdal.GDT_Byte:
        dt = 'uint8'
    elif gdal_dt == gdal.GDT_Int16:
        dt = 'int16'
    elif gdal_dt == gdal.GDT_UInt16:
        dt = 'uint16'
    elif gdal_dt == gdal.GDT_Int32:
        dt = 'int32'
    elif gdal_dt == gdal.GDT_UInt32:
        dt = 'uint32'
    elif gdal_dt == gdal.GDT_Float32:
        dt = 'float32'
    elif gdal_dt == gdal.GDT_Float64:
        dt = 'float64'
    elif gdal_dt in (gdal.GDT_CInt16, gdal.GDT_CInt32, gdal.GDT_CFloat32, gdal.GDT_CFloat64):
        dt = 'complex64'
    else:
        print('Data type unknown')
        sys.exit(1)
    # Initialize the array and read the bands
    if d == 1:
        im = sp.empty((nl, nc), dtype=dt)
        im[:, :] = data.GetRasterBand(1).ReadAsArray()
    else:
        im = sp.empty((nl, nc, d), dtype=dt)
        for i in range(d):
            im[:, :, i] = data.GetRasterBand(i + 1).ReadAsArray()
    GeoTransform = data.GetGeoTransform()
    Projection = data.GetProjection()
    data = None  # Close the GDAL dataset
    return im, GeoTransform, Projection
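A hypothetical call; 'scene.tif' is a placeholder for any raster GDAL can read:

# 'scene.tif' is a placeholder filename for this sketch.
im, gt, proj = open_data('scene.tif')
print(im.shape, im.dtype)
print(gt)  # (originX, pixelWidth, 0, originY, 0, pixelHeight)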