python类sum()的实例源码

libscores.py 文件源码 项目:AutoML5 作者: djajetic 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def prior_log_loss(frac_pos, task = 'binary.classification'):
    ''' Baseline log loss. For multiplr classes ot labels return the volues for each column'''
    eps = 1e-15   
    frac_pos_ = sp.maximum (eps, frac_pos)
    if (task != 'multiclass.classification'): # binary case
        frac_neg = 1-frac_pos
        frac_neg_ = sp.maximum (eps, frac_neg)
        pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing i to AVERAGE not sum
        # We return all the scores so we can normalize correctly later on
    else: # multiclass case
        fp = frac_pos_ / sum(frac_pos_) # Need to renormalize the lines in multiclass case
        # Only ONE label is 1 in the multiclass case active for each line
        pos_class_log_loss_ = - frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_) 
    return base_log_loss

# sklearn implementations for comparison
data_converter.py 文件源码 项目:AutoML5 作者: djajetic 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def num_lines (filename):
    ''' Count the number of lines of file'''
    return sum(1 for line in open(filename))
data_converter.py 文件源码 项目:AutoML5 作者: djajetic 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def tp_filter(X, Y, feat_num=1000, verbose=True):
    ''' TP feature selection in the spirit of the winners of the KDD cup 2001
    Only for binary classification and sparse matrices'''

    if issparse(X) and len(Y.shape)==1  and len(set(Y))==2 and (sum(Y)/Y.shape[0])<0.1: 
        if verbose: print("========= Filtering features...")
        Posidx=Y>0
        #npos = sum(Posidx)
        #Negidx=Y<=0
        #nneg = sum(Negidx)

        nz=X.nonzero()
        mx=X[nz].max()
        if X[nz].min()==mx: # sparse binary
            if mx!=1: X[nz]=1
            tp=csr_matrix.sum(X[Posidx,:], axis=0)
            #fn=npos-tp
            #fp=csr_matrix.sum(X[Negidx,:], axis=0)
            #tn=nneg-fp
        else:
            tp=np.sum(X[Posidx,:]>0, axis=0)
            #tn=np.sum(X[Negidx,:]<=0, axis=0)
            #fn=np.sum(X[Posidx,:]<=0, axis=0)
            #fp=np.sum(X[Negidx,:]>0, axis=0)

        tp=np.ravel(tp)
        idx=sorted(range(len(tp)), key=tp.__getitem__, reverse=True)   
        return idx[0:feat_num]
    else:
        feat_num = X.shape[1]
        return range(feat_num)
models.py 文件源码 项目:AutoML5 作者: djajetic 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def predict(self, X):
        prediction = self.predict_method(X)
        # Calibrate proba
        if self.task != 'regression' and self.postprocessor!=None:          
            prediction = self.postprocessor.predict_proba(prediction)
        # Keep only 2nd column because the second one is 1-first    
        if self.target_num==1 and len(prediction.shape)>1 and prediction.shape[1]>1:
            prediction = prediction[:,1]
        # Make sure the normalization is correct
        if self.task=='multiclass.classification':
            eps = 1e-15
            norma = np.sum(prediction, axis=1)
            for k in range(prediction.shape[0]):
                prediction[k,:] /= sp.maximum(norma[k], eps)  
        return prediction
models.py 文件源码 项目:AutoML5 作者: djajetic 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def fit(self, X, Y):
        if len(Y.shape)==1: 
            Y = np.array([Y]).transpose() # Transform vector into column matrix
            # This is NOT what we want: Y = Y.reshape( -1, 1 ), because Y.shape[1] out of range
        self.n_target = Y.shape[1]                 # Num target values = num col of Y
        self.n_label = len(set(Y.ravel()))         # Num labels = num classes (categories of categorical var if n_target=1 or n_target if labels are binary )
        # Create the right number of copies of the predictor instance
        if len(self.predictors)!=self.n_target:
            predictorInstance = self.predictors[0]
            self.predictors = [predictorInstance]
            for i in range(1,self.n_target):
                self.predictors.append(copy.copy(predictorInstance))
        # Fit all predictors
        for i in range(self.n_target):
            # Update the number of desired prodictos
            if hasattr(self.predictors[i], 'n_estimators'):
                self.predictors[i].n_estimators=self.n_estimators
            # Subsample if desired
            if self.balance:
                pos = Y[:,i]>0
                neg = Y[:,i]<=0
                if sum(pos)<sum(neg): 
                    chosen = pos
                    not_chosen = neg
                else: 
                    chosen = neg
                    not_chosen = pos
                num = sum(chosen)
                idx=filter(lambda(x): x[1]==True, enumerate(not_chosen))
                idx=np.array(zip(*idx)[0])
                np.random.shuffle(idx)
                chosen[idx[0:min(num, len(idx))]]=True
                # Train with chosen samples            
                self.predictors[i].fit(X[chosen,:],Y[chosen,i])
            else:
                self.predictors[i].fit(X,Y[:,i])
        return
sequence2sequence.py 文件源码 项目:lang-reps 作者: chaitanyamalaviya 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def get_batch_loss(self, input_batch, output_batch):

        dynet.renew_cg()

        # Dimension: maxSentLength * minibatch_size
        wids = []
        wids_reversed = []

        # List of lists to store whether an input is
        # present(1)/absent(0) for an example at a time step
        # masks = [] # Dimension: maxSentLength * minibatch_size

        # tot_words = 0
        maxSentLength = max([len(sent) for sent in input_batch])
        for j in range(maxSentLength):
            wids.append([(self.src_vocab[sent[j]].i if len(sent)>j else self.src_vocab.END_TOK.i) for sent in input_batch])
            wids_reversed.append([(self.src_vocab[sent[len(sent)- j-1]].i if len(sent)>j else self.src_vocab.END_TOK.i) for sent in input_batch])
            # mask = [(1 if len(sent)>j else 0) for sent in input_batch]
            # masks.append(mask)
            #tot_words += sum(mask)

        embedded_batch = self.embed_batch_seq(wids)
        embedded_batch_reverse = self.embed_batch_seq(wids_reversed)
        encoded_batch = self.encode_batch_seq(embedded_batch, embedded_batch_reverse)

        # pass last hidden state of encoder to decoder
        return self.decode_batch(encoded_batch, output_batch)
TensorFlowInterface.py 文件源码 项目:IntroToDeepLearning 作者: robb-brown 项目源码 文件源码 阅读 350 收藏 0 点赞 0 评论 0
def plotFields(layer,fieldShape=None,channel=None,figOffset=1,cmap=None,padding=0.01):
    # Receptive Fields Summary
    try:
        W = layer.W
    except:
        W = layer
    wp = W.eval().transpose();
    if len(np.shape(wp)) < 4:       # Fully connected layer, has no shape
        fields = np.reshape(wp,list(wp.shape[0:-1])+fieldShape) 
    else:           # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:,channel,:,:]
        else:
            fields = np.reshape(wp,[features*channels,iy,ix])

    perRow = int(math.floor(math.sqrt(fields.shape[0])))
    perColumn = int(math.ceil(fields.shape[0]/float(perRow)))

    fig = mpl.figure(figOffset); mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig,111,nrows_ncols=(perRow,perColumn),axes_pad=padding,cbar_mode='single')
    for i in range(0,np.shape(fields)[0]):
        im = grid[i].imshow(fields[i],cmap=cmap); 

    grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    # old way
    # fields2 = np.vstack([fields,np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
    # tiled = []
    # for i in range(0,perColumn*perRow,perColumn):
    #   tiled.append(np.hstack(fields2[i:i+perColumn]))
    # 
    # tiled = np.vstack(tiled)
    # mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();
    mpl.figure(figOffset+1); mpl.clf(); mpl.imshow(np.sum(np.abs(fields),0),cmap=cmap); mpl.title('%s Total Absolute Input Dependency' % layer.name); mpl.colorbar()
TensorFlowInterface.py 文件源码 项目:IntroToDeepLearning 作者: robb-brown 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def plotFields(layer,fieldShape=None,channel=None,maxFields=25,figName='ReceptiveFields',cmap=None,padding=0.01):
    # Receptive Fields Summary
    W = layer.W
    wp = W.eval().transpose();
    if len(np.shape(wp)) < 4:       # Fully connected layer, has no shape
        fields = np.reshape(wp,list(wp.shape[0:-1])+fieldShape)
    else:           # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:,channel,:,:]
        else:
            fields = np.reshape(wp,[features*channels,iy,ix])

    fieldsN = min(fields.shape[0],maxFields)
    perRow = int(math.floor(math.sqrt(fieldsN)))
    perColumn = int(math.ceil(fieldsN/float(perRow)))

    fig = mpl.figure(figName); mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig,111,nrows_ncols=(perRow,perColumn),axes_pad=padding,cbar_mode='single')
    for i in range(0,fieldsN):
        im = grid[i].imshow(fields[i],cmap=cmap);

    grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    # old way
    # fields2 = np.vstack([fields,np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
    # tiled = []
    # for i in range(0,perColumn*perRow,perColumn):
    #   tiled.append(np.hstack(fields2[i:i+perColumn]))
    #
    # tiled = np.vstack(tiled)
    # mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();
    mpl.figure(figName+' Total'); mpl.clf(); mpl.imshow(np.sum(np.abs(fields),0),cmap=cmap); mpl.title('%s Total Absolute Input Dependency' % layer.name); mpl.colorbar()
tdose_utilities.py 文件源码 项目:TDOSE 作者: kasperschmidt 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def analytic_convolution_gaussian(mu1,covar1,mu2,covar2):
    """
    The analytic vconvolution of two Gaussians is simply the sum of the two mean vectors
    and the two convariance matrixes

    --- INPUT ---
    mu1         The mean of the first gaussian
    covar1      The covariance matrix of of the first gaussian
    mu2         The mean of the second gaussian
    covar2      The covariance matrix of of the second gaussian

    """
    muconv    = mu1+mu2
    covarconv = covar1+covar2
    return muconv, covarconv

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
domain.py 文件源码 项目:pyballd 作者: Yurlungur 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def inner_product_to_infty(self,gf1,gf2):
        "Inner product on non-compact domain"
        factors = [s.get_scale_factor() for s in self.stencils]
        factor = np.prod(factors)
        integrand = (factor*gf1*self.weights2D*gf2*self.dRdX)
        integrand[-1] = 0
        integral = np.sum(integrand)
        return integral
orthopoly.py 文件源码 项目:pyballd 作者: Yurlungur 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def get_integration_weights(order,nodes=None):
    """
    Returns the integration weights for Gauss-Lobatto quadrature
    as a function of the order of the polynomial we want to
    represent.
    See: https://en.wikipedia.org/wiki/Gaussian_quadrature
    See: arXive:gr-qc/0609020v1
    """
    if np.all(nodes == False):
        nodes=get_quadrature_points(order)
    if poly == polynomial.chebyshev.Chebyshev:
        weights = np.empty((order+1))
        weights[1:-1] = np.pi/order
        weights[0] = np.pi/(2*order)
        weights[-1] = weights[0]
        return weights
    elif poly == polynomial.legendre.Legendre:
        interior_weights = 2/((order+1)*order*poly.basis(order)(nodes[1:-1])**2)
        boundary_weights = np.array([1-0.5*np.sum(interior_weights)])
        weights = np.concatenate((boundary_weights,
                                  interior_weights,
                                  boundary_weights))
        return weights
    else:
        raise ValueError("Not a known polynomial type.")
        return False
orthopoly.py 文件源码 项目:pyballd 作者: Yurlungur 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def inner_product(self,gf1,gf2):
        """Calculates the 2D inner product between grid functions
        gf1 and gf2 using the appropriate quadrature rule
        """
        factors = [s.get_scale_factor() for s in self.stencils]
        factor = np.prod(factors)
        integrand = gf1*self.weights2D*gf2
        integral_unit_cell = np.sum(integrand)
        integral_physical = integral_unit_cell*factor
        return integral_physical
spectral_dns_solver.py 文件源码 项目:mpiFFT4py 作者: spectralDNS 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def compute_rhs(rhs):
    U_dealiased = work[((3,) + FFT.work_shape(dealias), float, 0)]
    curl_dealiased = work[((3,) + FFT.work_shape(dealias), float, 1)]
    for i in range(3):
        U_dealiased[i] = FFT.ifftn(U_hat[i], U_dealiased[i], dealias)

    curl_dealiased = curl(U_hat, curl_dealiased)
    rhs = cross(U_dealiased, curl_dealiased, rhs)
    P_hat[:] = sum(rhs*K_over_K2, 0, out=P_hat)
    rhs -= P_hat*K
    rhs -= nu*K2*U_hat
    return rhs

# Initialize a Taylor Green vortex
pylspm.py 文件源码 项目:pylspm 作者: lseman 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def gof(self):
        r2mean = np.mean(self.r2.T[self.endoexo()[0]].values)
        AVEmean = self.AVE().copy()

        totalblock = 0
        for i in range(self.lenlatent):
            block = self.data_[self.Variables['measurement']
                               [self.Variables['latent'] == self.latent[i]]]
            block = len(block.columns.values)
            totalblock += block
            AVEmean[self.latent[i]] = AVEmean[self.latent[i]] * block

        AVEmean = np.sum(AVEmean) / totalblock
        return np.sqrt(AVEmean * r2mean)
pylspm.py 文件源码 项目:pylspm 作者: lseman 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def cr(self):
        # Composite Reliability
        composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            block = self.data_[self.Variables['measurement']
                               [self.Variables['latent'] == self.latent[i]]]
            p = len(block.columns)

            if(p != 1):
                cor_mat = np.cov(block.T)
                evals, evecs = np.linalg.eig(cor_mat)
                U, S, V = np.linalg.svd(cor_mat, full_matrices=False)

                indices = np.argsort(evals)
                indices = indices[::-1]
                evecs = evecs[:, indices]
                evals = evals[indices]

                loadings = V[0, :] * np.sqrt(evals[0])

                numerador = np.sum(abs(loadings))**2
                denominador = numerador + (p - np.sum(loadings ** 2))
                cr = numerador / denominador
                composite[self.latent[i]] = cr

            else:
                composite[self.latent[i]] = 1

        composite = composite.T
        return(composite)
pylspm.py 文件源码 项目:pylspm 作者: lseman 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def r2adjusted(self):
        n = len(self.data_)
        r2 = self.r2.values

        r2adjusted = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            p = sum(self.LVariables['target'] == self.latent[i])
            r2adjusted[self.latent[i]] = r2[i] - \
                (p * (1 - r2[i])) / (n - p - 1)

        return r2adjusted.T
pylspm.py 文件源码 项目:pylspm 作者: lseman 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def AVE(self):
        # AVE
        return self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
pylspm.py 文件源码 项目:pylspm 作者: lseman 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def fornell(self):
        cor_ = pd.DataFrame.corr(self.fscores)**2
        AVE = self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
        for i in range(len(cor_)):
            cor_.ix[i, i] = AVE[i]

        return(cor_)
gac.py 文件源码 项目:pylspm 作者: lseman 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def fitness(self, data_, n_clusters, lvmodel, mvmodel, scheme, regression):

        output = pd.DataFrame(self.genes)
        output.columns = ['Split']
        dataSplit = pd.concat([data_, output], axis=1)
        f1 = []
        results = []
        for i in range(n_clusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, lvmodel, mvmodel, scheme,
                                      regression, 0, 50, HOC='true'))

                sumOuterResid = pd.DataFrame.sum(
                    pd.DataFrame.sum(results[i].residuals()[1]**2))
                sumInnerResid = pd.DataFrame.sum(
                    pd.DataFrame.sum(results[i].residuals()[2]**2))
                f1.append(sumOuterResid + sumInnerResid)
            except:
                f1.append(10000)

        print((1 / np.sum(f1)))
        return (1 / np.sum(f1))
gac.py 文件源码 项目:pylspm 作者: lseman 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def roulettewheel(pop, fit):
    fit = fit - min(fit)
    sumf = sum(fit)
    if(sumf == 0):
        return pop[0]
    prob = [(item + sum(fit[:index])) / sumf for index, item in enumerate(fit)]
    prob_ = uniform(0, 1)
#    print(prob)
    individuo = (int(BinSearch(prob, prob_, 0, len(prob) - 1)))
    return pop[individuo]


问题


面经


文章

微信
公众号

扫码关注公众号