Example source code for Python's log()

hpseq.py (project: seqhawkes, author: mlukasik)
def _beta_update_raw_tfidf(self):
        '''
        Run only once - it does not depend on other parameters.
        '''

        for nodeid in xrange(self.D):
            self.beta[nodeid] = self.W[self.node_vec == nodeid, :].sum(axis=0)
        for nodeid in xrange(self.D):
            for wordid in xrange(self.beta.shape[1]):
                docs_cnt = np.sum(self.W[self.node_vec == nodeid,
                                  wordid] >= 1)
                docs_cnt += 1  # smooth by adding one
                # The 1 + log(...) keeps words that occur in every document
                # from being zeroed out (idf alone would give log(1) = 0).
                self.beta[nodeid][wordid] *= 1 + np.log(self.W.shape[0]
                        * 1. / docs_cnt)

        # Laplace smoothing to avoid zeros!

        self.beta += 1
        self._normalize_beta_rowwise()
        return self.beta
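Below is a minimal, self-contained sketch of the same tf-idf weighting outside the class, on made-up toy data (W is a document-term count matrix and node_vec assigns each document to a node; all names and values here are hypothetical):

import numpy as np

# Toy data: 6 documents, 4 words, 2 nodes.
W = np.array([[2, 0, 1, 0],
              [1, 1, 0, 0],
              [0, 0, 3, 1],
              [0, 2, 0, 0],
              [1, 0, 0, 2],
              [0, 1, 1, 0]])
node_vec = np.array([0, 0, 0, 1, 1, 1])
D = 2

beta = np.zeros((D, W.shape[1]))
for nodeid in range(D):
    # Total count of each word over the node's documents.
    beta[nodeid] = W[node_vec == nodeid].sum(axis=0)
    for wordid in range(W.shape[1]):
        # Add-one-smoothed document frequency within the node.
        docs_cnt = np.sum(W[node_vec == nodeid, wordid] >= 1) + 1
        beta[nodeid, wordid] *= 1 + np.log(W.shape[0] / docs_cnt)

beta += 1                                # Laplace smoothing
beta /= beta.sum(axis=1, keepdims=True)  # row-wise normalization
print(beta)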
original.py (project: dc_stat_think, author: justinbois)
def b_value(mags, mt, perc=[2.5, 97.5], n_reps=None):
    """Compute the b-value and optionally its confidence interval."""
    # Extract magnitudes above completeness threshold
    m = mags[mags >= mt]

    # Compute b-value
    b = (np.mean(m) - mt) * np.log(10)

    # Draw bootstrap replicates
    if n_reps is None:
        return b
    else:
        m_bs_reps = dcst.draw_bs_reps(m, np.mean, size=n_reps)

        # Compute b-value from replicates
        b_bs_reps = (m_bs_reps - mt) * np.log(10)

        # Compute confidence interval
        conf_int = np.percentile(b_bs_reps, perc)

        return b, conf_int
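A usage sketch with synthetic magnitudes (all numbers hypothetical): under the Gutenberg-Richter law, magnitudes above the completeness threshold mt are exponentially distributed with mean mt + b/ln(10), so the recovered b should land near the true value. Assumes numpy and dc_stat_think are imported as in the source file:

import numpy as np
import dc_stat_think as dcst

rng = np.random.default_rng(42)
mt, b_true = 3.0, 1.0
mags = mt + rng.exponential(scale=b_true / np.log(10), size=10000)

b, conf_int = b_value(mags, mt, n_reps=1000)
print(b, conf_int)  # b should be close to b_true = 1.0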
math_to_code.py (project: STA141C, author: clarkfitzg)
def f(w, lamb):
    """
    Eq. (2) in problem 2

    Non-vectorized, slow
    """
    total = 0
    nrows = X.shape[0]
    for i in range(nrows):
        current = 1 + np.exp(-y[i] * X[i, ].dot(w))
        total += np.log(current)
    total += (lamb / 2) * w.dot(w)
    return total
math_to_code.py (project: STA141C, author: clarkfitzg)
def f2(w, lamb):
    """
    Eq. (2) in problem 2

    Vectorized (no explicit loops), fast
    """
    yxTw = y * X.dot(w)
    firstpart = np.log(1 + np.exp(-yxTw))
    total = firstpart.sum()
    total += (lamb / 2) * w.dot(w)
    return total
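A quick sanity check that the loop and vectorized versions agree; both snippets assume module-level X and y, which this hypothetical example fills in with random data:

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
y = rng.choice([-1.0, 1.0], size=100)
w = rng.normal(size=5)

assert np.isclose(f(w, 0.1), f2(w, 0.1))  # same penalized logistic loss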
libscores.py (project: AutoML5, author: djajetic)
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, average AFTER taking the exp,
    # because exp is a nonlinear operation.
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
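The final normalization in isolation, with made-up loss values: exp(-log_loss) converts the loss into an accuracy-like score, which is then rescaled so the prior-based predictor scores 0 and a perfect one scores 1:

import numpy as np

the_log_loss, the_base_log_loss = 0.25, 0.69   # hypothetical loss values
pac = np.exp(-the_log_loss)
base_pac = np.exp(-the_base_log_loss)
score = (pac - base_pac) / max(1e-15, 1 - base_pac)
print(score)  # ~0.56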
libscores.py (project: AutoML5, author: djajetic)
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case, the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss
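For a single binary column, the decomposition above reduces to the standard binary cross-entropy, matching sklearn; a minimal check on hypothetical data:

import numpy as np
from sklearn.metrics import log_loss as sk_log_loss

sol = np.array([1, 0, 1, 1], dtype=float)   # solution in {0, 1}
pred = np.array([0.9, 0.2, 0.7, 0.6])       # prediction in [0, 1]
pred = np.clip(pred, 1e-15, 1 - 1e-15)      # same bounding as above

ours = -np.mean(sol * np.log(pred)) - np.mean((1 - sol) * np.log(1 - pred))
assert np.isclose(ours, sk_log_loss(sol, pred))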
libscores.py (project: AutoML5, author: djajetic)
def prior_log_loss(frac_pos, task = 'binary.classification'):
    ''' Baseline log loss. For multiple classes or labels, return the values for each column. '''
    eps = 1e-15   
    frac_pos_ = sp.maximum (eps, frac_pos)
    if (task != 'multiclass.classification'): # binary case
        frac_neg = 1-frac_pos
        frac_neg_ = sp.maximum (eps, frac_neg)
        pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on
    else: # multiclass case
        fp = frac_pos_ / sum(frac_pos_) # Need to renormalize the lines in multiclass case
        # Only ONE label is 1 in the multiclass case active for each line
        pos_class_log_loss_ = - frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_) 
    return base_log_loss
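In the binary case this baseline is just the entropy of the prior; a one-line check with a made-up prior:

import numpy as np

frac_pos = 0.3
base = -frac_pos * np.log(frac_pos) - (1 - frac_pos) * np.log(1 - frac_pos)
print(base)  # ~0.6109 nats: the entropy of a Bernoulli(0.3) prior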

# sklearn implementations for comparison
tdlm_model.py (project: topically-driven-language-model, author: jhlau)
def sample(self, probs, temperature):
        if temperature == 0:
            return np.argmax(probs)

        probs = probs.astype(np.float64) #convert to float64 for higher precision
        probs = np.log(probs) / temperature
        probs = np.exp(probs) / math.fsum(np.exp(probs))
        return np.argmax(np.random.multinomial(1, probs, 1))

    #generate a sentence given conv_hidden
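A standalone version of the temperature trick above (hypothetical helper name and data): dividing the log probabilities by the temperature sharpens the distribution when temperature < 1 and flattens it when temperature > 1:

import math
import numpy as np

def sample_with_temperature(probs, temperature, rng=np.random.default_rng()):
    if temperature == 0:
        return int(np.argmax(probs))             # greedy
    logits = np.log(probs.astype(np.float64)) / temperature
    probs = np.exp(logits) / math.fsum(np.exp(logits))
    return int(np.argmax(rng.multinomial(1, probs)))

probs = np.array([0.1, 0.6, 0.3])
print(sample_with_temperature(probs, 0))     # always 1
print(sample_with_temperature(probs, 0.5))   # sharpened: almost always 1
print(sample_with_temperature(probs, 2.0))   # flattened: closer to uniform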
model_sentences.py (project: onto-lstm, author: pdasigi)
def test(self, vocab_size, use_onto_lstm, S_ind_test=None, C_ind_test=None, hierarchical=False, base=2, oov_list=None):
    X_test = C_ind_test[:,:-1] if use_onto_lstm else S_ind_test[:,:-1] # remove the last words' hyps in all sentences
    Y_inds_test = S_ind_test[:,1:]
    if hierarchical:
      test_targets = self._factor_target_indices(Y_inds_test, vocab_size, base=base)
    else:
      test_targets = [self._make_one_hot(Y_inds_test, vocab_size)]
    print >>sys.stderr, "Evaluating model on test data"
    test_loss = self.model.evaluate(X_test, test_targets)
    print >>sys.stderr, "Test loss: %.4f"%test_loss
    if oov_list is not None:
      oov_inds = [self.dp.word_index[w] for w in oov_list]
      non_oov_Y_inds = numpy.copy(Y_inds_test)
      for ind in oov_inds:
        non_oov_Y_inds[non_oov_Y_inds == ind] = 0
      non_oov_test_targets = self._factor_target_indices(non_oov_Y_inds, vocab_size, base=base)
      non_oov_test_loss = self.model.evaluate(X_test, non_oov_test_targets)
      print >>sys.stderr, "Non-oov test loss: %.4f"%non_oov_test_loss
    factored_test_preds = [-((numpy.log(pred) * target).sum(axis=-1)) for pred, target in zip(self.model.predict(X_test), test_targets)]
    test_preds = sum(factored_test_preds)
    #non_null_probs = []
    #for test_pred, inds in zip(test_preds, Y_inds_test):
    #  wanted_probs = []
    #  for tp, ind in zip(test_pred, inds):
    #    if ind != 0:
    #      wanted_probs.append(tp)
    #  non_null_probs.append(wanted_probs)
    #return non_null_probs
    return test_preds
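The per-token loss used above, in isolation: with one-hot targets, summing target * log(pred) over the last axis picks out the log probability of the correct word (numbers are made up):

import numpy as np

pred = np.array([[0.7, 0.2, 0.1],
                 [0.1, 0.8, 0.1]])
target = np.array([[1, 0, 0],
                   [0, 1, 0]])
nll = -(np.log(pred) * target).sum(axis=-1)
print(nll)  # [0.357 0.223], i.e. [-log 0.7, -log 0.8]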
fimix.py (project: pylspm, author: lseman)
def data_log_likelihood(self, dataSplit, coefficients, variances):

        log_likelihood = 0.0

        for k in range(self.num_components):

            coef_ = coefficients[k]

            # .loc replaces the removed pandas .ix indexer
            Beta = coef_.loc[self.endoVar, self.endoVar]
            Gamma = coef_.loc[self.endoVar, self.exoVar]

            a_ = (np.dot(Beta, self.fscores[
                  self.endoVar].T) + np.dot(Gamma, self.fscores[self.exoVar].T))

            invert_ = np.linalg.inv(np.array(variances[k]))

            exponential = np.exp(-0.5 * np.dot(np.dot(a_.T, invert_), a_))

            den = (((2 * np.pi)**(self.Q / 2)) *
                   np.sqrt(np.linalg.det(variances[k])))
            probabilities = exponential[0] / den

            log_likelihood += np.log(probabilities).sum()

        print(log_likelihood)
        return log_likelihood
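The density being accumulated above is a zero-mean multivariate normal; as a sanity check, the same per-observation log density can be computed in log space with scipy (shapes and data here are hypothetical):

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(1)
Q, N = 3, 5
Sigma = 2.0 * np.eye(Q)
a = rng.normal(size=(Q, N))      # Q variables, N observations

mvn = multivariate_normal(mean=np.zeros(Q), cov=Sigma)
print(mvn.logpdf(a.T).sum())     # sum over observations of log N(a_i | 0, Sigma)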
adequacy.py (project: pylspm, author: lseman)
def BTS(data):
    """Bartlett's test of sphericity on the correlation matrix of data."""

    n = data.shape[0]
    p = data.shape[1]

    chi2 = -(n - 1 - (2 * p + 5) / 6) * \
        np.log(np.linalg.det(pd.DataFrame.corr(data)))
    df = p * (p - 1) / 2

    pvalue = scipy.stats.distributions.chi2.sf(chi2, df)

    return [chi2, pvalue]
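A usage sketch on hypothetical correlated data (the formula in BTS is Bartlett's test of sphericity; this assumes numpy, pandas, and scipy.stats are imported as in the source file):

import numpy as np
import pandas as pd
import scipy.stats

rng = np.random.default_rng(0)
latent = rng.normal(size=(200, 1))
data = pd.DataFrame(latent + 0.5 * rng.normal(size=(200, 5)))

chi2, pvalue = BTS(data)
print(chi2, pvalue)  # tiny p-value: the correlation matrix is far from identity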
nvdm.py (project: variational-text-tensorflow, author: carpedm20)
def build_model(self):
    self.x = tf.placeholder(tf.float32, [self.reader.vocab_size], name="input")
    self.x_idx = tf.placeholder(tf.int32, [None], name="x_idx")

    self.build_encoder()
    self.build_generator()

    # Kullback-Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood
    self.g_loss = -tf.reduce_sum(tf.log(tf.gather(self.p_x_i, self.x_idx) + 1e-10))

    self.loss = self.e_loss + self.g_loss

    self.encoder_var_list, self.generator_var_list = [], []
    for var in tf.trainable_variables():
      if "encoder" in var.name:
        self.encoder_var_list.append(var)
      elif "generator" in var.name:
        self.generator_var_list.append(var)

    # optimizers for alternating updates of encoder and generator
    self.optim_e = tf.train.AdamOptimizer(learning_rate=self.lr) \
                         .minimize(self.e_loss, global_step=self.step, var_list=self.encoder_var_list)
    self.optim_g = tf.train.AdamOptimizer(learning_rate=self.lr) \
                         .minimize(self.g_loss, global_step=self.step, var_list=self.generator_var_list)

    # optimizer for a one-shot joint update
    self.optim = tf.train.AdamOptimizer(learning_rate=self.lr) \
                         .minimize(self.loss, global_step=self.step)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("generator loss", self.g_loss)
    _ = tf.scalar_summary("total loss", self.loss)
serving.py (project: treecat, author: posterior)
def edge_logits(self):
        """Get edge log probabilities on the complete graph."""
serving.py (project: treecat, author: posterior)
def logprob(self, data):
        """Compute non-normalized log probabilies of many rows of data."""
serving.py (project: treecat, author: posterior)
def logprob(self, data):
        logprobs = np.stack(
            [server.logprob(data) for server in self._ensemble])
        logprobs = logsumexp(logprobs, axis=0)
        logprobs -= np.log(len(self._ensemble))
        assert logprobs.shape == (data.shape[0], )
        return logprobs
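What the ensemble average computes, on made-up numbers: the log of the mean probability across models, evaluated in log space for numerical stability:

import numpy as np
from scipy.special import logsumexp

logprobs = np.log([[0.2, 0.5],    # model 1, two rows of data
                   [0.4, 0.3]])   # model 2
avg = logsumexp(logprobs, axis=0) - np.log(2)
assert np.allclose(np.exp(avg), [0.3, 0.4])  # mean of the probabilities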
serving.py (project: treecat, author: posterior)
def edge_logits(self):
        """A [K]-shaped array of log odds of edges in the complete graph."""
        return self._server.edge_logits
mmd_vae_eval.py (project: MMD-Variational-Autoencoder, author: ShengjiaZhao)
def make_model_path(name):
    log_path = os.path.join('log', name)
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)  # needs import shutil; safer than shelling out to rm -rf
    os.makedirs(log_path)
    return log_path
mmd_vae_eval.py (project: MMD-Variational-Autoencoder, author: ShengjiaZhao)
def compute_log_sum(val):
    """Stable per-column -log(mean(exp(-val))), averaged across columns."""
    min_val = np.min(val, axis=0, keepdims=True)
    return np.mean(min_val - np.log(np.mean(np.exp(-val + min_val), axis=0)))
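Why the min-shift matters: with per-sample values large enough that exp(-val) underflows to zero, the naive formula blows up while the shifted one does not (values are hypothetical):

import numpy as np

val = np.array([[746.0], [747.0], [748.0]])      # hypothetical per-sample NLLs
naive = -np.log(np.mean(np.exp(-val), axis=0))   # exp underflows to 0 -> inf
stable = compute_log_sum(val)
print(naive, stable)                             # [inf] vs ~746.69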
mklmm.py (project: MKLMM, author: omerwe)
def getInitialHyps(self, X, C, y):
        self.logdetXX  = np.linalg.slogdet(C.T.dot(C))[1]

        hyp0_sig2e = [0.5*np.log(0.5*y.var())]
        Linreg = sklearn.linear_model.LinearRegression(fit_intercept=False, normalize=False, copy_X=False)
        Linreg.fit(C, y)
        hyp0_fixedEffects = Linreg.coef_        
        return hyp0_sig2e, hyp0_fixedEffects
regionsRanker.py (project: MKLMM, author: omerwe)
def rankRegions(self, X, C, y, pos, regionLength, reml=True):

        # get region list
        regionsList = self.createRegionsList(pos, regionLength)

        #precompute log determinant of covariates
        XX = C.T.dot(C)
        [Sxx,Uxx]= la.eigh(XX)
        logdetXX  = np.log(Sxx).sum()

        #score each region
        betas = np.zeros(len(regionsList))
        for r_i, r in enumerate(regionsList):
            regionSize = len(r)

            if (self.verbose and r_i % 1000==0):
                print 'Testing region ' + str(r_i+1)+'/'+str(len(regionsList)),
                print 'with', regionSize, 'SNPs\t'

            s,U = self.eigenDecompose(X[:, np.array(r)], None)
            sig2g_kernel, sig2e_kernel, fixedEffects, ll = self.optSigma2(U, s, y, C, logdetXX, reml)
            betas[r_i] = ll

        return regionsList, betas
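The eigendecomposition route to the log determinant used above agrees with numpy's slogdet; a quick check on hypothetical covariates:

import numpy as np

rng = np.random.default_rng(0)
C = rng.normal(size=(50, 4))
XX = C.T.dot(C)
S, U = np.linalg.eigh(XX)
assert np.isclose(np.log(S).sum(), np.linalg.slogdet(XX)[1])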


    ### this code is taken from the FastLMM package (see attached license)###

