def KL_validate(data_true, data_predicted, n_bins, x_range, n_samples=10000):
    '''Bootstrap validation: p-value = Pr(KL(bootstrap original || bootstrap original) > KL(original || simulated)).'''
    n = data_true.shape[0]
    hist_true, _ = np.histogram(data_true, bins=n_bins, range=x_range)
    hist_predicted, bin_edges = np.histogram(data_predicted, bins=n_bins, range=x_range)
    # add-one smoothing so empty bins do not produce infinite KL terms
    simulated_KL = sc.entropy(hist_true + 1, hist_predicted + 1)
    subsampled_KL = []
    for i in range(n_samples):
        # two independent bootstrap resamples of the original data
        index1 = np.random.choice(n, n, replace=True)
        index2 = np.random.choice(n, n, replace=True)
        sample1 = data_true[index1]
        sample2 = data_true[index2]
        hist_sample1, _ = np.histogram(sample1, bins=n_bins, range=x_range)
        hist_sample2, _ = np.histogram(sample2, bins=n_bins, range=x_range)
        subsampled_KL.append(sc.entropy(hist_sample2 + 1, hist_sample1 + 1))
    subsampled_KL = sorted(subsampled_KL)
    # fraction of bootstrap KL values that exceed the simulated KL
    pval = sum(simulated_KL < i for i in subsampled_KL) / float(n_samples)
    conf_interval = (0, subsampled_KL[int(math.ceil(n_samples * 0.95)) - 1])
    return simulated_KL, conf_interval, pval, n
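# A minimal usage sketch (not from the original source), assuming the imports the snippet
# appears to rely on: `import math`, `import numpy as np`, and `from scipy import stats as sc`.
import math
import numpy as np
from scipy import stats as sc

rng = np.random.RandomState(0)
data_true = rng.normal(0.0, 1.0, size=2000)
data_predicted = rng.normal(0.1, 1.1, size=2000)   # model draws to validate against the data
kl, ci, pval, n = KL_validate(data_true, data_predicted,
                              n_bins=30, x_range=(-5, 5), n_samples=2000)
print(kl, ci, pval, n)   # a large pval means the simulated KL sits inside the bootstrap null range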
# CONTOUR PLOTS
Python entropy() examples (source code)
def first_order(feature, aggregates, verbose=False):
    if not isinstance(aggregates, list):
        aggregates = [aggregates]
    for aggregate in aggregates:
        if verbose:
            print('  first order computation: ' + aggregate)
        if aggregate == 'log':
            feature = np.log(feature)
        elif aggregate == 'sqrt':
            feature = np.sqrt(feature)
        elif aggregate == 'minlog':
            feature = np.log(1 - feature)
        elif aggregate == 'minsqrt':
            feature = np.sqrt(1 - feature)
        elif aggregate == 'mean':
            # feature = np.mean(feature, axis=0)
            feature = np.nanmean(feature, axis=0)
        elif aggregate == 'var':
            feature = np.var(feature, axis=0)
        elif aggregate == 'std':
            # feature = np.std(feature, axis=0)
            feature = np.nanstd(feature, axis=0)
        elif aggregate == 'stdmean':
            feature = np.hstack([np.mean(feature, axis=0), np.std(feature, axis=0)])
        elif aggregate == 'cov':
            # np.cov has no axis argument; rowvar=False treats columns as variables
            feature = np.cov(feature, rowvar=False).flatten()
        elif aggregate == 'totvar':
            feature = np.array([np.mean(np.var(feature, axis=0))])
        elif aggregate == 'totstd':
            feature = np.array([np.mean(np.std(feature, axis=0))])
        elif aggregate == 'entropy':
            feature = feature.flatten()
            feature = np.array([stats.entropy(feature)])
        elif aggregate == 'normentropy':
            feature = feature.flatten()
            feature = np.array([stats.entropy(feature) / np.log(feature.size)])
        elif aggregate == 'information':
            feature = -np.log(feature)
    return feature
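# A minimal usage sketch (not from the original source), assuming `import numpy as np`
# and `from scipy import stats`; a 100-frame, 12-dimensional feature matrix is aggregated.
import numpy as np
from scipy import stats

feats = np.random.rand(100, 12)
print(first_order(feats, 'mean').shape)        # (12,): per-column nan-mean
print(first_order(feats, ['normentropy']))     # flattened entropy divided by log(N), in [0, 1]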
def get_entropy(self, probs):
    """
    Estimate the entropy of a string in Shannons (bits). This method assumes that
    the character frequencies of the input string, given as `probs`, are exactly
    the probability mass function.
    """
    # scipy's entropy() returns the value in nats by default
    ent_nat = entropy(probs)
    # convert nats to Shannons (bits)
    ent_shan = ent_nat / np.log(2)
    return ent_shan
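# Since the surrounding class is not shown, here is a standalone sketch with a hypothetical
# helper `string_entropy_bits` showing how the probability mass function might be built and
# converted to bits, matching the nat-to-Shannon conversion above.
from collections import Counter
import numpy as np
from scipy.stats import entropy

def string_entropy_bits(text):
    # character frequencies -> probability mass function -> entropy in Shannons (bits)
    counts = np.array(list(Counter(text).values()), dtype=float)
    probs = counts / counts.sum()
    return entropy(probs) / np.log(2)

print(string_entropy_bits('abracadabra'))   # ~2.04 bits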
def diversity(dev, gen_test, beam_size, hypo_len, noise_size, per_premise, samples):
    # `Progbar`, `single_generate`, and `cut_zeros` are project-specific helpers
    step = len(dev[0]) // samples
    sind = [i * step for i in range(samples)]
    p = Progbar(per_premise * samples)
    for i in sind:
        hypos = []
        unique_words = []
        hypo_list = []
        premise = dev[0][i]
        prem_list = set(cut_zeros(list(premise)))
        while len(hypos) < per_premise:
            label = np.argmax(dev[2][i])
            words = single_generate(premise, label, gen_test, beam_size, hypo_len, noise_size)
            hypos += [str(ex) for ex in words]
            unique_words += [int(w) for ex in words for w in ex if w > 0]
            hypo_list += [set(cut_zeros(list(ex))) for ex in words]
        # pairwise Jaccard similarities between hypotheses, and hypothesis-premise similarities
        jacks = []
        prem_jacks = []
        for u in range(len(hypo_list)):
            sim_prem = len(hypo_list[u] & prem_list) / float(len(hypo_list[u] | prem_list))
            prem_jacks.append(sim_prem)
            for v in range(u + 1, len(hypo_list)):
                sim = len(hypo_list[u] & hypo_list[v]) / float(len(hypo_list[u] | hypo_list[v]))
                jacks.append(sim)
        avg_dist_hypo = 1 - np.mean(jacks)
        avg_dist_prem = 1 - np.mean(prem_jacks)
        d = entropy(list(Counter(hypos).values()))
        w = entropy(list(Counter(unique_words).values()))
        p.add(len(hypos), [('diversity', d), ('word_entropy', w), ('avg_dist_hypo', avg_dist_hypo), ('avg_dist_prem', avg_dist_prem)])
    arrd = p.sum_values['diversity']
    arrw = p.sum_values['word_entropy']
    arrj = p.sum_values['avg_dist_hypo']
    arrp = p.sum_values['avg_dist_prem']
    return arrd[0] / arrd[1], arrw[0] / arrw[1], arrj[0] / arrj[1], arrp[0] / arrp[1]
def jensen_shannon(P, Q):
    # symmetric Jensen-Shannon divergence via the mixture distribution M
    M = 0.5 * (P + Q)
    return 0.5 * (entropy(P, M) + entropy(Q, M))
# Source: plot_distances.py (project: twitter_LDA_topic_modeling, author: kenneth-orton)
def jensen_shannon_divergence(P, Q):
    # normalise both inputs to probability distributions (L1 norm) before computing the JSD
    _P = np.array(P) / norm(np.array(P), ord=1)
    _Q = np.array(Q) / norm(np.array(Q), ord=1)
    _M = 0.5 * (_P + _Q)
    return 0.5 * (entropy(_P, _M) + entropy(_Q, _M))
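# A minimal usage sketch (not from the original source), assuming `import numpy as np`,
# `from numpy.linalg import norm`, and `from scipy.stats import entropy`; raw counts are fine
# because the function L1-normalises its inputs.
import numpy as np
from numpy.linalg import norm
from scipy.stats import entropy

counts_a = np.array([10, 20, 30, 40])
counts_b = np.array([40, 30, 20, 10])
print(jensen_shannon_divergence(counts_a, counts_b))   # symmetric, bounded above by log(2) nats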
def jensen_shannon_divergence(p, q):
    """Jensen-Shannon divergence between distributions p and q.
    Note: this returns twice the conventional JSD (the 0.5 factor is omitted)."""
    m = (p + q) / 2.0
    return stats.entropy(p, m) + stats.entropy(q, m)
# Source: demo_mi.py (project: Building-Machine-Learning-Systems-With-Python-Second-Edition, author: PacktPublishing)
def mutual_info(x, y, bins=10):
    counts_xy, bins_x, bins_y = np.histogram2d(x, y, bins=(bins, bins))
    counts_x, bins = np.histogram(x, bins=bins)
    counts_y, bins = np.histogram(y, bins=bins)
    # add-one smoothing to avoid log(0)
    counts_xy += 1
    counts_x += 1
    counts_y += 1
    P_xy = counts_xy / np.sum(counts_xy, dtype=float)
    P_x = counts_x / np.sum(counts_x, dtype=float)
    P_y = counts_y / np.sum(counts_y, dtype=float)
    # mutual information in bits, normalised by the sum of the marginal entropies
    I_xy = np.sum(P_xy * np.log2(P_xy / (P_x.reshape(-1, 1) * P_y)))
    return I_xy / (entropy(counts_x) + entropy(counts_y))
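# A minimal usage sketch (not from the original source), assuming `import numpy as np`
# and `from scipy.stats import entropy`; dependent data should score well above independent data.
import numpy as np
from scipy.stats import entropy

rng = np.random.RandomState(0)
x = rng.normal(size=5000)
y_dep = x + 0.1 * rng.normal(size=5000)   # strongly dependent on x
y_ind = rng.normal(size=5000)             # independent of x
print(mutual_info(x, y_dep))              # relatively large normalised MI
print(mutual_info(x, y_ind))              # close to 0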
def create_window_based_features(data, window_size):
    central_fn = np.mean
    # rolling aggregates over windows of increasing length
    ma1 = calcuate_window_operation(data, window_size, central_fn)
    ma2 = calcuate_window_operation(data, 2 * window_size, central_fn)
    ma4 = calcuate_window_operation(data, 4 * window_size, central_fn)
    ma8 = calcuate_window_operation(data, 8 * window_size, central_fn)
    entropy = calcuate_window_operation(data, window_size, stats.entropy)
    stddev = calcuate_window_operation(data, window_size, np.std)
    median_weeks_before = value_before_period(data, 7)
    return np.column_stack((ma1, ma2, ma4, ma8, entropy, stddev, median_weeks_before))
# do cross validation: http://stackoverflow.com/questions/533905/get-the-cartesian-product-of-a-series-of-lists-in-python
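# The helpers `calcuate_window_operation` and `value_before_period` are not part of this snippet.
# A hypothetical sketch of what they might look like, assuming 1-D input and trailing windows
# (the misspelled name is kept because the function above calls it that way):
import numpy as np
import pandas as pd

def calcuate_window_operation(data, window_size, fn):
    # apply `fn` over a trailing rolling window; shorter leading windows are allowed
    return pd.Series(data).rolling(window_size, min_periods=1).apply(fn, raw=True).values

def value_before_period(data, period):
    # value observed `period` steps earlier; the first `period` entries fall back to themselves
    arr = np.asarray(data, dtype=float)
    out = np.empty_like(arr)
    out[:period] = arr[:period]
    out[period:] = arr[:-period]
    return out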
# note: this function shadows scipy's entropy; `scipy_entropy` below is assumed to be
# scipy.stats.entropy imported under an alias
def entropy(X, bins=None):
    """
    Use the Shannon entropy H to describe the distribution of the given sample.
    For calculating the Shannon entropy, the bin edges are needed and can be passed as bins.
    If bins is None, these edges will be calculated using the numpy.histogram function with bins='fd'.
    This uses the Freedman-Diaconis estimator and is fairly resilient to outliers.
    If the input data X is 2D (entropy for more than one bin needed), the histogram will be derived
    once and the same edges used in all bins.
    CAUTION: this is actually a changed behaviour compared to scikit-gstat<=0.1.4.
    :param X: np.ndarray with the given sample to calculate the Shannon entropy from
    :param bins: the bin edges for the entropy calculation, or a number of evenly spaced bins
    :return:
    """
    _X = np.array(X)
    if any([isinstance(_, (list, np.ndarray)) for _ in _X]):
        # if bins is not set, use the histogram over the full value range
        if bins is None:
            # could not figure out a better way here: the values are needed before calculating
            # the entropy in order to use the full value range in all bins
            vals = [[np.abs(_[i] - _[i + 1]) for i in np.arange(0, len(_), 2)] for _ in _X]
            bins = np.histogram(vals, bins=15)[1][1:]
        return np.array([entropy(_, bins=bins) for _ in _X])
    # check that the sample has an even length
    if len(_X) % 2 > 0:
        raise ValueError('The sample does not have an even length: {}'.format(_X))
    # calculate the pairwise absolute differences
    vals = [np.abs(_X[i] - _X[i + 1]) for i in np.arange(0, len(_X), 2)]
    # calculate the bins
    if bins is None:
        bins = 15
    pk = np.histogram(vals, bins)[0]
    return scipy_entropy(pk=pk)
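# A minimal usage sketch (not from the original source), assuming `import numpy as np` and
# `from scipy.stats import entropy as scipy_entropy`; the sample is consumed in pairs, so it
# must have an even length.
import numpy as np
from scipy.stats import entropy as scipy_entropy

rng = np.random.RandomState(42)
sample = rng.normal(size=100)           # even-length 1-D sample -> one entropy value, 15 bins
print(entropy(sample))
pairs_2d = rng.normal(size=(5, 20))     # 2-D input -> one value per row, shared bin edges
print(entropy(pairs_2d))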
def kullback_leibler(vec1, vec2, num_features=None):
    """
    A distance metric between two probability distributions.
    Returns a non-negative divergence value, where values closer to 0 mean less distance (and a higher similarity).
    Uses the scipy.stats.entropy method to compute the Kullback-Leibler divergence value.
    If the distribution draws from a certain number of docs, that value must be passed.
    """
    if scipy.sparse.issparse(vec1):
        vec1 = vec1.toarray()
    if scipy.sparse.issparse(vec2):
        vec2 = vec2.toarray()  # convert both vectors to dense in case they were sparse matrices
    if isbow(vec1) and isbow(vec2):  # if they are in bag-of-words format, make them dense
        if num_features is not None:  # if not None, make as large as the documents drawing from
            dense1 = sparse2full(vec1, num_features)
            dense2 = sparse2full(vec2, num_features)
            return entropy(dense1, dense2)
        else:
            max_len = max(len(vec1), len(vec2))
            dense1 = sparse2full(vec1, max_len)
            dense2 = sparse2full(vec2, max_len)
            return entropy(dense1, dense2)
    else:
        # this conversion is made because, if the input is not in BoW format, it might be a list
        # within a list after conversion; the scipy implementation of Kullback-Leibler fails in
        # such a case, so we pick up only the nested list.
        if len(vec1) == 1:
            vec1 = vec1[0]
        if len(vec2) == 1:
            vec2 = vec2[0]
        return scipy.stats.entropy(vec1, vec2)
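# A minimal usage sketch (not from the original source), assuming the gensim helpers this
# snippet relies on: `from gensim.matutils import sparse2full, isbow`, plus `import scipy.sparse`,
# `import scipy.stats`, and `from scipy.stats import entropy`.
import scipy.sparse
import scipy.stats
from scipy.stats import entropy
from gensim.matutils import sparse2full, isbow

bow1 = [(0, 0.5), (1, 0.3), (2, 0.2)]   # bag-of-words vectors: (term_id, weight) pairs
bow2 = [(0, 0.4), (1, 0.4), (2, 0.2)]
print(kullback_leibler(bow1, bow2, num_features=3))   # small, non-negative divergence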
def kl(x1, x2):
    assert x1.shape == x2.shape
    # x1_2d, x2_2d = reshape_2d(x1), reshape_2d(x2)
    # transpose to [?, #num_examples] so each column holds one example's distribution
    x1_2d_t = x1.transpose()
    x2_2d_t = x2.transpose()
    # pdb.set_trace()
    e = entropy(x1_2d_t, x2_2d_t)
    # replace infinite divergences (zero bins in x2) with a finite cap of 2
    e[np.where(e == np.inf)] = 2
    return e
def f_entropy(p):
    # convert integer labels to a probability distribution via bin counts
    p = np.bincount(p) / float(p.shape[0])
    ep = stats.entropy(p)
    if ep == -float('inf'):
        return 0.0
    return ep
def calc_class_entropy(y):
    # stats.itemfreq returns (value, count) pairs; column 1 holds the class counts
    # (itemfreq was removed in newer SciPy; np.unique(y, return_counts=True)[1] is the modern equivalent)
    class_counts = stats.itemfreq(y)[:, 1]
    return stats.entropy(class_counts, base=2)
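# A minimal usage sketch (not from the original source), assuming `import numpy as np` and
# `from scipy import stats`; both helpers expect integer class labels, and calc_class_entropy
# needs a SciPy version that still ships stats.itemfreq.
import numpy as np
from scipy import stats

labels = np.array([0, 0, 0, 1, 1, 2])
print(f_entropy(labels))            # entropy of the class frequencies, in nats
print(calc_class_entropy(labels))   # same class distribution, measured in bits (base 2)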
def barHhv(s):
    '''
    Conditional entropy H(h|v): the per-state entropies Hhv averaged over the visible marginal Qv.
    '''
    return np.sum(s.Qv * s.Hhv)
def barHvh(s):
    '''
    Conditional entropy H(v|h): the per-state entropies Hvh averaged over the hidden marginal Qh.
    '''
    return np.sum(s.Qh * s.Hvh)
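# For reference, a standalone sketch (not from the original source) of the same quantity computed
# directly from a joint distribution P(v, h) with plain numpy/scipy; the fields `Qv`, `Qh`, `Hhv`,
# and `Hvh` used above are specific to the surrounding RBM code.
import numpy as np
from scipy.stats import entropy

def conditional_entropy_h_given_v(P_joint):
    # H(h|v) = sum_v Q(v) * H(h | v), where rows of P_joint index v and columns index h
    Qv = P_joint.sum(axis=1)                             # visible marginal Q(v)
    Hhv = np.array([entropy(row) for row in P_joint])    # entropy() normalises each row to P(h|v)
    return np.sum(Qv * Hhv)

P = np.array([[0.25, 0.25],
              [0.10, 0.40]])
print(conditional_entropy_h_given_v(P))                  # ~0.597 nats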
# ---------------------------------------------------------------------
# Energies of samples
def short_report(s):
    Hhs = np.sum(rb.bitent(s.Ph))
    Hvs = np.sum(rb.bitent(s.Pv))
    # print a short report
    print('\nRBM dataset Ns=%s Nh=%s Nv=%s' % (s.Ns, s.Nh, s.Nv))
    print('Vis capacity, maximum', np.sum(rb.bitent(0.5 * np.ones(s.Nv))))
    print('Hid capacity, maximum', np.sum(rb.bitent(0.5 * np.ones(s.Nh))))
    print('Vis entropy , sampled', Hvs)
    print('Hid entropy , sampled', Hhs)
    print('Entropy difference ', (Hhs - Hvs))
    print('Mean hidden rate ', np.mean(s.Ph))
    print('Mean hidden complexity', rb.bitent(np.mean(s.Ph)) * s.Nh)
def long_report(s):
    lgE = np.log2(np.e)  # factor for converting nats to bits
    # Long report
    # print('\nFound dataset %s T=%s Nh=%s Nv=%s'%(DIR,T,Nh,Nv))
    # print('DKL %0.2f'%DKL)
    print('\nRBM dataset Ns=%s Nh=%s Nv=%s' % (s.Ns, s.Nh, s.Nv))
    # Hidden layer entropy
    print('==Hidden layer entropy==')
    print('Hid capacity, maximum %0.2f' % (np.sum(rb.bitent(0.5 * np.ones(s.Nh)))))
    print('Hid entropy , sampled %0.2f' % (s.Hhs))
    print('Entropy hid sample is %0.2f' % (entropy(s.Qh, base=2)))
    print('<<Eh>h|v>v sampled is %0.2f' % (s.barEhhv * lgE))
    print('<<Eh>h|v>v ufield is %0.2f' % (s.barEhhv_meanfield * lgE))
    print('Mean hidden complexity %0.2f' % (rb.bitent(np.mean(s.Ph)) * s.Nh))
    print('Mean hidden rate %0.2f' % (np.mean(s.Ph)))
    # Conditional entropy
    print('==Conditional entropy==')
    print('Entropy difference %0.2f' % (s.Hhs - s.Hvs))
    print('<H_h|v>v is %0.2f' % (s.barHhv * lgE))
    # Likelihoods
    print('==Negative log-likelihood==')
    print('<<Ev|h>h|v>v sampl is %0.2f' % (s.barEvhhv * lgE))
    print('<<Ev|h>h|v>v ufild is %0.2f' % (s.barEvhhv_meanfield * lgE))
    # KL divergences
    print('==KL divergences==')
    print('<Dkl(h|v||h)>v sam is %0.2f' % (s.barDKLhv * lgE))
    print('<Dkl(h|v||h)>v uf1 is %0.2f' % (s.barDKLhv_meanfield * lgE))
    # Visible entropy; these should be close in value
    print('==Visible layer entropy==')
    print('Vis capacity, maximum %0.2f' % (np.sum(rb.bitent(0.5 * np.ones(s.Nv)))))
    print('Vis entropy , sampled %0.2f' % (s.Hvs))
    print('Entropy vis sample is %0.2f' % (entropy(s.Qv, base=2)))
    print('<D(.)+<Ev|h>h|v>v sam %0.2f' % (s.barDKLhv * lgE + s.barEvhhv * lgE))
    print('<D(.)+<Ev|h>h|v>v uf1 %0.2f' % (s.barDKLhv_meanfield * lgE + s.barEvhhv_meanfield * lgE))
def entropy(self, filename, delimeter, itemsetSize, minsup, fun):  # fun selects the built-in (scipy) entropy or this class's own implementation
    db = DataBase()
    db.readDB(filename, delimeter)
    dbElem = db.getDBElements()
    dbSize = db.size()
    # relative support (frequency) of every k-itemset over the database
    kItemsetFreq = [float(db.getItemsetSup(set(itemset))) / dbSize
                    for itemset in combinations(dbElem, itemsetSize)]
    sumFreq = sum(kItemsetFreq)
    # normalise the frequencies into a probability distribution; this must be a list,
    # not a generator, because kItemsetFreq is cleared below before the entropy is evaluated
    kItemsetProb = [itemsetFreq / sumFreq for itemsetFreq in kItemsetFreq]
    kItemsetFreq.clear()
    db.getDataBase().clear()
    if fun == 1:
        return entropy(kItemsetProb, base=2)
    elif fun == 2:
        return self.calculateEntropy(kItemsetProb)
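# The class's own `calculateEntropy` (the `fun == 2` branch) is not part of this snippet; a
# hypothetical sketch matching the base-2 behaviour of the `fun == 1` branch could look like this:
import numpy as np

def calculateEntropy(self, probs):
    probs = np.asarray(list(probs), dtype=float)
    probs = probs[probs > 0]                      # skip zero-probability itemsets
    return float(-np.sum(probs * np.log2(probs)))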