# shared_functions.py - Python source code snippet

def KL_validate(data_true, data_predicted, n_bins, x_range, n_samples=10000):
    """Bootstrap test of how well ``data_predicted`` matches ``data_true``.

    Both data sets are binned with ``np.histogram`` over the same bins and
    the divergence ``sc.entropy(hist_true + 1, hist_predicted + 1)`` is
    computed (``sc`` is assumed to be ``scipy.stats``, whose ``entropy(pk, qk)``
    normalises its inputs and returns KL(pk || qk)).  A reference
    distribution is then built by repeatedly drawing two bootstrap resamples
    of ``data_true`` and computing the same divergence between them.

    Parameters
    ----------
    data_true : array_like
        Reference observations; resampled along the first axis.
    data_predicted : array_like
        Simulated / predicted observations to compare against the reference.
    n_bins : int or sequence
        ``bins`` argument forwarded to ``np.histogram``.
    x_range : (float, float)
        ``range`` argument forwarded to ``np.histogram``.
    n_samples : int, optional
        Number of bootstrap pairs to draw (default 10000).  Must be >= 1.

    Returns
    -------
    simulated_KL : float
        Divergence of the predicted histogram from the true histogram.
    conf_interval : tuple
        ``(0, q)`` where ``q`` is the bootstrap divergence at sorted position
        ``ceil(0.95 * n_samples) - 1`` (the empirical 95% upper bound).
    pval : float
        Fraction of bootstrap divergences strictly greater than
        ``simulated_KL``, i.e. an estimate of
        Pr(KL(bootstrap || bootstrap) > KL(true || predicted)).
    n : int
        Number of observations in ``data_true`` (length of its first axis).

    Raises
    ------
    ValueError
        If ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1, got %r" % (n_samples,))

    # Accept plain sequences as well as ndarrays; fancy indexing below
    # requires an array.
    data_true = np.asarray(data_true)
    n = data_true.shape[0]

    hist_true, _ = np.histogram(data_true, bins=n_bins, range=x_range)
    hist_predicted, _ = np.histogram(data_predicted, bins=n_bins, range=x_range)

    # Add-one smoothing keeps every bin non-zero so the divergence is finite.
    simulated_KL = sc.entropy(hist_true + 1, hist_predicted + 1)

    subsampled_KL = []
    # `range` (not the Python-2-only `xrange`) keeps this runnable on Python 3.
    for _ in range(n_samples):
        # Draw order (index1, then index2) is kept so seeded runs reproduce
        # the same random stream as before.
        index1 = np.random.choice(n, n, replace=True)
        index2 = np.random.choice(n, n, replace=True)
        hist_sample1, _ = np.histogram(data_true[index1], bins=n_bins, range=x_range)
        hist_sample2, _ = np.histogram(data_true[index2], bins=n_bins, range=x_range)
        subsampled_KL.append(sc.entropy(hist_sample2 + 1, hist_sample1 + 1))

    subsampled_KL = np.sort(np.asarray(subsampled_KL))

    # Share of the bootstrap (null) distribution lying strictly above the
    # observed divergence.
    pval = np.count_nonzero(subsampled_KL > simulated_KL) / float(n_samples)

    upper_index = int(math.ceil(n_samples * 0.95)) - 1
    conf_interval = (0, subsampled_KL[upper_index])
    return simulated_KL, conf_interval, pval, n

# CONTOUR PLOTS