Python percentile() usage examples
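All of the snippets below revolve around one NumPy call: np.percentile(a, q) returns the q-th percentile of the data in a, where q may be a scalar or a sequence of percentages in [0, 100]. A minimal illustration:

import numpy as np

data = np.arange(1, 101)                  # the integers 1..100
print(np.percentile(data, 50))            # 50.5 (the median)
print(np.percentile(data, [25, 75]))      # [25.75 75.25]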

norm_post_sim.py (project: pyflux, author: RJT1990)
def norm_post_sim(modes, cov_matrix):
    # draw from a multivariate normal centred on the posterior modes
    post = multivariate_normal(modes, cov_matrix)
    nsims = 30000
    phi = np.zeros([nsims, len(modes)])

    for i in range(0, nsims):
        phi[i] = post.rvs()

    # stack the draws into an (ndim, nsims) chain
    chain = np.array([phi[i][0] for i in range(len(phi))])
    for m in range(1, len(modes)):
        chain = np.vstack((chain, [phi[i][m] for i in range(len(phi))]))

    # per-coordinate posterior summaries; the 5th/95th percentiles bound a 90% interval
    mean_est = [np.mean(np.array([phi[i][j] for i in range(len(phi))])) for j in range(len(modes))]
    median_est = [np.median(np.array([phi[i][j] for i in range(len(phi))])) for j in range(len(modes))]
    upper_95_est = [np.percentile(np.array([phi[i][j] for i in range(len(phi))]), 95) for j in range(len(modes))]
    lower_95_est = [np.percentile(np.array([phi[i][j] for i in range(len(phi))]), 5) for j in range(len(modes))]

    return chain, mean_est, median_est, upper_95_est, lower_95_est
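A side note on the snippet above: the sampling loop and the per-coordinate list comprehensions can be collapsed, since scipy's multivariate_normal.rvs accepts a size argument and np.percentile accepts an axis. A minimal vectorized sketch of the same summaries (returning arrays rather than lists), under the same inputs:

import numpy as np
from scipy.stats import multivariate_normal

def norm_post_sim_vectorized(modes, cov_matrix, nsims=30000):
    phi = multivariate_normal(modes, cov_matrix).rvs(nsims)   # (nsims, ndim)
    lower, median, upper = np.percentile(phi, [5, 50, 95], axis=0)
    return phi.T, phi.mean(axis=0), median, upper, lower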
calculate_aggregate_statistics.py (project: tbp-next-basket, author: GiulioRossetti)
def calculate_aggregate(values):
    agg_measures = {
        'avg': np.mean(values),
        'std': np.std(values),
        'var': np.var(values),
        'med': np.median(values),
        '10p': np.percentile(values, 10),
        '25p': np.percentile(values, 25),
        '50p': np.percentile(values, 50),
        '75p': np.percentile(values, 75),
        '90p': np.percentile(values, 90),
        'iqr': np.percentile(values, 75) - np.percentile(values, 25),
        'iqm': interquartile_range_mean(values),
        'mad': mean_absolute_deviation(values),
        'cov': 1.0 * np.mean(values) / np.std(values),  # note: mean/std, the inverse of the usual CV = std/mean
        'gin': gini_coefficient(values),
        'skw': stats.skew(values),
        'kur': stats.kurtosis(values),
        'sum': np.sum(values)
    }

    return agg_measures
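Since np.percentile accepts a sequence of percentages, the five quantile entries above can come from one call instead of five; a sketch:

import numpy as np

values = np.random.default_rng(0).exponential(size=1000)
p10, p25, p50, p75, p90 = np.percentile(values, [10, 25, 50, 75, 90])
iqr = p75 - p25                           # the same quantity as the 'iqr' key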
speech_eval_old.py (project: lorelei-speech-evaluation, author: usc-sail)
def get_pr(reference_frames,output_frames,mode='type',pr_resolution=100):

    # filter output by confidence
    confidence = collect_confidence(output_frames)
    conf_order = []
    step = 100/float(pr_resolution)
    for j in range(1,pr_resolution+1):
        conf_order.append( np.percentile(confidence, j*step) )
    conf_order = [-1] + conf_order + [2]

    # get curve
    params = []
    for threshold in conf_order:
        params.append( [ reference_frames, output_frames, confidence, threshold, mode ] )
    all_tp, all_fp, all_fn, all_prec, all_rec = zip(*pool.map(single_point, params))
    all_prec = list(all_prec) #+ [0]
    all_rec  = list(all_rec) #+ [1]

    all_rec, all_prec = zip(*sorted(zip(all_rec, all_prec)))
    AUC = metrics.auc(all_rec, all_prec)

    return all_rec, all_prec, AUC


# create complete output for 1 mode --------------------------------------------
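The conf_order construction in get_pr is a percentile sweep: it spaces the candidate thresholds evenly in rank rather than in value, then pads with the sentinels -1 and 2 so the precision-recall curve reaches both endpoints. The loop can be written as a single vectorized call; a sketch with a stand-in confidence array:

import numpy as np

confidence = np.random.rand(500)          # stand-in for collect_confidence(output_frames)
pr_resolution = 100
qs = np.arange(1, pr_resolution + 1) * (100.0 / pr_resolution)
thresholds = np.percentile(confidence, qs)
conf_order = [-1] + thresholds.tolist() + [2]   # sentinel endpoints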
results_stat.py (project: IntelliChess, author: lifei96)
def results_stat():
    for file in os.listdir('./results'):
        if file.endswith('.txt'):
            file_path = os.path.join('./results', file)
            data = [[], [], [], []]
            with open(file_path, 'r') as f:
                lines = f.read().split('\n')
                for line in lines:
                    if len(line) > 0:
                        line_data = list(map(eval, line.split()))  # list() so it can be indexed under Python 3
                        for i in range(4):
                            data[i].append(line_data[i])
            lines = [[], [], [], []]
            for i in range(4):
                lines[i].append(np.mean(data[i]))
                lines[i].append(np.std(data[i]))
                lines[i].append(np.min(data[i]))
                lines[i].append(np.percentile(data[i], 25))
                lines[i].append(np.percentile(data[i], 50))
                lines[i].append(np.percentile(data[i], 75))
                lines[i].append(np.max(data[i]))
            output_path = os.path.join('./stat', file)
            with open(output_path, 'w') as f:
                for line in lines:
                    f.write('\t'.join(map(str, line)) + '\n')
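The seven per-column statistics written above form a seven-number summary; the five order statistics among them fit in one np.percentile call, e.g.:

import numpy as np

col = np.random.rand(50)                  # stand-in for one column of `data`
mn, q25, q50, q75, mx = np.percentile(col, [0, 25, 50, 75, 100])
row = [np.mean(col), np.std(col), mn, q25, q50, q75, mx]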
methods_kharita.py (project: kharita, author: vipyoung)
def splitclusters(datapointwts, seeds, theta):
    """Split clusters whose angular spread exceeds 20 degrees into two halves."""
    std = {}
    seeds1 = []
    seedweight = []
    cluster, p2cluster = point2cluster(datapointwts, seeds, theta)
    for cl in cluster:
        mang = seeds[cl][-1]
        if len(cluster[cl]) > 10:
            # 90th percentile of angular distance to the seed heading
            std[cl] = np.percentile([angledist(xx[2], mang) for xx in cluster[cl]], 90)
            clockwise = [xx for xx in cluster[cl] if greaterthanangle(xx[2], mang)]
            if std[cl] > 20 and 0 < len(clockwise) < len(cluster[cl]):
                seeds1.append(avgpoint(clockwise))
                seeds1.append(avgpoint([xx for xx in cluster[cl] if not greaterthanangle(xx[2], mang)]))
                seedweight.append(len(clockwise))
                seedweight.append(len(cluster[cl]) - len(clockwise))
            else:
                seeds1.append(seeds[cl])
                seedweight.append(len(cluster[cl]))
        else:
            seeds1.append(seeds[cl])
            seedweight.append(len(cluster[cl]))
    return seeds1, seedweight
methods_kharita.py (project: kharita, author: vipyoung)
def splitclustersparallel(datapointwts, seeds):
    X = [(xx[0], xx[1]) for xx in datapointwts]
    S = [(xx[0], xx[1]) for xx in seeds]
    cluster = {}
    p2cluster = []
    gedges = {}; gedges1 = {}; nedges = {}
    std = {}
    seeds1 = []
    seedweight = []
    roadwidth = []
    # assign each data point to its nearest seed
    nbrs = NearestNeighbors(n_neighbors=20, algorithm='ball_tree').fit(S)
    distances, indices = nbrs.kneighbors(X)
    for cd in range(len(seeds)):
        cluster[cd] = []
        roadwidth.append(0)
    for ii, ll in enumerate(indices):
        # note: theta is a module-level constant in the original source
        dd = [taxidist(seeds[xx], datapointwts[ii][:-1], theta) for xx in ll]
        cd = ll[dd.index(min(dd))]
        cluster[cd].append(datapointwts[ii])
        p2cluster.append(cd)
    for cl in cluster:
        mang = seeds[cl][-1]
        scl = seeds[cl]
        if len(cluster[cl]) > 10:
            # 90th percentile of angular spread, plus a road-width estimate
            std[cl] = np.percentile([angledist(xx[2], mang) for xx in cluster[cl]], 90)
            roadwidth[cl] = 1 + 5 * np.std([geodist(scl, xx) * np.sin(anglebetweentwopoints(scl, xx) - scl[-1]) for xx in cluster[cl]])
            print(cl, scl, [(anglebetweentwopoints(scl, xx), scl[-1]) for xx in cluster[cl]])
ClusterOptions.py (project: pbtranscript, author: PacificBiosciences)
def _write_config(self, fasta_filename):
        """Write daligner sensitive config to fasta_filename.sensitive.config."""
        lens = [len(r.sequence) for r in ContigSetReaderWrapper(fasta_filename)]
        self.low_cDNA_size, self.high_cDNA_size = 0, 0
        if len(lens) == 1:
            self.low_cDNA_size, self.high_cDNA_size = lens[0], lens[0]
        if len(lens) >= 2:
            self.low_cDNA_size  = int(np.percentile(lens, 10))
            self.high_cDNA_size = int(np.percentile(lens, 90))

        try:
            with open(fasta_filename+'.sensitive.config', 'w') as f:
                f.write("sensitive={s}\n".format(s=self.sensitive_mode))
                f.write("low={l}\n".format(l=self.low_cDNA_size))
                f.write("high={h}\n".format(h=self.high_cDNA_size))
        except IOError:
            pass # it's OK not to have write permission
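The two cutoffs can also be obtained with a single call, since np.percentile accepts a sequence of percentages; a sketch with hypothetical read lengths:

import numpy as np

lens = [1200, 1500, 1800, 2500, 3100]     # hypothetical read lengths
low, high = (int(x) for x in np.percentile(lens, [10, 90]))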
BootstrapReserve.py (project: chainladder-python, author: jbogaardt)
def summary(self):
        """ Method to produce a summary table of of the Mack Chainladder 
        model.

        Returns:
            This calculation is consistent with the R calculation 
            BootChainLadder$summary

        """
        IBNR = self.IBNR
        tri = self.tri
        summary = pd.DataFrame()
        summary['Latest'] = tri.get_latest_diagonal()
        summary['Mean Ultimate'] = summary['Latest'] + pd.Series([np.mean(np.array(IBNR)[:,num]) for num in range(len(tri.data))],index=summary.index)
        summary['Mean IBNR'] = summary['Mean Ultimate'] - summary['Latest'] 
        summary['SD IBNR'] = pd.Series([np.std(np.array(IBNR)[:,num]) for num in range(len(tri.data))],index=summary.index)
        summary['IBNR 75%'] = pd.Series([np.percentile(np.array(IBNR)[:,num],q=75) for num in range(len(tri.data))],index=summary.index)
        summary['IBNR 95%'] = pd.Series([np.percentile(np.array(IBNR)[:,num],q=95) for num in range(len(tri.data))],index=summary.index)
        return summary
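Each list comprehension above loops over the columns of the simulated IBNR array; np.mean, np.std, and np.percentile all take an axis argument that does the same work in one call. A sketch, assuming IBNR converts to an (n_sims, n_origin) array:

import numpy as np

IBNR = np.random.rand(1000, 10)           # hypothetical simulation output
mean_ibnr = IBNR.mean(axis=0)
sd_ibnr = IBNR.std(axis=0)
ibnr_75, ibnr_95 = np.percentile(IBNR, [75, 95], axis=0)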
audio_tools.py (project: tools, author: kastnerkyle)
def run_ltsd_example():
    fs, d = fetch_sample_speech_tapestry()
    winsize = 1024
    d = d.astype("float32") / 2 ** 15
    d -= d.mean()

    pad = 3 * fs
    noise_pwr = np.percentile(d, 1) ** 2
    noise_pwr = max(1E-9, noise_pwr)
    d = np.concatenate((np.zeros((pad,)) + noise_pwr * np.random.randn(pad), d))
    _, vad_segments = ltsd_vad(d, fs, winsize=winsize)
    v_up = np.where(vad_segments == True)[0]
    s = v_up[0]
    st = v_up[-1] + int(.5 * fs)
    d = d[s:st]

    bname = "tapestry.wav".split(".")[0]
    wavfile.write("%s_out.wav" % bname, fs, soundsc(d))
adagan.py (project: adagan, author: tolstikhin)
def _compute_data_weights_topk(self, opts, density_ratios):
        """Put a uniform distribution on K points with largest prob real data.

        This is a naive heuristic which makes the next GAN concentrate on
        those points of the training set which were classified correctly with
        the largest margins. I.e., our current mixture model is not capable
        of generating points that look similar to these ones.
        """
        threshold = np.percentile(density_ratios,
                                  opts["topk_constant"]*100.0)
        # Note that largest prob_real_data corresponds to smallest density
        # ratios.
        mask = density_ratios <= threshold
        data_weights = np.zeros(self._data_num)
        data_weights[mask] = 1.0 / np.sum(mask)
        return data_weights
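A worked check of the thresholding above: with opts["topk_constant"] = 0.1 the threshold sits at the 10th percentile, so roughly the 10% of points with the smallest density ratios are kept and share the weight uniformly. A self-contained sketch:

import numpy as np

density_ratios = np.random.rand(1000)
threshold = np.percentile(density_ratios, 0.1 * 100.0)
mask = density_ratios <= threshold
weights = np.zeros_like(density_ratios)
weights[mask] = 1.0 / np.sum(mask)
assert np.isclose(weights.sum(), 1.0)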
binnings.py (project: physt, author: janpipek)
def quantile_binning(data=None, bins=10, qrange=(0.0, 1.0), **kwargs):
    """Binning schema based on quantile ranges.

    This binning finds equally spaced quantiles. This should lead to
    all bins having roughly the same frequencies.

    Note: weights are not (yet) taken into account when calculating
    quantiles.

    Parameters
    ----------
    bins: sequence or Optional[int]
        Number of bins
    qrange: Optional[tuple]
        Two floats as minimum and maximum quantile (default: 0.0, 1.0)

    Returns
    -------
    StaticBinning
    """

    if np.isscalar(bins):
        bins = np.linspace(qrange[0] * 100, qrange[1] * 100, bins + 1)
    bins = np.percentile(data, bins)
    return static_binning(bins=make_bin_array(bins), includes_right_edge=True)
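For example, bins=4 with the default qrange expands to the percentages [0, 25, 50, 75, 100], so the edges are the quartiles and each bin receives roughly a quarter of the data. The underlying calls, sketched without the physt wrappers:

import numpy as np

data = np.random.randn(10000)
edges = np.percentile(data, np.linspace(0, 100, 5))   # quartile edges
counts, _ = np.histogram(data, bins=edges)            # roughly 2500 per bin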
Utilities.py (project: BayesVP, author: cameronliang)
def estimate_bayes_factor(traces, logp, r=0.05):
    """
    Estimate odds ratios from a random subsample of the MCMC chains.
    Following AstroML (see Eqn 5.127, pg 237).
    """

    ndim, nsteps = traces.shape # [ndim,number of steps in chain]

    # compute volume of a n-dimensional (ndim) sphere of radius r
    Vr = np.pi ** (0.5 * ndim) / gamma(0.5 * ndim + 1) * (r ** ndim)

    # use neighbor count within r as a density estimator
    bt = BallTree(traces.T)
    count = bt.query_radius(traces.T, r=r, count_only=True)

    # BF = N * p / rho (natural log scale)
    bf = logp + np.log(nsteps) + np.log(Vr) - np.log(count)

    p25, p50, p75 = np.percentile(bf, [25, 50, 75])
    # return the median and a robust sigma estimate (0.7413 * IQR = IQR / 1.349)
    return p50, 0.7413 * (p75 - p25)

########################################################################
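The 0.7413 factor above is the normalized interquartile range: for a Gaussian, IQR is about 1.349 sigma, so 0.7413 * IQR recovers sigma robustly. A quick numerical check:

import numpy as np

x = np.random.randn(100000)               # true sigma = 1
p25, p75 = np.percentile(x, [25, 75])
print(0.7413 * (p75 - p25))               # close to 1.0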
analyse.py (project: azmq, author: ereOn)
def reducer(self, key, values):
        if key == 'latencies':
            values = list(values)
            yield 'average_latency', numpy.average(values)
            yield 'median_latency', numpy.median(values)
            yield '95th_percentile_latency', numpy.percentile(values, 95)
        elif key == 'sent':
            first, last, count = minmax(values)
            yield 'count', count
            yield 'sending_first', first
            yield 'sending_last', last
            yield 'sending_overhead', (last - first) / (count - 1)
            yield 'sending_throughput', (count - 1) / (last - first)
        elif key == 'received':
            first, last, count = minmax(values)
            yield 'receiving_first', first
            yield 'receiving_last', last
            yield 'receiving_overhead', (last - first) / (count - 1)
            yield 'receiving_throughput', (count - 1) / (last - first)
finetuning.py (project: DeepMoji, author: bfelbo)
def calculate_batchsize_maxlen(texts):
    """ Calculates the maximum length in the provided texts and a suitable
        batch size. Rounds up maxlen to the nearest multiple of ten.

    # Arguments:
        texts: List of inputs.

    # Returns:
        Batch size,
        max length
    """
    def roundup(x):
        return int(math.ceil(x / 10.0)) * 10

    # Calculate max length of sequences considered
    # Adjust batch_size accordingly to prevent GPU overflow
    lengths = [len(tokenize(t)) for t in texts]
    maxlen = roundup(np.percentile(lengths, 80.0))
    batch_size = 250 if maxlen <= 100 else 50
    return batch_size, maxlen
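A usage sketch with a stand-in tokenizer (the real tokenize comes from elsewhere in the DeepMoji project):

import math
import numpy as np

def tokenize(text):                       # hypothetical stand-in
    return text.split()

texts = ["a short input", "a somewhat longer input sentence over here"]
lengths = [len(tokenize(t)) for t in texts]
maxlen = int(math.ceil(np.percentile(lengths, 80.0) / 10.0)) * 10   # rounds up to 10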
gnk.py (project: elfi, author: elfi-dev)
def ss_octile(y):
    """Obtain the octile summary statistic.

    The statistic reaches optimal performance for a high number of
    observations. According to Allingham et al. (2009), it is more stable
    than ss_robust.

    Parameters
    ----------
    y : array_like
        Yielded points.

    Returns
    -------
    array_like of the shape (batch_size, dim_ss=8, dim_ss_point)

    """
    octiles = np.linspace(12.5, 87.5, 7)
    E1, E2, E3, E4, E5, E6, E7 = np.percentile(y, octiles, axis=1)

    # Combining the summary statistics.
    ss_octile = np.hstack((E1, E2, E3, E4, E5, E6, E7))
    ss_octile = ss_octile[:, :, np.newaxis]
    return ss_octile
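What makes this work is that np.percentile takes the whole grid of percentages at once: with q of length 7 and axis=1, the call returns an array of shape (7, batch_size), which the tuple assignment then splits into E1..E7. A shape check:

import numpy as np

y = np.random.randn(32, 500)              # (batch_size, n_obs)
octiles = np.linspace(12.5, 87.5, 7)
E = np.percentile(y, octiles, axis=1)
print(E.shape)                            # (7, 32)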
outliers.py (project: py-hadoop-tutorial, author: hougs)
def flag_outliers(series, iqr_multiplier=1.5):
    """Use Tukey's boxplot criterion for outlier identification.
    """
    # note: Series.get_values() was removed in pandas 1.0; .to_numpy() is the modern equivalent
    top_quartile_cutoff = np.percentile(series.get_values(), 75)
    bottom_quartile_cutoff = np.percentile(series.get_values(), 25)
    # Compute interquartile range
    iqr = top_quartile_cutoff - bottom_quartile_cutoff
    top_outlier_cutoff = top_quartile_cutoff + iqr * iqr_multiplier
    bottom_outlier_cutoff = bottom_quartile_cutoff - iqr * iqr_multiplier
    return series[(series < bottom_outlier_cutoff) | (series > top_outlier_cutoff)]
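The same Tukey fences written against a plain array, for reference:

import numpy as np

x = np.concatenate([np.random.randn(100), [15.0, -12.0]])   # two planted outliers
q25, q75 = np.percentile(x, [25, 75])
iqr = q75 - q25
outliers = x[(x < q25 - 1.5 * iqr) | (x > q75 + 1.5 * iqr)]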


utils.py (project: retinex_for_mri, author: ofgulban)
def truncate_and_scale(data, percMin=0.01, percMax=99.9, zeroTo=1.0):
    """Truncate and scale the data as a preprocessing step.

    Parameters
    ----------
    data : nd numpy array
        Data/image to be truncated and scaled.
    percMin : float, positive
        Minimum percentile to be truncated.
    percMax : float, positive
        Maximum percentile to be truncated.
    zeroTo : float
        Data will be returned in the range from 0 to this number.

    Returns
    -------
    data : nd numpy array
        Truncated and scaled data/image.

    """
    # adjust minimum
    percDataMin = np.percentile(data, percMin)
    data[np.where(data < percDataMin)] = percDataMin
    data = data - data.min()

    # adjust maximum
    percDataMax = np.percentile(data, percMax)
    data[np.where(data > percDataMax)] = percDataMax
    data = 1./data.max() * data
    return data * zeroTo
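A usage sketch, assuming the function above is in scope: clipping at extreme percentiles before scaling stops a handful of hot voxels from compressing the rest of the intensity range.

import numpy as np

img = np.random.exponential(size=(64, 64))
img[0, 0] = 1e6                           # one extreme voxel
out = truncate_and_scale(img.copy(), percMin=0.01, percMax=99.9)
print(out.min(), out.max())               # 0.0 1.0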
utils.py (project: CASIA-HWDB1.1-cnn, author: integeruser)
def preprocess_bitmap(bitmap):
    # contrast stretching
    p2, p98 = np.percentile(bitmap, (2, 98))
    assert abs(p2-p98) > 10
    bitmap = skimage.exposure.rescale_intensity(bitmap, in_range=(p2, p98))

    # from skimage.filters import threshold_otsu
    # thresh = threshold_otsu(bitmap)
    # bitmap = bitmap > thresh
    return bitmap
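The (2, 98) percentile pair is the standard contrast-stretching choice from the scikit-image documentation, and the assert guards against near-flat bitmaps where stretching would mostly amplify noise. The core of the stretch, as a standalone sketch:

import numpy as np
from skimage import exposure

bitmap = np.random.randint(0, 256, (64, 64)).astype(np.uint8)
p2, p98 = np.percentile(bitmap, (2, 98))
stretched = exposure.rescale_intensity(bitmap, in_range=(p2, p98))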
common.py (project: cellranger, author: 10XGenomics)
def plot_tsne_totalcounts(chart, sample_properties, sample_data):
    """ Plot cells colored by total counts """
    analysis = sample_data.analysis
    if not analysis or len(sample_properties['genomes']) > 1:
        return None

    reads_per_bc = analysis.matrix.get_reads_per_bc()
    vmin, vmax = np.percentile(reads_per_bc, ws_gex_constants.TSNE_TOTALCOUNTS_PRCT_CLIP)

    return plot_dimensions_color(chart, analysis.get_tsne().transformed_tsne_matrix,
                                 reads_per_bc,
                                 ws_gex_constants.TSNE_TOTALCOUNTS_DESCRIPTION,
                                 vmin, vmax,
                                 1, 2)

