Example source code for the Python stats() class

statistics_noise.py — project: privcount, author: privcount
def print_privacy_allocation(stats_parameters, sigmas, epsilons, excess_noise_ratio):
    '''
    Print information about sigmas and noise ratios for each statistic.
    '''
    # print epsilons sorted by epsilon allocation
    epsilons_sorted = sorted(epsilons.keys(), key=lambda x: epsilons[x],
                             reverse=True)
    print('Epsilon values\n-----')
    for param in epsilons_sorted:
        print('{}: {}'.format(param, epsilons[param]))
    # print equalized ratio of sigma to expected value
    for param, stats in stats_parameters.items():
        ratio = get_expected_noise_ratio(excess_noise_ratio,
                                         sigmas[param],
                                         stats[1])
        print('Ratio of sigma value to expected value: {}'.format(ratio))
        break

    print ""

    # print allocation of privacy budget
    sigma_params_sorted = list(sigmas.keys())
    # add dummy counter for full counters.noise.yaml
    dummy_counter = DEFAULT_DUMMY_COUNTER_NAME
    sigma_params_sorted.append(dummy_counter)
    sigmas[dummy_counter] = get_sanity_check_counter()['sigma']
    epsilons[dummy_counter] = get_sanity_check_counter()['epsilon']
    sigma_params_sorted.sort()
    print('Sigma values\n-----')
    print('counters:')
    for param in sigma_params_sorted:
        sigma = sigmas[param]
        print('    {}:'.format(param))
        print('        sigma: {:.4f}'.format(sigma))

    print ""
ml_trainer.py — project: BlueWhale, author: caffe2
def _setup_initial_blobs(self):
        # Define models
        self.score_model = ModelHelper(name="score_" + self.model_id)
        self.train_model = ModelHelper(name="train_" + self.model_id)

        # Create input, output, labels, and loss blobs
        self.input_blob = "ModelInput_" + self.model_id
        workspace.FeedBlob(self.input_blob, np.zeros(1, dtype=np.float32))
        self.output_blob = "ModelOutput_" + self.model_id
        workspace.FeedBlob(self.output_blob, np.zeros(1, dtype=np.float32))
        self.labels_blob = "ModelLabels_" + self.model_id
        workspace.FeedBlob(self.labels_blob, np.zeros(1, dtype=np.float32))
        self.loss_blob = "loss"  # "ModelLoss_" + self.model_id
        workspace.FeedBlob(self.loss_blob, np.zeros(1, dtype=np.float32))

        # Create blobs for model parameters
        self.weights: List[str] = []
        self.biases: List[str] = []

        for x in six.moves.range(len(self.layers) - 1):
            dim_in = self.layers[x]
            dim_out = self.layers[x + 1]

            weight_name = "Weights_" + str(x) + "_" + self.model_id
            bias_name = "Biases_" + str(x) + "_" + self.model_id
            self.weights.append(weight_name)
            self.biases.append(bias_name)

            bias = np.zeros(
                shape=[
                    dim_out,
                ], dtype=np.float32
            )
            workspace.FeedBlob(bias_name, bias)

            gain = np.sqrt(2) if self.activations[x] == 'relu' else 1
            weights = scipy.stats.norm(0, gain * np.sqrt(1 / dim_in)).rvs(
                size=[dim_out, dim_in]
            ).astype(np.float32)
            workspace.FeedBlob(weight_name, weights)
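
The initialization above is He-style: weights are drawn from a zero-mean normal with standard deviation gain * sqrt(1 / dim_in), with gain sqrt(2) for ReLU layers. A minimal standalone sketch of the same scheme (the function name and shapes below are illustrative, not part of BlueWhale):

import numpy as np
import scipy.stats

def init_weights(dim_in, dim_out, activation='relu'):
    # sqrt(2) gain compensates for ReLU zeroing roughly half the activations
    gain = np.sqrt(2) if activation == 'relu' else 1
    return scipy.stats.norm(0, gain * np.sqrt(1 / dim_in)).rvs(
        size=[dim_out, dim_in]).astype(np.float32)

w = init_weights(128, 64)
print(w.std())  # roughly sqrt(2 / 128) ~= 0.125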
ml_conv_trainer.py — project: BlueWhale, author: caffe2
def _setup_initial_blobs(self):
        MLTrainer._setup_initial_blobs(self)

        self.output_conv_blob = "Conv_output_{}".format(self.model_id)
        workspace.FeedBlob(self.output_conv_blob, np.zeros(1, dtype=np.float32))

        self.conv_weights: List[str] = []
        self.conv_biases: List[str] = []

        for x in six.moves.range(len(self.dims) - 1):
            dim_in = self.dims[x]
            dim_out = self.dims[x + 1]
            kernel_h = self.conv_height_kernels[x]
            kernel_w = self.conv_width_kernels[x]

            weight_shape = [dim_out, kernel_h, kernel_w, dim_in]
            bias_shape = [dim_out, ]

            conv_weight_name = "ConvWeights_" + str(x) + "_" + self.model_id
            bias_name = "ConvBiases_" + str(x) + "_" + self.model_id
            self.conv_weights.append(conv_weight_name)
            self.conv_biases.append(bias_name)

            conv_bias = np.zeros(shape=bias_shape, dtype=np.float32)
            workspace.FeedBlob(bias_name, conv_bias)

            conv_weights = scipy.stats.norm(0, np.sqrt(1 / dim_in)).rvs(
                size=weight_shape
            ).astype(np.float32)
            workspace.FeedBlob(conv_weight_name, conv_weights)
stats.py — project: pyrsss, author: butala
def robust_std(l, alpha=1/scipy.stats.norm.ppf(0.75)):
    """
    Compute a robust estimate of the standard deviation for the list
    of values *l* (by default, for normally distributed samples ---
    see https://en.wikipedia.org/wiki/Median_absolute_deviation).
    """
    return alpha * mad(l)[0]
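
Since 1 / scipy.stats.norm.ppf(0.75) ≈ 1.4826, this is the usual MAD-based consistent estimator of the standard deviation. A self-contained sketch (the inline MAD computation below stands in for the project's `mad` helper, which returns a tuple):

import numpy as np
import scipy.stats

def robust_std_demo(x):
    # median absolute deviation, scaled to be consistent with the
    # standard deviation under normality (factor ~1.4826)
    mad = np.median(np.abs(x - np.median(x)))
    return mad / scipy.stats.norm.ppf(0.75)

x = np.random.default_rng(0).normal(size=100_000)
print(robust_std_demo(x))  # close to 1.0 for standard normal samples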
stats.py — project: pyrsss, author: butala
def main(argv=None):
    if argv is None:
        argv = sys.argv

    parser = ArgumentParser(
        description='Report mean and standard deviation for the stream of numbers read from stdin',
        formatter_class=ArgumentDefaultsHelpFormatter)
    args = parser.parse_args(argv[1:])

    stats = Stats()
    for line in sys.stdin:
        stats(float(line))
    print(stats)
distributions.py — project: plotnine, author: has2k1
def get(name):
    try:
        return getattr(stats, name)
    except AttributeError:
        raise PlotnineError(
            "Unknown distribution '{}'".format(name))
distributions.py — project: plotnine, author: has2k1
def get_continuous_distribution(name):
    if name not in continuous:
        msg = "Unknown continuous distribution '{}'"
        raise ValueError(msg.format(name))

    return getattr(stats, name)
gleu.py — project: smt-for-gec, author: cnap
def get_gleu_stats(self, scores):
        """calculate mean and confidence interval from all GLEU iterations"""
        mean = np.mean(scores)
        std = np.std(scores)
        ci = scipy.stats.norm.interval(0.95, loc=mean, scale=std)
        return ['%f' % mean,
                '%f' % std,
                '(%.3f,%.3f)' % (ci[0], ci[1])]
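
scipy.stats.norm.interval(0.95, loc=mean, scale=std) is simply mean ± 1.96·std, i.e. a normal approximation built from the spread of the iteration scores. A quick check on synthetic data (the numbers below are made up, not GLEU output):

import numpy as np
import scipy.stats

scores = np.random.default_rng(0).normal(0.6, 0.02, size=500)
mean, std = np.mean(scores), np.std(scores)
ci = scipy.stats.norm.interval(0.95, loc=mean, scale=std)
print(ci)
print(mean - 1.96 * std, mean + 1.96 * std)  # agrees with ci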
common.py — project: singlecell-dash, author: czbiohub
def diff_exp(matrix, group1, group2, index):
    """Computes differential expression between group 1 and group 2
    for each column in the dataframe counts.

    Returns a dataframe of Z-scores and p-values."""

    g1 = matrix[group1, :]
    g2 = matrix[group2, :]

    g1mu = g1.mean(0)
    g2mu = g2.mean(0)

    mean_diff = np.asarray(g1mu - g2mu).flatten()
    # E[X^2] - (E[X])^2
    pooled_sd = np.sqrt(
        ((g1.power(2)).mean(0) - np.power(g1mu, 2)) / len(group1)
        + ((g2.power(2)).mean(0) - np.power(g2mu, 2)) / len(group2))
    pooled_sd = np.asarray(pooled_sd).flatten()

    z_scores = np.zeros_like(pooled_sd)
    nz = pooled_sd > 0
    z_scores[nz] = np.nan_to_num(mean_diff[nz] / pooled_sd[nz])

    # two-sided p-value from the normal approximation (Z-test)
    p_vals = (1 - stats.norm.cdf(np.abs(z_scores))) * 2

    df = pd.DataFrame(OrderedDict([('z', z_scores), ('p', p_vals)]),
                      index=index)

    return df
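
The calls to .power(2) and the np.asarray(...).flatten() conversions indicate that `matrix` is expected to be a scipy.sparse matrix. A hypothetical usage sketch with synthetic counts (gene names and group indices are invented; assumes the function's own imports — numpy as np, pandas as pd, OrderedDict, and scipy `stats` — are in scope):

import numpy as np
import scipy.sparse

rng = np.random.default_rng(0)
counts = scipy.sparse.csr_matrix(rng.poisson(5, size=(20, 3)).astype(float))
group1 = np.arange(10)       # cells in condition A
group2 = np.arange(10, 20)   # cells in condition B
result = diff_exp(counts, group1, group2, index=['geneA', 'geneB', 'geneC'])
print(result)  # DataFrame with columns 'z' and 'p', one row per gene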
common.py — project: singlecell-dash, author: czbiohub
def calculate_plate_summaries(self):
        """Get mean reads, percent mapping, etc summaries for each plate"""
        well_map = self.cell_metadata.groupby(Plates.SAMPLE_MAPPING)

        # these stats are from STAR mapping
        star_cols = ['Number of input reads', 'Uniquely mapped reads number']
        star_stats = self.mapping_stats[star_cols].groupby(
                self.cell_metadata[Plates.SAMPLE_MAPPING]).sum()

        total_reads = star_stats['Number of input reads']
        unique_reads = star_stats['Uniquely mapped reads number']

        percent_ercc = well_map.sum()['ercc'].divide(total_reads, axis=0)
        percent_mapped_reads = unique_reads / total_reads - percent_ercc

        plate_summaries = pd.DataFrame(OrderedDict([
            (Plates.MEAN_READS_PER_CELL, total_reads / well_map.size()),
            (Plates.MEDIAN_GENES_PER_CELL, well_map.median()['n_genes']),
            ('Percent not uniquely aligned', 100 * well_map.sum()['alignment_not_unique'].divide(total_reads, axis=0)),
            (Plates.PERCENT_MAPPED_READS, 100 * percent_mapped_reads),
            ('Percent no feature', 100 * well_map.sum()['no_feature'].divide(total_reads, axis=0)),
            ('Percent Rn45s', 100 * self.genes['Rn45s'].groupby(
                    self.cell_metadata[Plates.SAMPLE_MAPPING]).sum() / total_reads),
            (Plates.PERCENT_ERCC, 100 * percent_ercc),
            ('n_wells', well_map.size())
        ]))

        return plate_summaries
brsa.py — project: brainiak, author: brainiak
def _bin_exp(self, n_bin, scale=1.0):
        """ Calculate the bin locations to approximate exponential distribution.
            It breaks the cumulative probability of exponential distribution
            into n_bin equal bins, each covering 1 / n_bin probability. Then it
            calculates the center of mass in each bins and returns the
            centers of mass. So, it approximates the exponential distribution
            with n_bin of Delta function weighted by 1 / n_bin, at the
            locations of these centers of mass.
        Parameters:
        -----------
        n_bin: int
            The number of bins to approximate the exponential distribution
        scale: float, default: 1.0
            The scale parameter of the exponential distribution, defined in
            the same way as in scipy.stats. It does not influence the ratios
            between bin locations, only their overall spacing, so users
            should generally keep the default.
        Returns:
        --------
        bins: numpy array of size [n_bin,]
            The centers of mass for each segment of the
            exponential distribution.
        """
        boundaries = np.flip(scipy.stats.expon.isf(
            np.linspace(0, 1, n_bin + 1),
            scale=scale), axis=0)
        bins = np.empty(n_bin)
        for i in np.arange(n_bin):
            bins[i] = utils.center_mass_exp(
                (boundaries[i], boundaries[i + 1]), scale=scale)
        return bins
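
utils.center_mass_exp returns the conditional mean of the exponential distribution over an interval. A self-contained sketch using the closed form E[X | a < X < b] = scale + (a·e^(−a/scale) − b·e^(−b/scale)) / (e^(−a/scale) − e^(−b/scale)); the helper below is an assumed stand-in for brainiak's utils.center_mass_exp, and expon.ppf replaces the flipped isf call:

import numpy as np
import scipy.stats

def center_mass_exp_demo(a, b, scale=1.0):
    # conditional mean of Exp(scale) on (a, b); b may be np.inf
    if np.isinf(b):
        return a + scale  # memorylessness: the tail beyond a has mean a + scale
    ea, eb = np.exp(-a / scale), np.exp(-b / scale)
    return scale + (a * ea - b * eb) / (ea - eb)

n_bin = 4
boundaries = scipy.stats.expon.ppf(np.linspace(0, 1, n_bin + 1))  # last is inf
bins = [center_mass_exp_demo(boundaries[i], boundaries[i + 1])
        for i in range(n_bin)]
print(bins)  # increasing centers of mass, one per equal-probability bin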
brsa.py — project: brainiak, author: brainiak
def _set_SNR_grids(self):
        """ Set the grids and weights for SNR used in numerical integration
            of SNR parameters.
        """
        if self.SNR_prior == 'unif':
            SNR_grids = np.linspace(0, 1, self.SNR_bins)
            SNR_weights = np.ones(self.SNR_bins) / (self.SNR_bins - 1)
            SNR_weights[0] = SNR_weights[0] / 2.0
            SNR_weights[-1] = SNR_weights[-1] / 2.0
        elif self.SNR_prior == 'lognorm':
            dist = scipy.stats.lognorm
            alphas = np.arange(np.mod(self.SNR_bins, 2),
                               self.SNR_bins + 2, 2) / self.SNR_bins
            # The goal here is to divide the area under the pdf curve
            # to segments representing equal probabilities.
            bounds = dist.interval(alphas, (self.logS_range,))
            bounds = np.unique(bounds)
            # bounds contain the boundaries which equally separate
            # the probability mass of the distribution
            SNR_grids = np.zeros(self.SNR_bins)
            for i in np.arange(self.SNR_bins):
                SNR_grids[i] = dist.expect(
                    lambda x: x, args=(self.logS_range,),
                    lb=bounds[i], ub=bounds[i + 1]) * self.SNR_bins
            # Center of mass of each segment between consecutive
            # bounds are set as the grids for SNR.
            SNR_weights = np.ones(self.SNR_bins) / self.SNR_bins
        else:  # SNR_prior == 'exp'
            SNR_grids = self._bin_exp(self.SNR_bins)
            SNR_weights = np.ones(self.SNR_bins) / self.SNR_bins
        SNR_weights = SNR_weights / np.sum(SNR_weights)
        return SNR_grids, SNR_weights
bmcmc_test.py — project: bmcmc, author: sanjibs
def lnfunc(self,args):
        # define the function which needs to be sampled (log posterior)
        # Do not use self.args in this function. You can use self.eargs. 
        return scipy.stats.norm.logpdf(args['x'],loc=args['mu'],scale=args['sigma'])
bmcmc_test.py — project: bmcmc, author: sanjibs
def lnfunc(self,args):
       # log posterior
       temp1=scipy.stats.norm.logpdf(args['xt'],loc=args['mu'],scale=args['sigma'])
       temp2=scipy.stats.norm.logpdf(args['x'],loc=args['xt'],scale=args['sigma_x'])
       return temp1+temp2
bmcmc_test.py — project: bmcmc, author: sanjibs
def lnfunc(self,args):
        if not self.eargs['outliers']:
            temp1=(args['y']-(self.args['m']*self.args['x']+self.args['c']))/args['sigma_y']
            return -0.5*(temp1*temp1)-np.log(np.sqrt(2*np.pi)*args['sigma_y'])
        else:
            temp11=scipy.stats.norm.pdf(args['y'],loc=(self.args['m']*self.args['x']+self.args['c']),scale=args['sigma_y'])
            sigma_b=np.sqrt(np.square(args['sigma_y'])+np.square(args['sigma_b']))
            temp22=scipy.stats.norm.pdf(args['y'],loc=self.args['mu_b'],scale=sigma_b)
            return np.log((1-args['p_b'])*temp11+args['p_b']*temp22)
bmcmc_test.py — project: bmcmc, author: sanjibs
def lnfunc(self,args):
       # log posterior, xt has been integrated out
       sigma=np.sqrt(args['sigma']*args['sigma']+args['sigma_x']*args['sigma_x'])
       temp=scipy.stats.norm.logpdf(args['x'],loc=args['mu'],scale=sigma)
       return temp
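
Marginalizing the latent xt ~ N(mu, sigma) out of x | xt ~ N(xt, sigma_x) gives x ~ N(mu, sqrt(sigma^2 + sigma_x^2)), which is exactly the density this lnfunc evaluates. A quick numerical check (illustrative, not part of bmcmc):

import numpy as np

rng = np.random.default_rng(0)
mu, sigma, sigma_x = 1.0, 0.5, 0.3
xt = rng.normal(mu, sigma, size=200_000)
x = rng.normal(xt, sigma_x)  # sample the two-stage model
print(x.std(), np.sqrt(sigma**2 + sigma_x**2))  # both ~0.583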
bmcmc_test.py — project: bmcmc, author: sanjibs
def lnfunc(self,args):
        # log posterior
        temp1=[]
        for i,y in enumerate(self.data['y']):
            temp1.append(np.sum(self.lngauss(y-args['alpha'][i],self.eargs['sigma'])))
        temp1=np.array(temp1)
        temp2=scipy.stats.norm.logpdf(args['alpha'],loc=args['mu'],scale=args['omega'])
        return temp1+temp2
illumination_correction.py — project: DeepProfiler, author: jccaicedo
def __init__(self, stats, channels, target_dim):
        self.stats = stats
        self.channels = channels
        self.target_dim = (target_dim[0], target_dim[1])
        self.illum_corr_func = np.zeros((self.target_dim[0], self.target_dim[1], len(self.channels)))

    # Based on Sing et al. 2014 paper
illumination_correction.py — project: DeepProfiler, author: jccaicedo
def channel_function(self, mean_channel, disk_size):
        #TODO: get np.type from other source or parameterize or compute :/
        # We currently assume 16 bit images
        operator = skimage.morphology.disk(disk_size)
        filtered_channel = skimage.filters.median(mean_channel.astype(np.uint16), operator)
        filtered_channel = skimage.transform.resize(filtered_channel, self.target_dim, preserve_range=True)
        robust_minimum = scipy.stats.scoreatpercentile(filtered_channel, 2)
        filtered_channel = np.maximum(filtered_channel, robust_minimum)
        illum_corr_func = filtered_channel / robust_minimum
        return illum_corr_func
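
A hypothetical end-to-end sketch of channel_function on synthetic data (the Gaussian illumination surface and all parameter values are invented for illustration, mirroring the method body without the class):

import numpy as np
import scipy.stats
import skimage.filters
import skimage.morphology
import skimage.transform

rng = np.random.default_rng(0)
yy, xx = np.mgrid[0:128, 0:128]
# bright center, dim edges: a typical uneven-illumination pattern
mean_channel = (1000 + 500 * np.exp(-((xx - 64)**2 + (yy - 64)**2) / 2000.0)
                + rng.normal(0, 20, (128, 128))).astype(np.uint16)

filtered = skimage.filters.median(mean_channel, skimage.morphology.disk(12))
filtered = skimage.transform.resize(filtered, (64, 64), preserve_range=True)
robust_minimum = scipy.stats.scoreatpercentile(filtered, 2)
illum_corr_func = np.maximum(filtered, robust_minimum) / robust_minimum
print(illum_corr_func.min(), illum_corr_func.max())  # min is 1.0 by construction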
illumination_correction.py — project: DeepProfiler, author: jccaicedo
def compute_all(self, median_filter_size):
        disk_size = median_filter_size // 2  # from diameter to radius (integer division)
        for ch in range(len(self.channels)):
            self.illum_corr_func[:, :, ch] = self.channel_function(self.stats["mean_image"][:, :, ch], disk_size)

    # TODO: Is image a uint16 or float32/64? What is its data type?
    # TODO: Update the test as appropriate.
    # Not being used? Not needed?

