def print_privacy_allocation(stats_parameters, sigmas, epsilons, excess_noise_ratio):
    '''
    Print information about sigmas and noise ratios for each statistic.
    '''
    # print epsilons sorted by epsilon allocation
    epsilons_sorted = sorted(epsilons.keys(), key=lambda x: epsilons[x], reverse=True)
    print('Epsilon values\n-----')
    for param in epsilons_sorted:
        print('{}: {}'.format(param, epsilons[param]))
    # print the equalized ratio of sigma to expected value
    # (the ratio is the same for every statistic, so only one is printed)
    for param, stats in stats_parameters.items():
        ratio = get_expected_noise_ratio(excess_noise_ratio,
                                         sigmas[param],
                                         stats[1])
        print('Ratio of sigma value to expected value: {}'.format(ratio))
        break
    print('')
    # print allocation of privacy budget
    sigma_params_sorted = list(sigmas.keys())
    # add dummy counter for full counters.noise.yaml
    dummy_counter = DEFAULT_DUMMY_COUNTER_NAME
    sigma_params_sorted.append(dummy_counter)
    sigmas[dummy_counter] = get_sanity_check_counter()['sigma']
    epsilons[dummy_counter] = get_sanity_check_counter()['epsilon']
    sigma_params_sorted.sort()
    print('Sigma values\n-----')
    print('counters:')
    for param in sigma_params_sorted:
        sigma = sigmas[param]
        print('  {}:'.format(param))
        print('    sigma: {:4f}'.format(sigma))
    print('')
def _setup_initial_blobs(self):
    # Define models
    self.score_model = ModelHelper(name="score_" + self.model_id)
    self.train_model = ModelHelper(name="train_" + self.model_id)

    # Create input, output, labels, and loss blobs
    self.input_blob = "ModelInput_" + self.model_id
    workspace.FeedBlob(self.input_blob, np.zeros(1, dtype=np.float32))
    self.output_blob = "ModelOutput_" + self.model_id
    workspace.FeedBlob(self.output_blob, np.zeros(1, dtype=np.float32))
    self.labels_blob = "ModelLabels_" + self.model_id
    workspace.FeedBlob(self.labels_blob, np.zeros(1, dtype=np.float32))
    self.loss_blob = "loss"  # "ModelLoss_" + self.model_id
    workspace.FeedBlob(self.loss_blob, np.zeros(1, dtype=np.float32))

    # Create blobs for model parameters
    self.weights: List[str] = []
    self.biases: List[str] = []
    for x in six.moves.range(len(self.layers) - 1):
        dim_in = self.layers[x]
        dim_out = self.layers[x + 1]

        weight_name = "Weights_" + str(x) + "_" + self.model_id
        bias_name = "Biases_" + str(x) + "_" + self.model_id
        self.weights.append(weight_name)
        self.biases.append(bias_name)

        bias = np.zeros(
            shape=[
                dim_out,
            ], dtype=np.float32
        )
        workspace.FeedBlob(bias_name, bias)

        # He-style initialization: std = gain * sqrt(1 / fan_in), gain = sqrt(2) for ReLU
        gain = np.sqrt(2) if self.activations[x] == 'relu' else 1
        weights = scipy.stats.norm(0, gain * np.sqrt(1 / dim_in)).rvs(
            size=[dim_out, dim_in]
        ).astype(np.float32)
        workspace.FeedBlob(weight_name, weights)
def _setup_initial_blobs(self):
    MLTrainer._setup_initial_blobs(self)

    self.output_conv_blob = "Conv_output_{}".format(self.model_id)
    workspace.FeedBlob(self.output_conv_blob, np.zeros(1, dtype=np.float32))

    self.conv_weights: List[str] = []
    self.conv_biases: List[str] = []
    for x in six.moves.range(len(self.dims) - 1):
        dim_in = self.dims[x]
        dim_out = self.dims[x + 1]
        kernel_h = self.conv_height_kernels[x]
        kernel_w = self.conv_width_kernels[x]

        weight_shape = [dim_out, kernel_h, kernel_w, dim_in]
        bias_shape = [dim_out, ]

        conv_weight_name = "ConvWeights_" + str(x) + "_" + self.model_id
        bias_name = "ConvBiases_" + str(x) + "_" + self.model_id
        self.conv_weights.append(conv_weight_name)
        self.conv_biases.append(bias_name)

        conv_bias = np.zeros(shape=bias_shape, dtype=np.float32)
        workspace.FeedBlob(bias_name, conv_bias)

        conv_weights = scipy.stats.norm(0, np.sqrt(1 / dim_in)).rvs(
            size=weight_shape
        ).astype(np.float32)
        workspace.FeedBlob(conv_weight_name, conv_weights)
def robust_std(l, alpha=1 / scipy.stats.norm.ppf(0.75)):
    """
    Compute a robust estimate of the standard deviation for the list
    of values *l* (by default, for normally distributed samples ---
    see https://en.wikipedia.org/wiki/Median_absolute_deviation).
    """
    return alpha * mad(l)[0]
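# Quick check of robust_std (a minimal sketch, not part of the original module).
# It assumes `mad` returns the median absolute deviation of the input, which is
# written out explicitly with numpy below so the snippet is self-contained. On data
# with gross outliers the MAD-based estimate stays near the true scale while np.std
# is inflated.
import numpy as np
import scipy.stats

rng = np.random.default_rng(0)
clean = rng.normal(loc=0.0, scale=2.0, size=10_000)
contaminated = np.concatenate([clean, np.full(100, 1e3)])  # inject gross outliers

alpha = 1 / scipy.stats.norm.ppf(0.75)
robust = alpha * np.median(np.abs(contaminated - np.median(contaminated)))

print(np.std(contaminated))  # inflated by the outliers
print(robust)                # stays close to the true scale of 2.0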
def main(argv=None):
    if argv is None:
        argv = sys.argv
    parser = ArgumentParser('Report mean and standard deviation for the stream of numbers read from stdin',
                            formatter_class=ArgumentDefaultsHelpFormatter)
    args = parser.parse_args(argv[1:])
    stats = Stats()
    for line in sys.stdin:
        stats(float(line))
    print(stats)
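# The Stats accumulator used above is defined elsewhere in the package; a minimal
# stand-in with the same call-and-print interface (an assumption, implemented here
# with Welford's online algorithm) could look like this:
import math

class Stats:
    """Online mean / standard deviation accumulator (Welford's algorithm)."""

    def __init__(self):
        self.n = 0
        self.mean = 0.0
        self.m2 = 0.0

    def __call__(self, x):
        self.n += 1
        delta = x - self.mean
        self.mean += delta / self.n
        self.m2 += delta * (x - self.mean)

    def __str__(self):
        std = math.sqrt(self.m2 / (self.n - 1)) if self.n > 1 else float('nan')
        return 'mean={:.6f} std={:.6f} n={}'.format(self.mean, std, self.n)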
def get(name):
    try:
        return getattr(stats, name)
    except AttributeError:
        raise PlotnineError(
            "Unknown distribution '{}'".format(name))
def get_continuous_distribution(name):
    if name not in continuous:
        msg = "Unknown continuous distribution '{}'"
        raise ValueError(msg.format(name))
    return getattr(stats, name)
def get_gleu_stats(self, scores):
    """calculate mean and confidence interval from all GLEU iterations"""
    mean = np.mean(scores)
    std = np.std(scores)
    ci = scipy.stats.norm.interval(0.95, loc=mean, scale=std)
    return ['%f' % mean,
            '%f' % std,
            '(%.3f,%.3f)' % (ci[0], ci[1])]
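# Standalone illustration of the interval call above (the scores here are made up):
# scipy.stats.norm.interval returns the central 95% range of a normal distribution
# with the given mean and standard deviation.
import numpy as np
import scipy.stats

scores = np.random.uniform(0.55, 0.65, size=200)  # e.g. GLEU values from repeated iterations
mean, std = np.mean(scores), np.std(scores)
lo, hi = scipy.stats.norm.interval(0.95, loc=mean, scale=std)
print('%f %f (%.3f,%.3f)' % (mean, std, lo, hi))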
def diff_exp(matrix, group1, group2, index):
    """Computes differential expression between group 1 and group 2
    for each column in the dataframe counts.
    Returns a dataframe of Z-scores and p-values."""
    g1 = matrix[group1, :]
    g2 = matrix[group2, :]
    g1mu = g1.mean(0)
    g2mu = g2.mean(0)

    mean_diff = np.asarray(g1mu - g2mu).flatten()
    # Var[X] = E[X^2] - (E[X])^2
    pooled_sd = np.sqrt(
        ((g1.power(2)).mean(0) - np.power(g1mu, 2)) / len(group1)
        + ((g2.power(2)).mean(0) - np.power(g2mu, 2)) / len(group2))
    pooled_sd = np.asarray(pooled_sd).flatten()

    z_scores = np.zeros_like(pooled_sd)
    nz = pooled_sd > 0
    z_scores[nz] = np.nan_to_num(mean_diff[nz] / pooled_sd[nz])

    # two-sided p-values from the z-scores (normal approximation)
    p_vals = (1 - stats.norm.cdf(np.abs(z_scores))) * 2

    df = pd.DataFrame(OrderedDict([('z', z_scores), ('p', p_vals)]),
                      index=index)
    return df
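# Example usage of diff_exp (a sketch with synthetic data, assuming diff_exp and its
# module-level imports are in scope; `matrix` is assumed to be a scipy.sparse
# cells-by-genes matrix so that .power(2) is available, as in the code above):
import numpy as np
import scipy.sparse

rng = np.random.default_rng(0)
counts = scipy.sparse.csr_matrix(rng.poisson(2.0, size=(20, 5)).astype(float))
group1 = list(range(0, 10))    # row indices of the first group of cells
group2 = list(range(10, 20))   # row indices of the second group
genes = ['gene{}'.format(i) for i in range(5)]

result = diff_exp(counts, group1, group2, index=genes)
print(result)  # DataFrame with one z-score and p-value per gene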
def calculate_plate_summaries(self):
    """Get mean reads, percent mapping, etc summaries for each plate"""
    well_map = self.cell_metadata.groupby(Plates.SAMPLE_MAPPING)

    # these stats are from STAR mapping
    star_cols = ['Number of input reads', 'Uniquely mapped reads number']
    star_stats = self.mapping_stats[star_cols].groupby(
        self.cell_metadata[Plates.SAMPLE_MAPPING]).sum()

    total_reads = star_stats['Number of input reads']
    unique_reads = star_stats['Uniquely mapped reads number']

    percent_ercc = well_map.sum()['ercc'].divide(total_reads, axis=0)
    percent_mapped_reads = unique_reads / total_reads - percent_ercc

    plate_summaries = pd.DataFrame(OrderedDict([
        (Plates.MEAN_READS_PER_CELL, total_reads / well_map.size()),
        (Plates.MEDIAN_GENES_PER_CELL, well_map.median()['n_genes']),
        ('Percent not uniquely aligned',
         100 * well_map.sum()['alignment_not_unique'].divide(total_reads, axis=0)),
        (Plates.PERCENT_MAPPED_READS, 100 * percent_mapped_reads),
        ('Percent no feature',
         100 * well_map.sum()['no_feature'].divide(total_reads, axis=0)),
        ('Percent Rn45s',
         100 * self.genes['Rn45s'].groupby(
             self.cell_metadata[Plates.SAMPLE_MAPPING]).sum() / total_reads),
        (Plates.PERCENT_ERCC, 100 * percent_ercc),
        ('n_wells', well_map.size())
    ]))

    return plate_summaries
def _bin_exp(self, n_bin, scale=1.0):
    """ Calculate the bin locations to approximate exponential distribution.
    It breaks the cumulative probability of the exponential distribution
    into n_bin equal bins, each covering 1 / n_bin probability. Then it
    calculates the center of mass in each bin and returns the
    centers of mass. So, it approximates the exponential distribution
    with n_bin Delta functions weighted by 1 / n_bin, at the
    locations of these centers of mass.

    Parameters:
    -----------
    n_bin: int
        The number of bins to approximate the exponential distribution
    scale: float, default: 1.0
        The scale parameter of the exponential distribution, defined in
        the same way as scipy.stats. It does not influence the ratios
        between the bins, but just controls the spacing between the bins.
        So generally users should not change its default.

    Returns:
    --------
    bins: numpy array of size [n_bin,]
        The centers of mass for each segment of the
        exponential distribution.
    """
    boundaries = np.flip(scipy.stats.expon.isf(
        np.linspace(0, 1, n_bin + 1),
        scale=scale), axis=0)
    bins = np.empty(n_bin)
    for i in np.arange(n_bin):
        bins[i] = utils.center_mass_exp(
            (boundaries[i], boundaries[i + 1]), scale=scale)
    return bins
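# Standalone sketch of the same binning idea without the utils.center_mass_exp helper
# (bin_exp_centers is a hypothetical name; segment centers are computed here with
# dist.expect instead):
import numpy as np
import scipy.stats

def bin_exp_centers(n_bin, scale=1.0):
    """Split Expon(scale) into n_bin equal-probability bins; return each bin's center of mass."""
    dist = scipy.stats.expon(scale=scale)
    edges = dist.ppf(np.linspace(0.0, 1.0, n_bin + 1))  # last edge is +inf
    centers = np.empty(n_bin)
    for i in range(n_bin):
        # E[X | edge_i < X < edge_{i+1}]; each segment holds probability 1 / n_bin
        centers[i] = dist.expect(lambda x: x, lb=edges[i], ub=edges[i + 1]) * n_bin
    return centers

print(bin_exp_centers(4))         # centers of mass of the 4 segments
print(bin_exp_centers(4).mean())  # their equal-weight average is close to the mean (scale = 1.0)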
def _set_SNR_grids(self):
    """ Set the grids and weights for SNR used in numerical integration
    of SNR parameters.
    """
    if self.SNR_prior == 'unif':
        SNR_grids = np.linspace(0, 1, self.SNR_bins)
        SNR_weights = np.ones(self.SNR_bins) / (self.SNR_bins - 1)
        SNR_weights[0] = SNR_weights[0] / 2.0
        SNR_weights[-1] = SNR_weights[-1] / 2.0
    elif self.SNR_prior == 'lognorm':
        dist = scipy.stats.lognorm
        alphas = np.arange(np.mod(self.SNR_bins, 2),
                           self.SNR_bins + 2, 2) / self.SNR_bins
        # The goal here is to divide the area under the pdf curve
        # into segments representing equal probabilities.
        bounds = dist.interval(alphas, (self.logS_range,))
        bounds = np.unique(bounds)
        # bounds contains the boundaries which equally separate
        # the probability mass of the distribution
        SNR_grids = np.zeros(self.SNR_bins)
        for i in np.arange(self.SNR_bins):
            SNR_grids[i] = dist.expect(
                lambda x: x, args=(self.logS_range,),
                lb=bounds[i], ub=bounds[i + 1]) * self.SNR_bins
        # The center of mass of each segment between consecutive
        # bounds is used as the grid point for SNR.
        SNR_weights = np.ones(self.SNR_bins) / self.SNR_bins
    else:  # SNR_prior == 'exp'
        SNR_grids = self._bin_exp(self.SNR_bins)
        SNR_weights = np.ones(self.SNR_bins) / self.SNR_bins

    SNR_weights = SNR_weights / np.sum(SNR_weights)
    return SNR_grids, SNR_weights
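# Standalone sketch of the 'lognorm' branch above (SNR_bins and the lognormal shape
# parameter are made-up values for illustration, not taken from the original class):
import numpy as np
import scipy.stats

SNR_bins = 5
logS_range = 1.0  # shape parameter of the lognormal prior

dist = scipy.stats.lognorm
# central intervals covering 1/SNR_bins, 3/SNR_bins, ... of the probability mass
alphas = np.arange(np.mod(SNR_bins, 2), SNR_bins + 2, 2) / SNR_bins
bounds = np.unique(dist.interval(alphas, logS_range))  # SNR_bins + 1 boundaries

# center of mass of each equal-probability segment, used as the SNR grid
SNR_grids = np.array([
    dist.expect(lambda x: x, args=(logS_range,), lb=lo, ub=hi) * SNR_bins
    for lo, hi in zip(bounds[:-1], bounds[1:])
])
SNR_weights = np.full(SNR_bins, 1.0 / SNR_bins)
print(SNR_grids, SNR_weights.sum())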
def lnfunc(self, args):
    # define the function which needs to be sampled (log posterior)
    # Do not use self.args in this function. You can use self.eargs.
    return scipy.stats.norm.logpdf(args['x'], loc=args['mu'], scale=args['sigma'])


def lnfunc(self, args):
    # log posterior
    temp1 = scipy.stats.norm.logpdf(args['xt'], loc=args['mu'], scale=args['sigma'])
    temp2 = scipy.stats.norm.logpdf(args['x'], loc=args['xt'], scale=args['sigma_x'])
    return temp1 + temp2


def lnfunc(self, args):
    if not self.eargs['outliers']:
        temp1 = (args['y'] - (self.args['m'] * self.args['x'] + self.args['c'])) / args['sigma_y']
        return -0.5 * (temp1 * temp1) - np.log(np.sqrt(2 * np.pi) * args['sigma_y'])
    else:
        # mixture of the straight-line model and a broad Gaussian outlier component
        temp11 = scipy.stats.norm.pdf(args['y'], loc=(self.args['m'] * self.args['x'] + self.args['c']), scale=args['sigma_y'])
        sigma_b = np.sqrt(np.square(args['sigma_y']) + np.square(args['sigma_b']))
        temp22 = scipy.stats.norm.pdf(args['y'], loc=self.args['mu_b'], scale=sigma_b)
        return np.log((1 - args['p_b']) * temp11 + args['p_b'] * temp22)


def lnfunc(self, args):
    # log posterior, xt has been integrated out
    sigma = np.sqrt(args['sigma'] * args['sigma'] + args['sigma_x'] * args['sigma_x'])
    temp = scipy.stats.norm.logpdf(args['x'], loc=args['mu'], scale=sigma)
    return temp


def lnfunc(self, args):
    # log posterior
    temp1 = []
    for i, y in enumerate(self.data['y']):
        temp1.append(np.sum(self.lngauss(y - args['alpha'][i], self.eargs['sigma'])))
    temp1 = np.array(temp1)
    temp2 = scipy.stats.norm.logpdf(args['alpha'], loc=args['mu'], scale=args['omega'])
    return temp1 + temp2
def __init__(self, stats, channels, target_dim):
    self.stats = stats
    self.channels = channels
    self.target_dim = (target_dim[0], target_dim[1])
    self.illum_corr_func = np.zeros((self.target_dim[0], self.target_dim[1], len(self.channels)))

# Based on the Singh et al. 2014 illumination-correction paper
def channel_function(self, mean_channel, disk_size):
    # TODO: get np.type from other source or parameterize or compute :/
    # We currently assume 16 bit images
    operator = skimage.morphology.disk(disk_size)
    filtered_channel = skimage.filters.median(mean_channel.astype(np.uint16), operator)
    filtered_channel = skimage.transform.resize(filtered_channel, self.target_dim, preserve_range=True)
    robust_minimum = scipy.stats.scoreatpercentile(filtered_channel, 2)
    filtered_channel = np.maximum(filtered_channel, robust_minimum)
    illum_corr_func = filtered_channel / robust_minimum
    return illum_corr_func

def compute_all(self, median_filter_size):
    disk_size = median_filter_size // 2  # from diameter to radius
    for ch in range(len(self.channels)):
        self.illum_corr_func[:, :, ch] = self.channel_function(self.stats["mean_image"][:, :, ch], disk_size)

# TODO: Is image a uint16 or float32/64? What is its data type?
# TODO: Update the test as appropriate.
# Not being used? Not needed?
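# Minimal usage sketch of the illumination correction above (synthetic data and
# made-up sizes; the class name IlluminationCorrection is an assumption, since the
# snippets only show its methods):
import numpy as np

mean_image = (np.random.rand(512, 512, 3) * 4096).astype(np.float32)  # per-channel mean images
stats = {"mean_image": mean_image}
corrector = IlluminationCorrection(stats, channels=["DNA", "ER", "Mito"], target_dim=(256, 256))
corrector.compute_all(median_filter_size=50)   # median filter with a disk radius of 25 pixels

image = np.random.rand(256, 256, 3).astype(np.float32)
corrected = image / corrector.illum_corr_func  # apply the correction channel-wise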