def makedists(pdata, binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
    pds = pdata.shape; nlat = pds[1]; nlon = pds[0]; nd = pds[2]
    bins = np.append(0, binl)
    n = np.empty((nlon, nlat, len(binl)))
    binno = np.empty(pdata.shape)
    for ilon in range(nlon):
        for ilat in range(nlat):
            # this is the histogram - we'll get frequency from this
            thisn, thisbin = np.histogram(pdata[ilon, ilat, :], bins)
            n[ilon, ilat, :] = thisn
            # these are the bin locations. we'll use these for the amount dist
            binno[ilon, ilat, :] = np.digitize(pdata[ilon, ilat, :], bins)
    #### Calculate the number of days with non-missing data, for normalization
    ndmat = np.tile(np.expand_dims(np.nansum(n, axis=2), axis=2), (1, 1, len(bins) - 1))
    thisppdfmap = n / ndmat
    #### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
    testpamtmap = np.empty(thisppdfmap.shape)
    for ibin in range(len(bins) - 1):
        testpamtmap[:, :, ibin] = (pdata * (ibin == binno)).sum(axis=2)
    thispamtmap = testpamtmap / ndmat
    return thisppdfmap, thispamtmap
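
A minimal usage sketch (not part of the original project), assuming `numpy` is imported as `np` in the module defining `makedists`; the grid shape and bin edges below are made up for illustration:

# Hypothetical inputs: daily precipitation on a 4 x 3 lon/lat grid, bin edges in mm/day.
import numpy as np

rng = np.random.default_rng(0)
pdata = rng.gamma(shape=0.5, scale=4.0, size=(4, 3, 365))    # (nlon, nlat, ndays)
binl = np.array([0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 50.0])

ppdf, pamt = makedists(pdata, binl)
print(ppdf.shape, pamt.shape)                # both (4, 3, 8)
print(np.allclose(ppdf.sum(axis=2), 1.0))    # the frequency distribution sums to 1 per grid cell
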
def plot_power_rose(wind_directions, power, num_wd_bins):
    """Plot a power rose. Kind of a hacked wind rose.

    Arguments:
    wind_directions -- a np array of wind directions filtered for icing
    power -- a np array of percent power production corresponding to wind_directions
    num_wd_bins -- the number of wind direction bins to include on the rose.
    """
    dir_bins = np.array(np.linspace(0.0, 360.0 - 360.0 / num_wd_bins, num_wd_bins))
    # Find the total amount of power produced in each sector.
    dir_power = np.array([np.nansum(filter_obstacles(power, wind_directions, (wd + 180.0) % 360.0, 360 - 360 / float(num_wd_bins))) for wd in dir_bins])
    dir_power = np.round(dir_power * 100.0 / np.nansum(dir_power), decimals=0)  # Normalize it and round to nearest int.
    proportional_wd = np.array([])
    for i in range(len(dir_power)):
        for n in range(int(dir_power[i])):  # Loop as many times as the percent of power produced in this sector.
            proportional_wd = np.append(proportional_wd, dir_bins[i])  # i.e., if 50% of power comes from the south, append 50 instances of 180.0 degrees.
    ones = np.ones(len(proportional_wd))
    ax = new_axes()
    ax.bar(proportional_wd, ones, normed=False, opening=0.8, edgecolor='white', bins=[0.0, 100.], cmap=cm.RdGy)
    set_legend(ax)
def ests_ll_quad(self, params):
    """
    Calculate the loglikelihood given model parameters `params`.

    This method uses Gaussian quadrature, and thus returns an *approximate*
    integral.
    """
    mu0, gamma0, err0 = np.split(params, 3)
    x = np.tile(self.z, (self.cfg.QCOUNT, 1, 1))  # (QCOUNT x nhosp x nmeas)
    loc = mu0 + np.outer(QC1, gamma0)
    loc = np.tile(loc, (self.n, 1, 1))
    loc = np.transpose(loc, (1, 0, 2))
    scale = np.tile(err0, (self.cfg.QCOUNT, self.n, 1))
    zs = lpdf_3d(x=x, loc=loc, scale=scale)
    w2 = np.tile(self.w, (self.cfg.QCOUNT, 1, 1))
    wted = np.nansum(w2 * zs, axis=2).T  # (nhosp x QCOUNT)
    qh = np.tile(QC1, (self.n, 1))  # (nhosp x QCOUNT)
    combined = wted + norm.logpdf(qh)  # (nhosp x QCOUNT)
    return logsumexp(np.nan_to_num(combined), b=QC2, axis=1)  # (nhosp)
def chi2(b, dataset, model1='phoebe1model', model2='phoebe2model'):
    ds = b.get_dataset(dataset) - b.get_dataset(dataset, method='*dep')
    if ds.method == 'lc':
        depvar = 'fluxes'
    elif ds.method == 'rv':
        depvar = 'rvs'
    else:
        raise NotImplementedError("chi2 doesn't support dataset method: '{}'".format(ds.method))

    chi2 = 0.0
    for comp in ds.components if len(ds.components) else [None]:
        if comp == '_default':
            continue
        # phoebe gives nans for RVs when a star is completely eclipsed, whereas
        # phoebe1 will give a value. So let's use nansum to just ignore those
        # regions of the RV curve
        print "***", depvar, dataset, model1, model2, comp
        chi2 += np.nansum((b.get_value(qualifier=depvar, dataset=dataset, model=model1, component=comp, context='model')
                           - b.get_value(qualifier=depvar, dataset=dataset, model=model2, component=comp, context='model'))**2)
    return chi2
def weighted_average(weights, pep_abd, group_ix):
    '''
    Calculate weighted geometric means for sample groups
    Inputs:
        weights:  weights of peptides after filtering by loading threshold
        pep_abd:  peptide abundances after filtering by loading threshold
        group_ix: array indexes of sample groups
    '''
    global nGroups
    abd_w = pep_abd * weights[..., None]
    one_w = abd_w / abd_w * weights[..., None]
    a_sums = np.nansum(abd_w, axis=0)
    w_sums = np.nansum(one_w, axis=0)
    expr = np.empty(nGroups)
    for i in range(expr.shape[0]):
        expr[i] = a_sums[group_ix[i]].sum() / w_sums[group_ix[i]].sum()
    return expr
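
A hypothetical call (not from the original project), assuming this sketch lives in the same module as `weighted_average` so the module-level `nGroups` global it reads is defined; the abundances and groups are made up:

import numpy as np

nGroups = 2                                      # module-level global read by weighted_average
weights = np.array([1.0, 0.5, 2.0])              # per-peptide weights
pep_abd = np.array([[1.0, 2.0, np.nan, 3.0],
                    [2.0, np.nan, 1.0, 1.0],
                    [0.5, 1.0, 2.0, np.nan]])    # peptides x samples
group_ix = [np.array([0, 1]), np.array([2, 3])]  # sample columns belonging to each group

print(weighted_average(weights, pep_abd, group_ix))
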
def pwdist_canberra(self, seq1idx, seq2idx):
    """Compute the Canberra distance between two vectors.

    References:
        1. http://scipy.org/

    Notes:
        When `u[i]` and `v[i]` are 0 for given i, then
        the fraction 0/0 = 0 is used in the calculation.
    """
    u = self[seq1idx]
    v = self[seq2idx]
    olderr = np.seterr(invalid='ignore')
    try:
        d = np.nansum(abs(u - v) / (abs(u) + abs(v)))
    finally:
        np.seterr(**olderr)
    return d
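
The same nansum trick shown stand-alone (not the class method itself, and with made-up vectors): the 0/0 term at index 0 becomes NaN and is dropped by `np.nansum`, which implements the 0/0 = 0 convention from the docstring:

import numpy as np

u = np.array([0.0, 1.0, 2.0, 0.0])
v = np.array([0.0, 3.0, 2.0, 1.0])

olderr = np.seterr(invalid='ignore')
try:
    d = np.nansum(np.abs(u - v) / (np.abs(u) + np.abs(v)))
finally:
    np.seterr(**olderr)

print(d)  # 0 (dropped) + 0.5 + 0.0 + 1.0 = 1.5
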
def normalize(self, to=1.0):
    """
    Normalize the frame so that the sum of all pixel values equals `to`.
    :param to: target sum after normalization
    :return:
    """
    # Calculate the sum of all the pixels
    sum = np.nansum(self)
    # Calculate the conversion factor
    factor = to / sum
    # Multiply the frame with the conversion factor
    self.__imul__(factor)

# -----------------------------------------------------------------
def calculate_optimizer_time(trials):
    optimizer_time = []
    time_idx = 0
    optimizer_time.append(trials.cv_starttime[0] - trials.starttime[time_idx])
    for i in range(len(trials.cv_starttime[1:])):
        if trials.cv_starttime[i + 1] > trials.endtime[time_idx]:
            optimizer_time.append(trials.endtime[time_idx] -
                                  trials.cv_endtime[i])
            time_idx += 1
            optimizer_time.append(trials.cv_starttime[i + 1] -
                                  trials.starttime[time_idx])
        else:
            optimizer_time.append(trials.cv_starttime[i + 1] -
                                  trials.cv_endtime[i])
    optimizer_time.append(trials.endtime[time_idx] - trials.cv_endtime[-1])
    trials.optimizer_time = optimizer_time

    # We need to import numpy again
    import numpy as np
    return np.nansum(optimizer_time)
def lnlike(self, pars):
    # Pull theta out of pars
    theta = pars[:self.Nbins]

    # Generate the inner summation
    gamma = np.ones_like(self.bin_idx) * np.nan
    good = (self.bin_idx < self.Nbins) & (self.bin_idx >= 0)  # nans in q get put in nonexistent bins
    gamma[good] = self.Nobs * self.censoring_fcn(self.mcmc_samples[good]) * theta[self.bin_idx[good]]
    summation = np.nanmean(gamma, axis=1)

    # Calculate the integral
    I = self._integral_fcn(theta)

    # Generate the log-likelihood
    ll = -I + np.nansum(np.log(summation))
    return ll
# From test_analytics.py in project PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda (author: SignalMedia)
def test_sum_inf(self):
    import pandas.core.nanops as nanops

    s = Series(np.random.randn(10))
    s2 = s.copy()

    s[5:8] = np.inf
    s2[5:8] = np.nan

    self.assertTrue(np.isinf(s.sum()))

    arr = np.random.randn(100, 100).astype('f4')
    arr[:, 2] = np.inf

    with cf.option_context("mode.use_inf_as_null", True):
        assert_almost_equal(s.sum(), s2.sum())

    res = nanops.nansum(arr, axis=1)
    self.assertTrue(np.isinf(res).all())
def r(self):
    """
    Pearson correlation of the fitted Variogram

    :return:
    """
    # get the experimental and theoretical variogram and calculate means
    experimental, model = self.__model_deviations()
    mx = np.nanmean(experimental)
    my = np.nanmean(model)

    # calculate the single pearson correlation terms
    term1 = np.nansum(np.fromiter(map(lambda x, y: (x - mx) * (y - my), experimental, model), np.float))
    t2x = np.nansum(np.fromiter(map(lambda x: (x - mx)**2, experimental), np.float))
    t2y = np.nansum(np.fromiter(map(lambda y: (y - my)**2, model), np.float))

    return term1 / (np.sqrt(t2x * t2y))
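
A stand-alone sketch of the same nansum-based Pearson correlation on two plain arrays (the method itself obtains `experimental` and `model` from the fitted variogram via `__model_deviations`); the numbers are invented:

import numpy as np

experimental = np.array([0.10, 0.40, 0.80, 1.10, np.nan])
model        = np.array([0.20, 0.50, 0.70, 1.00, 1.20])

mx, my = np.nanmean(experimental), np.nanmean(model)
term1 = np.nansum((experimental - mx) * (model - my))   # NaN pairs drop out of the sum
t2x = np.nansum((experimental - mx) ** 2)
t2y = np.nansum((model - my) ** 2)

print(term1 / np.sqrt(t2x * t2y))   # ~0.73 for these values
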
def entropy(v, axis=0):
    """
    Optimized implementation of entropy. This version is faster than that in
    scipy.stats.distributions, particularly over long vectors.
    """
    v = numpy.array(v, dtype='float')
    s = numpy.sum(v, axis=axis)
    with numpy.errstate(divide='ignore', invalid='ignore'):
        rhs = numpy.nansum(v * numpy.log(v), axis=axis) / s
        r = numpy.log(s) - rhs

    # Where dealing with binarized events, it is possible that an event always
    # occurs and thus has 0 information. In this case, the negative class
    # will have frequency 0, resulting in log(0) being computed as nan.
    # We replace these nans with 0
    nan_index = numpy.isnan(rhs)
    if nan_index.any():
        r[nan_index] = 0
    return r
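
A quick usage sketch (inputs invented here): the vectors are unnormalized counts, and a 2-D input is handled column-wise via `axis`:

import numpy

counts = numpy.array([[5., 1.],
                      [5., 9.]])
print(entropy(counts, axis=0))  # approx. [0.693, 0.325]: uniform column vs. skewed column
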
def __call__(self, y_true_proba, y_proba):
    """
    See Murphy (1973) A vector partition of the probability score
    """
    np.seterr(divide="ignore")
    pos_obs_freq = np.histogram(
        y_proba[y_true_proba == 1], bins=self.bins)[0]
    fore_freq = np.histogram(y_proba, bins=self.bins)[0]
    climo = y_true_proba.mean()
    unc = climo * (1 - climo)
    pos_obs_rel_freq = np.zeros(pos_obs_freq.size)
    for p in range(pos_obs_rel_freq.size):
        if fore_freq[p] > 0:
            pos_obs_rel_freq[p] = pos_obs_freq[p] / fore_freq[p]
        else:
            pos_obs_rel_freq[p] = np.nan
    score = np.nansum(fore_freq * (pos_obs_rel_freq - climo) ** 2)
    score /= float(y_proba.size)
    return score / unc
def cluster_f_measure(ytrue, pred):
    # higher is better
    assert len(ytrue) == len(pred), 'inputs length must be equal.'
    label2ix = {label: i for i, label in enumerate(np.unique(ytrue))}
    _ytrue = np.array([label2ix[v] for v in ytrue])
    nSize = len(_ytrue)
    nClassTrue = len(np.unique(ytrue))
    nClassPred = len(np.unique(pred))
    f = np.zeros((nClassTrue, nClassPred)).astype(dtype=np.float64)
    for i in xrange(nClassTrue):
        freq_i = len(_ytrue[_ytrue == i])
        for j in xrange(nClassPred):
            freq_j = len(pred[pred == j])
            freq_i_j = float(len(filter(lambda x: x == j, pred[_ytrue == i])))
            precision = freq_i_j / freq_j if freq_j != 0 else 0
            recall = freq_i_j / freq_i if freq_i != 0 else 0
            if precision == 0 or recall == 0:
                f[i, j] = 0.
            else:
                f[i, j] = 2. * (precision * recall) / (precision + recall)
    return np.nansum([f[i][j] * len(_ytrue[_ytrue == i]) for i in xrange(nClassTrue) for j in xrange(nClassPred)]) / nSize
def ponderateByConcentration():
    print 'Loading feature concentration..........'
    sdFile = open('varStandarDevs.txt', 'rb')
    standevs = pickle.load(sdFile)
    sdFile.close()
    totDevs = {}
    for feature in standevs:
        totDevs[feature] = sum([abs(standevs[feature][si]) for si in range(len(standevs[feature]))]) / len(standevs[feature])
    localF = ['turningAngle', 'turningAngleDifference', 'Coord', 'LP']
    globalF = ['accAngle', 'coG', 'relStrokeLength', 'liS', 'quadraticError']
    totalF = ['turningAngle', 'turningAngleDifference', 'Coord', 'LP', 'Style', 'accAngle', 'coG', 'relStrokeLength', 'liS', 'quadraticError']
    print 'Ponderating features..........'
    weights = {}
    norm = np.nansum([1 / float(math.sqrt(totDevs[feature])) for feature in totalF])
    for feature in totalF:
        weights[feature] = (1 / float(math.sqrt(totDevs[feature]))) / float(norm)
    print 'Features weighted as'
    print weights
    return weights
def nandot(a, b):  # TODO: speed up, avoid copying data
    "A numpy.dot() replacement which treats (0*-Inf)==0 and works around BLAS NaN bugs in matrices."
    # important note: a contains zeros and b contains inf/-inf/nan, not the other way around

    # workaround for zero*-inf=nan in dot product (must be 0 according to 0^0=1 with probabilities)
    # 1) calculate dot product
    # 2) select nan entries
    # 3) re-calculate matrix entries where 0*inf = 0 using np.nansum()
    tmp = np.dot(a, b)
    indices = np.where(np.isnan(tmp))
    ri, ci = indices
    with np.errstate(invalid='ignore'):
        values = np.nansum(a[ri, :] * b[:, ci].T, axis=1)
        values[np.isnan(values)] = 0.0
    tmp[indices] = values
    return tmp
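
A minimal check of the 0 * (-inf) convention (the matrices here are made up): plain `np.dot` yields NaN for the affected entry, while `nandot` recomputes it with `np.nansum` so the zero wins:

import numpy as np

a = np.array([[0.0, 1.0]])
b = np.array([[-np.inf],
              [2.0]])

print(np.dot(a, b))   # [[nan]] because 0 * -inf = nan
print(nandot(a, b))   # [[2.]]  the nan term is dropped, leaving 1 * 2
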
def argnanmedoid(x, axis=1):
    """
    Return the indices of the medoid

    :param x: input array
    :param axis: axis to medoid along
    :return: indices of the medoid
    """
    if axis == 0:
        x = x.T

    invalid = anynan(x, axis=0)
    band, time = x.shape
    diff = x.reshape(band, time, 1) - x.reshape(band, 1, time)
    dist = np.sqrt(np.sum(diff * diff, axis=0))  # dist = np.linalg.norm(diff, axis=0) is slower somehow...
    dist_sum = nansum(dist, axis=0)
    dist_sum[invalid] = np.inf
    i = np.argmin(dist_sum)

    return i
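
A small sketch with a hand-made (bands, time) array; it assumes `anynan` and `nansum` are the helpers imported in this module (behaving like `np.isnan(...).any(axis=...)` and `np.nansum` here). The NaN-containing time step is masked out and the outlier is never chosen as the medoid:

import numpy as np

x = np.array([[1.0, 1.1, 5.0, np.nan],
              [2.0, 2.1, 9.0, 0.0]])   # shape (bands=2, time=4)

print(argnanmedoid(x))  # 1 -> the time step with the smallest summed distance to the others
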
def medoid_indices(arr, invalid=None):
    """
    The indices of the medoid.

    :arg arr: input array
    :arg invalid: mask for invalid data containing NaNs
    """
    # vectorized version of `argnanmedoid`
    bands, times, ys, xs = arr.shape

    diff = (arr.reshape(bands, times, 1, ys, xs) -
            arr.reshape(bands, 1, times, ys, xs))

    dist = np.linalg.norm(diff, axis=0)
    dist_sum = nansum(dist, axis=0)

    if invalid is None:
        # compute it in case it's not already available
        invalid = anynan(arr, axis=0)

    dist_sum[invalid] = np.inf
    return np.argmin(dist_sum, axis=0)
def frame_to_series(self, field, frame, columns=None):
    """
    Convert a frame with a DatetimeIndex and sid columns into a series with
    a sid index, using the aggregator defined by the given field.
    """
    if isinstance(frame, pd.DataFrame):
        columns = frame.columns
        frame = frame.values

    if not len(frame):
        return pd.Series(
            data=(0 if field == 'volume' else np.nan),
            index=columns,
        ).values

    if field in ['price', 'close']:
        # shortcircuit for full last row
        vals = frame[-1]
        if np.all(~np.isnan(vals)):
            return vals
        return ffill(frame)[-1]
    elif field == 'open':
        return bfill(frame)[0]
    elif field == 'volume':
        return np.nansum(frame, axis=0)
    elif field == 'high':
        return np.nanmax(frame, axis=0)
    elif field == 'low':
        return np.nanmin(frame, axis=0)
    else:
        raise ValueError("Unknown field {}".format(field))
def getJointNumFramesVisible(self, jointID):
    """
    Get number of frames in which joint is visible

    :param jointID: joint ID
    :return: number of frames
    """
    return numpy.nansum(self.gt[:, jointID, :]) / self.gt.shape[2]  # 3D
def est_pmf_from_mpps(self, other, samples, eps=1e-10):
    """Estimate probability mass function from MPPovmList samples

    :param MPPovmList other: An :class:`MPPovmList` instance
    :param samples: Iterable of samples (e.g. from
        :func:`MPPovmList.samples()`)

    :returns: `(p_est, n_samples_used)`, both are shape
        `self.nsoutdims` ndarrays. `p_est` provides estimated
        probabilities and `n_samples_used` provides the effective
        number of samples used for each probability.
    """
    assert len(other.mpps) == len(samples)
    pmf_ests = np.zeros((len(other.mpps),) + self.nsoutdims, float)
    n_samples = np.zeros(len(other.mpps), int)
    for pos, other_mpp, other_samples in zip(it.count(), other.mpps, samples):
        pmf_ests[pos, ...], n_samples[pos] = self.est_pmf_from(
            other_mpp, other_samples, eps)

    n_out = np.prod(self.nsoutdims)
    pmf_ests = pmf_ests.reshape((len(other.mpps), n_out))
    given = ~np.isnan(pmf_ests)
    n_samples_used = (given * n_samples[:, None]).sum(0)
    # Weighted average over available estimates according to the
    # number of samples underlying each estimate. Probabilities
    # without any estimates produce 0.0 / 0 = nan in `pmf_est`.
    pmf_est = np.nansum(pmf_ests * n_samples[:, None], 0) / n_samples_used
    return (pmf_est.reshape(self.nsoutdims),
            n_samples_used.reshape(self.nsoutdims))
def test_nansum_with_boolean(self):
    # gh-2978
    a = np.zeros(2, dtype=np.bool)
    try:
        np.nansum(a)
    except:
        raise AssertionError()
def test_nansum(self):
    tgt = np.sum(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nansum(mat), tgt)
def test_allnans(self):
    # Check for FutureWarning
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        res = np.nansum([np.nan]*3, axis=None)
        assert_(res == 0, 'result is not 0')
        assert_(len(w) == 0, 'warning raised')
        # Check scalar
        res = np.nansum(np.nan)
        assert_(res == 0, 'result is not 0')
        assert_(len(w) == 0, 'warning raised')
        # Check there is no warning for not all-nan
        np.nansum([0]*3, axis=None)
        assert_(len(w) == 0, 'unwanted warning raised')
def test_empty(self):
    for f, tgt_value in zip([np.nansum, np.nanprod], [0, 1]):
        mat = np.zeros((0, 3))

        tgt = [tgt_value]*3
        res = f(mat, axis=0)
        assert_equal(res, tgt)

        tgt = []
        res = f(mat, axis=1)
        assert_equal(res, tgt)

        tgt = tgt_value
        res = f(mat, axis=None)
        assert_equal(res, tgt)
def compact_logit(x, eps=.00001):
    import warnings
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="divide by zero encountered in true_divide")
        warnings.filterwarnings("ignore", message="divide by zero encountered in log")
        warnings.filterwarnings("ignore", message="invalid value encountered in multiply")
        return np.nansum(((x <= eps)*x,
                          (x >= (1 - eps))*x,
                          ((x > eps) & (x < (1 - eps)))*((1 - 2*eps)*(np.log(x/(1 - x)))/(2*np.log((1 - eps)/eps)) + .5)),
                         axis=0)
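
A quick numerical sketch of the piecewise behaviour (test values made up): inputs at or beyond the `eps` edges pass through unchanged, interior values get a logit rescaled into (0, 1), and `np.nansum` over the three branches discards the NaN produced by the inactive branch:

import numpy as np

x = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
print(compact_logit(x))  # approx. [0.0, 0.452, 0.5, 0.548, 1.0]
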
def _get_weights(self, data, kpi, variant):
    if kpi not in self.reference_kpis:
        return 1.0
    reference_kpi = self.reference_kpis[kpi]
    x = self.get_kpi_by_name_and_variant(data, reference_kpi, variant)
    zeros_and_nans = sum(x == 0) + np.isnan(x).sum()
    non_zeros = len(x) - zeros_and_nans
    return non_zeros / np.nansum(x) * x
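
A stand-alone sketch of the weighting formula on a made-up reference-KPI vector (the method itself pulls `x` from the dataset): the weights are proportional to `x` and normalized so they sum to the number of usable (non-zero, non-NaN) entries:

import numpy as np

x = np.array([4.0, 0.0, 2.0, np.nan, 6.0])
zeros_and_nans = sum(x == 0) + np.isnan(x).sum()
non_zeros = len(x) - zeros_and_nans
w = non_zeros / np.nansum(x) * x

print(w)             # [1.  0.  0.5 nan 1.5]
print(np.nansum(w))  # 3.0 == number of usable entries
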
def KL(a, b):
    """Calculate the Kullback-Leibler divergence between a and b """
    D_KL = np.nansum(np.multiply(a, np.log(np.divide(a, b + np.spacing(1)))), axis=1)
    return D_KL
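
A small usage sketch (distributions invented here): `a` and `b` hold row-wise probability distributions; zero entries in `a` produce 0 * log(0) = NaN terms that `np.nansum` drops, following the usual 0 log 0 = 0 convention. NumPy may emit RuntimeWarnings for the log(0) terms, but the result is unaffected:

import numpy as np

a = np.array([[0.5, 0.5, 0.0],
              [0.2, 0.3, 0.5]])
b = np.array([[0.25, 0.25, 0.5],
              [0.2, 0.3, 0.5]])

print(KL(a, b))  # approx. [0.693, 0.0]
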