def test_filter_on_column_with_inf():
    """Check that rows whose feature value is infinite end up in the excluded frame."""
    frame = pd.DataFrame({
        'id': np.arange(1, 5, dtype='int64'),
        'feature_1': [1.5601, 0, 2.33, 11.32],
        'feature_ok': np.arange(1, 5),
    })
    # The zero entry in feature_1 turns into an infinite reciprocal.
    frame['feature_with_inf'] = 1/frame['feature_1']

    # Build the expected split by hand, with fresh 0..n-1 indexes.
    inf_rows = np.isinf(frame['feature_with_inf'])
    expected_excluded = frame[inf_rows].copy().reset_index(drop=True)
    expected_kept = frame[~inf_rows].copy().reset_index(drop=True)

    output_df, output_excluded_df = filter_on_column(
        frame,
        'feature_with_inf',
        'id',
        exclude_zeros=False,
        exclude_zero_sd=True)
    print(output_df)

    assert_frame_equal(output_df, expected_kept)
    assert_frame_equal(output_excluded_df, expected_excluded)
python类isinf()的实例源码
def remove_outliers_by_classifier(X, y, dates, model, m=0.9):
    """Fit ``model`` on all data and keep the fraction ``m`` it predicts best.

    The model is fitted on ``(X, y)``, the absolute error of its in-sample
    prediction is computed per sample, and the ``int(len(y) * m)`` samples
    with the smallest error are returned (ordered by increasing error; ties
    keep their original relative order).

    Args:
        X: feature array, indexable by an integer index array.
        y: target array, same length as ``X``.
        dates: per-sample array, same length as ``X``.
        model: object exposing ``fit(X, y)`` and ``predict(X)``.
        m: fraction of samples to keep.

    Returns:
        Tuple ``(X_kept, y_kept, dates_kept)``.
    """
    # Diagnostics only: problems are reported but do not stop the fit.
    if np.isnan(X).any():
        print("X contains NaN")
    if np.isinf(X).any():
        print("X contains inf")
    log_y = np.log(y)
    if np.isnan(log_y).any():
        print("y contains nan")
    if np.isinf(log_y).any():
        print("y contains inf")
    print("X=", X.shape)
    print("y=", y.shape)

    model.fit(X, y)
    abs_errors = np.asarray(np.abs(model.predict(X) - y))

    # Stable sort reproduces the tie order of Python's sorted().
    order = np.argsort(abs_errors, kind='stable')
    keep = order[:int(len(abs_errors) * m)]
    return X[keep], y[keep], dates[keep]
def reldist_linpol(tx_soa, beacon_soa):
    """Difference between tx column 1 and the beacon's column 1 interpolated at tx column 0.

    For every row of ``tx_soa`` the two beacon rows that bracket its
    column-0 value are located and the beacon's column-1 value is linearly
    interpolated between them. Samples that fall before the first or after
    the last beacon row are compared against that nearest beacon row instead
    of wrapping around to the other end of the array.

    Args:
        tx_soa: 2-D array; column 0 is the interpolation coordinate, column 1
            the value to compare.
        beacon_soa: 2-D array with the same column layout; column 0 must be
            sorted ascending (required by ``np.searchsorted``).

    Returns:
        1-D float array with one entry per row of ``tx_soa``.
    """
    beacon_rx0, beacon_rx1 = beacon_soa[:, 0], beacon_soa[:, 1]
    tx_rx0, tx_rx1 = tx_soa[:, 0], tx_soa[:, 1]

    # Bracketing beacon samples, clamped to valid indices for every element.
    last = len(beacon_rx0) - 1
    high_idx = np.searchsorted(beacon_rx0, tx_rx0)
    low_idx = np.clip(high_idx - 1, 0, last)
    high_idx = np.clip(high_idx, 0, last)

    # low == high (clamped ends) or duplicate beacon coordinates divide by zero.
    with np.errstate(divide='ignore', invalid='ignore'):
        weight = ((tx_rx0 - beacon_rx0[low_idx]) /
                  (beacon_rx0[high_idx] - beacon_rx0[low_idx]))
    # Both x/0 (inf) and 0/0 (nan) fall back to the upper beacon sample.
    weight[~np.isfinite(weight)] = 1

    # Reldist in samples
    interpolated = (beacon_rx1[low_idx] * (1 - weight) +
                    beacon_rx1[high_idx] * weight)
    return tx_rx1 - interpolated
def af_fit(self, params):
    """Score ``params`` by how well they predict next-season allele frequencies.

    For every ``(s, t)`` pair in ``self.fit_test_season_pairs`` the tips of
    season ``s`` are weighted by ``exp(self.fitness(...))``, a weighted
    frequency is computed with ``self.weighted_af`` and compared against
    ``self.af[t]``. The per-pair errors are averaged; a non-finite error
    replaces the mean with the penalty value ``1e10``.

    Side effects: fills ``self.pred_vs_true`` and sets ``self.last_fit``.

    Returns:
        Mean error plus ``regularization * sum(params**2)``;
        ``regularization`` is a module-level name not defined in this block.
    """
    # TODO: fix me for continuous prediction
    seasonal_errors = []
    self.pred_vs_true = []
    for s, t in self.fit_test_season_pairs:
        weights = np.exp(self.fitness(params, self.predictor_arrays[s][self.tree.root.season_tips[s], :]))
        pred_af = self.weighted_af(self.seqs[s], weights)
        future_diameter = 0.5*np.sum(np.sum(self.af[t]*(1-self.af[t]), axis=0), axis=0)
        seasonal_errors.append(np.sum(np.sum(pred_af*(1-self.af[t]), axis=0), axis=0)-future_diameter)
        # Only keep entries of season s where af*(1-af) exceeds 0.05.
        good_ind = self.af[s]*(1-self.af[s]) > 0.05
        # list(...) is required on Python 3, where zip() returns an iterator.
        self.pred_vs_true.append(np.array(list(zip(self.af[s][good_ind], self.af[t][good_ind], pred_af[good_ind]))))

    mean_error = np.mean(seasonal_errors)
    if not np.all(np.isfinite(seasonal_errors)):
        mean_error = 1e10
    self.last_fit = mean_error
    if self.verbose > 2:
        # Same output as the Python 2 statement `print params, self.last_fit`.
        print("%s %s" % (params, self.last_fit))
    return mean_error + regularization*np.sum(params**2)
def _get_viewpoint_estimation_labels(viewpoint_data, clss, num_classes):
"""Bounding-box regression targets are stored in a compact form in the
roidb.
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). The loss weights
are similarly expanded.
Returns:
view_target_data (ndarray): N x 3K blob of regression targets
view_loss_weights (ndarray): N x 3K blob of loss weights
"""
view_targets = np.zeros((clss.size, 3 * num_classes), dtype=np.float32)
view_loss_weights = np.zeros(view_targets.shape, dtype=np.float32)
inds = np.where( (clss > 0) & np.isfinite(viewpoint_data[:,0]) & np.isfinite(viewpoint_data[:,1]) & np.isfinite(viewpoint_data[:,2]) )[0]
for ind in inds:
cls = clss[ind]
start = 3 * cls
end = start + 3
view_targets[ind, start:end] = viewpoint_data[ind, :]
view_loss_weights[ind, start:end] = [1., 1., 1.]
assert not np.isinf(view_targets).any(), 'viewpoint undefined'
return view_targets, view_loss_weights
test_analytics.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 33
收藏 0
点赞 0
评论 0
def test_sum_inf(self):
    """Sums over data containing inf are inf, unless inf is configured to count as null."""
    import pandas.core.nanops as nanops

    with_inf = Series(np.random.randn(10))
    with_nan = with_inf.copy()
    with_inf[5:8] = np.inf
    with_nan[5:8] = np.nan
    self.assertTrue(np.isinf(with_inf.sum()))

    matrix = np.random.randn(100, 100).astype('f4')
    matrix[:, 2] = np.inf

    # With the option on, the inf series must sum like the NaN series.
    with cf.option_context("mode.use_inf_as_null", True):
        assert_almost_equal(with_inf.sum(), with_nan.sum())

    # NOTE(review): assumed to run outside the option context - confirm
    # against the upstream test; the source had lost its indentation.
    row_sums = nanops.nansum(matrix, axis=1)
    self.assertTrue(np.isinf(row_sums).all())
test_scalarmath.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 50
收藏 0
点赞 0
评论 0
def test_zero_division(self):
with np.errstate(all="ignore"):
for t in [np.complex64, np.complex128]:
a = t(0.0)
b = t(1.0)
assert_(np.isinf(b/a))
b = t(complex(np.inf, np.inf))
assert_(np.isinf(b/a))
b = t(complex(np.inf, np.nan))
assert_(np.isinf(b/a))
b = t(complex(np.nan, np.inf))
assert_(np.isinf(b/a))
b = t(complex(np.nan, np.nan))
assert_(np.isnan(b/a))
b = t(0.)
assert_(np.isnan(b/a))
def fit(self, Y):
    """
    Generates the RBF coefficients to fit a set of given data values Y for centers self.centers.

    Builds the block system ``[[K, P], [P^T, 0]]`` from the kernel matrix K
    and the polynomial matrix P, pads Y with one zero per polynomial column
    and solves it as a general (non-symmetric) linear system.

    :param Y: A set of dependent data values corresponding to self.centers
    :return: Void, sets the self.coefs values
    """
    kernel_matrix = self.EvaluateCentersKernel()
    kernel_matrix[np.isinf(kernel_matrix)] = 0  # TODO: Is there a better way to avoid the diagonal?

    monomial_basis = poly.GetMonomialBasis(self.dimension, self.poly_degree)
    poly_matrix = poly.BuildPolynomialMatrix(monomial_basis, self.centers.transpose())  # TODO: Probably remove transpose requirement

    # The lower-right block is a square zero matrix, one row/col per polynomial column.
    num_cols = np.shape(poly_matrix)[1]
    zero_mat = np.zeros((num_cols, num_cols))

    upper_matrix = np.hstack((kernel_matrix, poly_matrix))
    lower_matrix = np.hstack((poly_matrix.transpose(), zero_mat))
    rbf_matrix = np.vstack((upper_matrix, lower_matrix))

    Y = np.concatenate((Y, np.zeros((num_cols))))  # Extend with zeros for the polynomial annihilation
    # The general solver is the default; the `sym_pos` keyword no longer
    # exists in current SciPy releases, so it must not be passed.
    self.coefs = sl.solve(rbf_matrix, Y)
def test_beam_statistics(RE, resize, kernel, uint_mode, thresh_mode, min_area,
                         thresh_factor, filter_kernel, image_num, cent_num,
                         image_delay, ad_data, image_data,
                         lcls_two_bounce_system):
    """Run the ``beam_statistics`` plan and check every returned statistic is a real number.

    Every value except those stored under the ``"md"`` key must be present,
    not NaN and not infinite.
    """
    _, _, _, y1, y2 = lcls_two_bounce_system
    array_str = "image1.array_data"
    size_str = "image1.array_size"

    def test_plan():
        stats = yield from beam_statistics(
            [y1, y2], array_field=array_str, size_field=size_str,
            cent_num=cent_num, image_num=image_num,
            kernel=kernel, resize=resize, uint_mode=uint_mode,
            thresh_factor=thresh_factor, filter_kernel=filter_kernel,
            thresh_mode=thresh_mode, md="all", image_delay=image_delay,
            ad_data=ad_data, image_data=image_data)

        for det in stats.values():
            for key, val in det.items():
                if key == "md":
                    continue
                # Separate checks: the previous single `or` chain ended in
                # `not None`, which is always true, so it could never fail.
                assert val is not None
                assert not np.isnan(val)
                assert not np.isinf(val)

    RE(run_wrapper(test_plan()))
def equal(a, b, exact):
    """Compare two results, exactly or within a dtype-dependent tolerance.

    Args:
        a, b: values to compare (arrays or array-likes).
        exact: if true require element-wise equality, otherwise use
            ``allclose`` with ``atol=1e-5`` for ``f4`` data and ``1e-8``
            for everything else.

    Returns:
        Truthy when the two values are considered equal. For ``f4``/``f8``
        arrays containing NaNs (or, failing that, infs) only the *number* of
        such entries is compared.
    """
    if array_equal(a, b):
        return True

    if hasattr(a, 'dtype') and a.dtype in ['f4', 'f8']:
        nnans = isnan(a).sum()
        if nnans > 0:
            # For results containing NaNs, just check that the number
            # of NaNs is the same in both arrays. This check could be
            # made more exhaustive, but checking element by element in
            # python space is very expensive in general.
            return nnans == isnan(b).sum()
        ninfs = isinf(a).sum()
        if ninfs > 0:
            # Ditto for Inf's
            return ninfs == isinf(b).sum()

    if exact:
        # `.all()` replaces `alltrue`, which NumPy 2.0 no longer provides.
        return (shape(a) == shape(b)) and (ravel(a) == ravel(b)).all()

    if hasattr(a, 'dtype') and a.dtype == 'f4':
        atol = 1e-5  # Relax precision for special opcodes, like fmod
    else:
        atol = 1e-8
    return (shape(a) == shape(b) and
            allclose(ravel(a), ravel(b), atol=atol))
def calc_specialist_weights(numsamps):
    """
    Calculates vector of specialist weights.

    Args:
        numsamps: A nonnegative vector of ints (any array-like), specifying
            the number of samples on which each specialist predicts.

    Returns:
        A vector of floats specifying each specialist's weight
        (max(numsamps) / numsamps[i]).
        If numsamps[i] == 0 for some specialist i, the corresponding weight will be 0.
        An empty input yields an empty vector.
        Note that the return value is invariant to the scaling of numsamps by a positive constant.
    """
    counts = np.asarray(numsamps, dtype=float)
    weights = np.zeros_like(counts)
    if counts.size == 0:
        return weights
    # Divide only where the count is non-zero; the other slots keep their 0
    # and no divide-by-zero warning is raised.
    np.divide(1.0, counts, out=weights, where=counts != 0)
    return np.max(counts) * weights
def _get_viewpoint_estimation_labels(viewpoint_data, clss, num_classes):
"""Bounding-box regression targets are stored in a compact form in the
roidb.
This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). The loss weights
are similarly expanded.
Returns:
view_target_data (ndarray): N x 3K blob of regression targets
view_loss_weights (ndarray): N x 3K blob of loss weights
"""
view_targets = np.zeros((clss.size, 3 * num_classes), dtype=np.float32)
view_loss_weights = np.zeros(view_targets.shape, dtype=np.float32)
inds = np.where( (clss > 0) & np.isfinite(viewpoint_data[:,0]) & np.isfinite(viewpoint_data[:,1]) & np.isfinite(viewpoint_data[:,2]) )[0]
for ind in inds:
cls = clss[ind]
start = 3 * cls
end = start + 3
view_targets[ind, start:end] = viewpoint_data[ind, :]
view_loss_weights[ind, start:end] = [1., 1., 1.]
assert not np.isinf(view_targets).any(), 'viewpoint undefined'
return view_targets, view_loss_weights
def test_zero_division(self):
with np.errstate(all="ignore"):
for t in [np.complex64, np.complex128]:
a = t(0.0)
b = t(1.0)
assert_(np.isinf(b/a))
b = t(complex(np.inf, np.inf))
assert_(np.isinf(b/a))
b = t(complex(np.inf, np.nan))
assert_(np.isinf(b/a))
b = t(complex(np.nan, np.inf))
assert_(np.isinf(b/a))
b = t(complex(np.nan, np.nan))
assert_(np.isnan(b/a))
b = t(0.)
assert_(np.isnan(b/a))
def get_cubic_root(self):
    """Solve the cubic for x = sqrt(mu) and return the root.

    Raises:
        Exception: if any input (``_dist_to_opt``, ``_h_min``,
            ``_grad_var``) or the computed root is nan/inf.
    """
    # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
    # where x = sqrt(mu).
    # We substitute x, which is sqrt(mu), with x = y + 1.
    # It gives y^3 + py = q
    # where p = (D^2 h_min^2)/(2*C) and q = -p.
    # We use Vieta's substitution to compute the root.
    # There is only one real solution y (which is in [0, 1] ).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    # eps in the numerator is to prevent momentum = 1 in case of zero gradient
    inputs = (self._dist_to_opt, self._h_min, self._grad_var)
    if any(np.isnan(v) for v in inputs) or any(np.isinf(v) for v in inputs):
        logging.warning("Input to cubic solver has invalid nan/inf value!")
        raise Exception("Input to cubic solver has invalid nan/inf value!")

    grad_denominator = self._grad_var + eps
    p = (self._dist_to_opt + eps)**2 * (self._h_min + eps)**2 / 2 / grad_denominator
    w3 = (-math.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
    # Real cube root that keeps the sign of w3.
    w = math.copysign(1.0, w3) * math.pow(math.fabs(w3), 1.0/3.0)
    y = w - p / 3.0 / (w + eps)
    x = y + 1

    if self._verbose:
        logging.debug("p %f, denominator %f", p, self._grad_var + eps)
        logging.debug("w3 %f ", w3)
        logging.debug("y %f, denominator %f", y, w + eps)

    if np.isnan(x) or np.isinf(x):
        logging.warning("Output from cubic is invalid nan/inf value!")
        raise Exception("Output from cubic is invalid nan/inf value!")
    return x
def check_entry(key, value):
    """Return a truthy value when ``value`` is nan or inf.

    The ``'period_label'`` key is never checked and always yields ``False``.
    """
    if key == 'period_label':
        return False
    return np.isnan(value) or np.isinf(value)
############################
# Risk Metric Calculations #
############################
def check_data(X, X_names, Y):
    """Validate a design matrix, its column names and a +1/-1 label vector.

    Raises ``AssertionError`` on the first violated requirement and emits
    warnings when there is no ``'(Intercept)'`` column or when all labels
    are identical. Returns nothing.
    """
    # type checks
    assert type(X) is np.ndarray, "type(X) should be numpy.ndarray"
    assert type(Y) is np.ndarray, "type(Y) should be numpy.ndarray"
    assert type(X_names) is list, "X_names should be a list"

    # sizes and uniqueness
    n_rows, n_cols = X.shape
    assert n_rows > 0, 'X matrix must have at least 1 row'
    assert n_cols > 0, 'X matrix must have at least 1 column'
    assert len(Y) == n_rows, 'len(Y) should be same as # of rows in X'
    assert len(set(X_names)) == len(X_names), 'X_names is not unique'
    assert len(X_names) == n_cols, 'len(X_names) should be same as # of cols in X'

    # X_matrix values
    if '(Intercept)' not in X_names:
        warnings.warn("there is no column named '(Intercept)' in X_names")
    else:
        intercept_col = X[:, X_names.index('(Intercept)')]
        assert all(intercept_col == 1.0), "'(Intercept)' column should only be composed of 1s"
    assert not np.isnan(X).any(), 'X has nan entries'
    assert not np.isinf(X).any(), 'X has inf entries'

    # Y vector values
    is_positive = (Y == 1)
    is_negative = (Y == -1)
    assert all(is_positive | is_negative), 'Y[i] should = [-1,1] for all i'
    if all(is_positive):
        warnings.warn("all Y_i == 1 for all i")
    if all(is_negative):
        warnings.warn("all Y_i == -1 for all i")
    # TODO (optional) collect warnings and return those?
def setRange(self, mn, mx):
    """Set the range of values displayed by the axis.

    Usually this is handled automatically by linking the axis to a ViewBox with :func:`linkToView <pyqtgraph.AxisItem.linkToView>`

    Raises an ``Exception`` when either bound is infinite or NaN.
    """
    bounds = (mn, mx)
    for is_invalid in (np.isinf, np.isnan):
        if any(is_invalid(bounds)):
            raise Exception("Not setting range to [%s, %s]" % (str(mn), str(mx)))

    self.range = list(bounds)
    if self.autoSIPrefix:
        self.updateAutoSIPrefix()
    # Drop the cached picture before requesting an update.
    self.picture = None
    self.update()
def siScale(x, minVal=1e-25, allowUnicode=True):
    """
    Return the recommended scale factor and SI prefix string for x.

    Example::

        siScale(0.0001)   # returns (1e6, 'µ'), assuming SI_PREFIXES holds the standard prefixes
        # i.e. 0.0001 is best represented as 0.0001 * 1e6 = 100 µUnits

    NaN and infinite inputs return ``(1, '')``; magnitudes below *minVal*
    are treated as zero. Beyond the prefix tables the prefix is an exponent
    string such as ``'e27'``.
    """
    if isinstance(x, decimal.Decimal):
        x = float(x)

    try:
        not_finite = bool(np.isnan(x) or np.isinf(x))
    except:
        # Show the offending value before letting the error propagate.
        print(x, type(x))
        raise
    if not_finite:
        return(1, '')

    if abs(x) < minVal:
        m = 0
    else:
        # Power of 1000, limited to the range -9..9.
        m = int(np.clip(np.floor(np.log(abs(x))/np.log(1000)), -9.0, 9.0))

    if m == 0:
        pref = ''
    elif m < -8 or m > 8:
        pref = 'e%d' % (m*3)
    else:
        table = SI_PREFIXES if allowUnicode else SI_PREFIXES_ASCII
        pref = table[m+8]
    return (.001**m, pref)
def map(self, data):
    """Map the ``self.fieldName`` field of ``data`` to colors.

    Values are rescaled from the ``'Min'``..``'Max'`` range to 0..1
    (clipped) and passed through the color map returned by ``self.value()``;
    NaN and infinite entries get the ``'NaN'`` color instead.
    """
    values = data[self.fieldName]
    low, high = self['Min'], self['Max']
    normalized = np.clip((values - low) / (high - low), 0, 1)
    colors = self.value().map(normalized, mode='float')

    # Convert the 0-255 channels of the NaN color to 0-1 floats.
    nan_qcolor = self['NaN']
    nan_rgba = (nan_qcolor.red()/255., nan_qcolor.green()/255.,
                nan_qcolor.blue()/255., nan_qcolor.alpha()/255.)
    invalid = np.isnan(values) | np.isinf(values)
    colors[invalid] = nan_rgba
    return colors
def setRange(self, mn, mx):
    """Set the range of values displayed by the axis.

    Usually this is handled automatically by linking the axis to a ViewBox with :func:`linkToView <pyqtgraph.AxisItem.linkToView>`

    Raises an ``Exception`` when either bound is infinite or NaN.
    """
    limits = (mn, mx)
    has_inf = any(np.isinf(limits))
    if has_inf or any(np.isnan(limits)):
        raise Exception("Not setting range to [%s, %s]" % (str(mn), str(mx)))

    self.range = [mn, mx]
    if self.autoSIPrefix:
        self.updateAutoSIPrefix()
    # Drop the cached picture before requesting an update.
    self.picture = None
    self.update()