def _check_annotations(value):
    """
    Recursively check that value is either of a "simple" type (number, string,
    date/time) or is a (possibly nested) dict, list, tuple or numpy array
    containing only simple types.

    For dicts only the values are checked, not the keys. For numpy arrays
    the check is made on the array's dtype rather than on each element.

    Raises
    ------
    ValueError
        If the value, an element nested inside it, or an array dtype is not
        one of ALLOWED_ANNOTATION_TYPES.
    """
    if isinstance(value, np.ndarray):
        if not issubclass(value.dtype.type, ALLOWED_ANNOTATION_TYPES):
            # The trailing space keeps the two adjacent literals from
            # running together in the final message.
            raise ValueError("Invalid annotation. NumPy arrays with dtype %s "
                             "are not allowed" % value.dtype.type)
    elif isinstance(value, dict):
        for element in value.values():
            _check_annotations(element)
    elif isinstance(value, (list, tuple)):
        for element in value:
            _check_annotations(element)
    elif not isinstance(value, ALLOWED_ANNOTATION_TYPES):
        raise ValueError("Invalid annotation. Annotations of type %s are not "
                         "allowed" % type(value))
python类number()的实例源码
def _check_annotations(value):
    """
    Recursively check that value is either of a "simple" type (number, string,
    date/time) or is a (possibly nested) dict, list, tuple or numpy array
    containing only simple types.

    For dicts only the values are checked, not the keys. For numpy arrays
    the check is made on the array's dtype rather than on each element.

    Raises
    ------
    ValueError
        If the value, an element nested inside it, or an array dtype is not
        one of ALLOWED_ANNOTATION_TYPES.
    """
    if isinstance(value, np.ndarray):
        if not issubclass(value.dtype.type, ALLOWED_ANNOTATION_TYPES):
            # The trailing space keeps the two adjacent literals from
            # running together in the final message.
            raise ValueError("Invalid annotation. NumPy arrays with dtype %s "
                             "are not allowed" % value.dtype.type)
    elif isinstance(value, dict):
        for element in value.values():
            _check_annotations(element)
    elif isinstance(value, (list, tuple)):
        for element in value:
            _check_annotations(element)
    elif not isinstance(value, ALLOWED_ANNOTATION_TYPES):
        raise ValueError("Invalid annotation. Annotations of type %s are not "
                         "allowed" % type(value))
def linear_trajectory_to(self, target_tf, traj_len):
    """Creates a trajectory of poses linearly interpolated from this tf to a target tf.

    The interpolation parameter takes the values ``i / (traj_len + 1)`` for
    ``i = 0 .. traj_len``; ``target_tf`` itself is appended as the final
    element. The returned list therefore always holds ``traj_len + 2``
    entries.

    Parameters
    ----------
    target_tf : :obj:`RigidTransform`
        The RigidTransform to interpolate to.
    traj_len : int
        The number of interpolated poses strictly between the first pose
        (parameter 0) and the target.

    Returns
    -------
    :obj:`list` of :obj:`RigidTransform`
        A list of interpolated transforms from this transform to the target.

    Raises
    ------
    ValueError
        If ``traj_len`` is negative.
    """
    if traj_len < 0:
        raise ValueError('Traj len must at least 0')
    delta_t = 1.0 / (traj_len + 1)
    # Step with an integer counter: repeatedly adding delta_t accumulates
    # rounding error, so a `while t < 1.0` loop can emit one pose too many
    # (e.g. ten additions of 0.1 give 0.9999999999999999).
    traj = [self.interpolate_with(target_tf, step * delta_t)
            for step in range(traj_len + 1)]
    traj.append(target_tf)
    return traj
def drop_inconsistent_keys(self, columns, obj):
    """Drop inconsistent keys

    Remove keys of an unexpected type from a ValueCounts or Histogram object.

    :param list columns: column names used to look up the expected data types in ``self.var_dtype``
    :param object obj: ValueCounts or Histogram object to drop inconsistent keys from
    :returns: the same ``obj``, after ``remove_keys_of_inconsistent_type`` has been called on it
    """
    def _comparison_type(col):
        # Numeric and datetime columns are compared as np.int64
        # (presumably because their values were converted earlier -- confirm
        # against the caller); every other column keeps its declared type.
        declared = self.var_dtype[col]
        sample = np.dtype(declared).type()
        if isinstance(sample, (np.number, np.datetime64)):
            return np.int64
        return declared

    comp_dtype = [_comparison_type(col) for col in columns]
    # keep only keys of types in comp_dtype
    obj.remove_keys_of_inconsistent_type(prefered_key_type=comp_dtype)
    return obj
def categorize_columns(self, df):
    """Categorize columns of dataframe by data type

    Each column named in ``self.columns`` (an iterable of iterables of column
    names) is checked for presence in ``df``. Its data type is recorded in
    ``self.var_dtype`` unless an entry already exists, and the column name is
    appended to ``self.num_cols``, ``self.dt_cols`` or ``self.str_cols``.

    :param df: input (pandas) data frame
    :raises KeyError: if a requested column is not present in ``df``
    :raises TypeError: if a column's data type is found in none of
        STRING_SUBSTR, NUMERIC_SUBSTR and TIME_SUBSTR
    """
    # check presence and data type of requested columns
    # sort columns into numerical, timestamp and category based
    for c in self.columns:
        for col in c:
            if col not in df.columns:
                raise KeyError('column "{0:s}" not in dataframe "{1:s}"'.format(col, self.read_key))
            dt = self.get_data_type(df, col)
            if col not in self.var_dtype:
                self.var_dtype[col] = dt.type
                # np.bytes_ is the class formerly also exposed as np.string_;
                # that alias was removed in NumPy 2.0.
                if (self.var_dtype[col] is np.bytes_) or (self.var_dtype[col] is np.object_):
                    self.var_dtype[col] = str
            if not any(dt in types for types in (STRING_SUBSTR, NUMERIC_SUBSTR, TIME_SUBSTR)):
                raise TypeError('cannot process column "{0:s}" of data type "{1:s}"'.format(col, str(dt)))
            # classify using an instance of the column's scalar type
            is_number = isinstance(dt.type(), np.number)
            is_timestamp = isinstance(dt.type(), np.datetime64)
            colset = self.num_cols if is_number else self.dt_cols if is_timestamp else self.str_cols
            if col not in colset:
                colset.append(col)
            self.log().debug('Data type of column "%s" is "%s"', col, self.var_dtype[col])
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Return each element rounded to the given number of decimals.

    The underlying data is rounded and viewed as ``type(self)``; the mask of
    this array is carried over to the result.

    Refer to `numpy.around` for full documentation.

    See Also
    --------
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out)
    result = rounded.view(type(self))
    if result.ndim > 0:
        result._mask = self._mask
        result._update_from(self)
    elif self._mask:
        # A masked scalar rounds to the masked constant
        result = masked
    if out is not None:
        # Explicit output: copy the mask over when the target can hold one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return result
def setup_class(cls):
    """Load the decathlon CSV and precompute the fixtures stored on ``cls``.

    Sets ``df_categorical``, ``df_numeric``, ``n``, ``p``, ``center_reduced``,
    ``cov``, ``n_components`` and ``pca``.
    """
    # Load a dataframe
    dataframe = pd.read_csv('tests/data/decathlon.csv', index_col=0)

    # Split into non-numeric ("categorical") and numeric columns
    categorical = dataframe.select_dtypes(exclude=[np.number])
    numeric = dataframe.drop(categorical.columns, axis='columns')
    cls.df_categorical = categorical
    cls.df_numeric = numeric

    # Size of the numerical part of the dataframe
    cls.n, cls.p = numeric.shape

    # Standardise each numeric column, then form the cross-product matrix
    standardized = ((numeric - numeric.mean()) / numeric.std()).values
    cls.center_reduced = standardized
    cls.cov = standardized.T @ standardized

    # Calculate a full PCA (one component per numeric column)
    cls.n_components = len(numeric.columns)
    cls.pca = PCA(dataframe, n_components=cls.n_components, scaled=True)
def _filter(self, dataframe, supplementary_row_names, supplementary_column_names):
    """Split supplementary rows/columns off ``dataframe``.

    Stores the non-numeric columns, the supplementary rows (without the
    supplementary columns) and the supplementary columns (without the
    supplementary rows) on ``self``, then removes the supplementary rows and
    columns from ``dataframe`` in place. Returns nothing.
    """
    # Extract the categorical columns
    self.categorical_columns = dataframe.select_dtypes(exclude=[np.number])

    # Extract the supplementary rows
    self.supplementary_rows = sup_rows = dataframe.loc[supplementary_row_names].copy()
    sup_rows.drop(columns=supplementary_column_names, inplace=True)

    # Extract the supplementary columns
    self.supplementary_columns = sup_cols = dataframe[supplementary_column_names].copy()
    sup_cols.drop(index=supplementary_row_names, inplace=True)

    # Remove the supplementary columns and rows from the caller's dataframe
    dataframe.drop(index=supplementary_row_names, inplace=True)
    dataframe.drop(columns=supplementary_column_names, inplace=True)
def _filter(self, dataframe, supplementary_row_names, supplementary_column_names):
    """Split supplementary rows/columns and categorical columns off ``dataframe``.

    Stores the non-numeric columns, the supplementary rows (without the
    non-numeric columns) and the supplementary columns (without the
    supplementary rows) on ``self``, then removes the supplementary rows,
    the supplementary columns and the non-numeric columns from ``dataframe``
    in place. Returns nothing.
    """
    # Extract the categorical columns
    self.categorical_columns = categorical = dataframe.select_dtypes(exclude=[np.number])

    # Extract the supplementary rows
    self.supplementary_rows = sup_rows = dataframe.loc[supplementary_row_names].copy()
    sup_rows.drop(columns=categorical.columns, inplace=True)

    # Extract the supplementary columns
    self.supplementary_columns = sup_cols = dataframe[supplementary_column_names].copy()
    sup_cols.drop(index=supplementary_row_names, inplace=True)

    # Remove the supplementary rows/columns and the categorical columns
    # from the caller's dataframe
    dataframe.drop(index=supplementary_row_names, inplace=True)
    dataframe.drop(columns=supplementary_column_names, inplace=True)
    dataframe.drop(columns=categorical.columns, inplace=True)
def __init__(self, bin_type, *repr_args):
    """
    Constructor for a bin object.

    :param bin_type: "numerical" or "categorical"
    :param repr_args: arguments to represent this bin.
        args for numerical bin includes lower, upper, lower_closed, upper_closed
        args for categorical bin includes a list of categories for this bin.
    :raises ValueError: if a numerical bin does not get exactly four
        arguments, or a categorical bin does not get exactly one list.

    For any other ``bin_type`` only ``self.bin_type`` is set; no
    representation is created.
    """
    if bin_type == "numerical" and len(repr_args) != 4:
        raise ValueError("args for numerical bin are lower, upper, lower_closed, upper_closed.")
    # The two checks are joined with `or`: either a wrong argument count or
    # a non-list argument is invalid. The length test comes first so
    # repr_args[0] is only read when it exists.
    if bin_type == "categorical" and (len(repr_args) != 1 or not isinstance(repr_args[0], list)):
        raise ValueError("args for categorical bin is a list of categorical values for this bin.")
    self.bin_type = bin_type
    if bin_type == "numerical":
        self.representation = NumericalRepresentation(*repr_args)
    elif bin_type == "categorical":
        self.representation = CategoricalRepresentation(*repr_args)
def _get_power(mean1, std1, n1, mean2, std2, n2, z_1_minus_alpha):
    """
    Compute statistical power.

    Helper for compute_statistical_power(x, y, alpha=0.05).

    Args:
        mean1 (float): mean value of the treatment distribution
        std1 (float): standard deviation of the treatment distribution
        n1 (integer): number of samples of the treatment distribution
        mean2 (float): mean value of the control distribution
        std2 (float): standard deviation of the control distribution
        n2 (integer): number of samples of the control distribution
        z_1_minus_alpha (float): critical value for significance level alpha,
            i.e. the z-value for 1-alpha.

    Returns:
        float: statistical power, i.e. the probability that the test detects
            an effect when the effect actually exists.
    """
    mean_difference = mean1 - mean2
    spread = pooled_std(std1, n1, std2, n2)

    # Squared shift of the test statistic caused by the effect
    shift_squared = (n1 * n2 * mean_difference**2) / ((n1 + n2) * spread**2)

    # beta is the normal CDF at the shifted critical value; power is 1 - beta
    type_two_error = stats.norm.cdf(z_1_minus_alpha - np.sqrt(shift_squared))
    return 1 - type_two_error
def test_import_trajectory_interp_nans(self):
    """Import the sample trajectory with ``interp=True`` and check the result.

    Compares ``lat``/``long`` at index 10 against known values and asserts
    that the numeric columns of the row at position 2 contain no nulls.
    """
    fields = ['mdy', 'hms', 'lat', 'long', 'ell_ht', 'ortho_ht', 'num_sats', 'pdop']
    path = os.path.abspath('tests/sample_trajectory.txt')
    df = ti.import_trajectory(path, columns=fields, skiprows=1,
                              timeformat='hms', interp=True)

    # Test and verify an arbitrary line of data against the same line in the pandas DataFrame
    expected = dict(zip(
        fields,
        ['3/22/2017', '9:59:00.20', 76.5350241071, -68.7218956324, 65.898, 82.778, 11, 2.00],
    ))
    np.testing.assert_almost_equal(df.lat[10], expected['lat'], decimal=10)
    np.testing.assert_almost_equal(df.long[10], expected['long'], decimal=10)

    # check whether NaNs were interpolated for numeric type fields
    numeric_part = df.select_dtypes(include=[np.number])
    row_has_no_nulls = numeric_part.iloc[[2]].notnull().values.all()
    self.assertTrue(row_has_no_nulls)
def test_import_trajectory_fields(self):
    """Check the resulting DataFrame columns for two ``columns`` specifications.

    In both cases the expected columns are the non-None field names with the
    first two ('mdy', 'hms') left out.
    """
    path = os.path.abspath('tests/sample_trajectory.txt')
    field_sets = (
        # test number of fields in data greater than number of fields named
        ['mdy', 'hms', 'lat', 'long', 'ell_ht'],
        # test fields in the middle are dropped
        ['mdy', 'hms', 'lat', 'long', 'ell_ht', None, 'num_sats', 'pdop'],
    )
    for fields in field_sets:
        df = ti.import_trajectory(path, columns=fields, skiprows=1, timeformat='hms')
        named = [name for name in fields if name is not None]
        np.testing.assert_array_equal(df.columns, named[2:])
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Return each element rounded to the given number of decimals.

    The underlying data is rounded and viewed as ``type(self)``; the mask of
    this array is carried over to the result.

    Refer to `numpy.around` for full documentation.

    See Also
    --------
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out)
    result = rounded.view(type(self))
    if result.ndim > 0:
        result._mask = self._mask
        result._update_from(self)
    elif self._mask:
        # A masked scalar rounds to the masked constant
        result = masked
    if out is not None:
        # Explicit output: copy the mask over when the target can hold one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return result
def get_binary_op_return_class(cls1, cls2):
    """Pick the class of the result of a binary operation on two operand classes.

    Rules, applied in order:

    1. identical classes: that class;
    2. one operand is a bare ndarray/matrix/masked array or a subclass of a
       numeric scalar, list or tuple: the other operand's class;
    3. one operand is a YTQuantity subclass: the other operand's class;
    4. one class subclasses the other: the subclass.

    Raises RuntimeError when none of the rules applies.
    """
    def _yields_to_other(cls):
        # Evaluated lazily so the exact-class membership test short-circuits
        # the issubclass check.
        return (cls in (np.ndarray, np.matrix, np.ma.masked_array)
                or issubclass(cls, (numeric_type, np.number, list, tuple)))

    if cls1 is cls2:
        return cls1

    # Each rule is tried for cls1 first, then for cls2.
    operand_pairs = ((cls1, cls2), (cls2, cls1))
    for this, other in operand_pairs:
        if _yields_to_other(this):
            return other
    for this, other in operand_pairs:
        if issubclass(this, YTQuantity):
            return other
    for this, other in operand_pairs:
        if issubclass(this, other):
            return this

    raise RuntimeError("Undefined operation for a YTArray subclass. "
                       "Received operand types (%s) and (%s)" % (cls1, cls2))
def transform(self, X, y=None):
    """Apply dimensionality reduction to X.

    X is validated with ``check_array`` and its columns are then selected
    with ``self.mask_``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.
    y : ignored
        Not used by this method.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    from sklearn.utils import check_array
    from sklearn.utils.validation import check_is_fitted

    # Fail early when the estimator has no `mask_` attribute yet
    check_is_fitted(self, ['mask_'], all_or_any=all)
    validated = check_array(X)
    return validated[:, self.mask_]
def transform(self, X, y=None):
    """Apply dimensionality reduction to X.

    The columns of X are selected with ``self.mask_`` and the selection is
    validated with ``check_array``. Inputs exposing a ``columns`` attribute
    are first replaced by their ``.values``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.
    y : ignored
        Not used by this method.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    from sklearn.utils import check_array
    from sklearn.utils.validation import check_is_fitted

    # Fail early when the estimator has no `mask_` attribute yet
    check_is_fitted(self, ['mask_'], all_or_any=all)
    data = X.values if hasattr(X, 'columns') else X
    return check_array(data[:, self.mask_])
test_regression.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 31
收藏 0
点赞 0
评论 0
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def fit(self, X, y=None):
    """Learn one fill value per column of the dataframe ``X``.

    Does nothing when ``self.impute`` is ``False``. Otherwise stores the
    names of the object-dtype columns in ``self.object_columns`` and a
    Series of fill values in ``self.fill``: the most frequent value for
    object and categorical columns, the mean for every other column. When
    ``self.verbose`` is truthy, the share of missing cells among the numeric
    columns is printed.

    :param X: pandas DataFrame to learn the fill values from
    :param y: unused; accepted for scikit-learn compatibility
    :returns: ``self``
    """
    # Return if not imputing
    if self.impute is False:
        return self

    # Grab list of object column names before doing imputation
    self.object_columns = X.select_dtypes(include=['object']).columns.values

    def _fill_value(column):
        # isinstance(..., pd.CategoricalDtype) replaces
        # pd.core.common.is_categorical_dtype, which current pandas no
        # longer provides.
        if column.dtype == np.dtype('O') or isinstance(column.dtype, pd.CategoricalDtype):
            return column.value_counts().index[0]
        return column.mean()

    self.fill = pd.Series([_fill_value(X[c]) for c in X], index=X.columns)

    if self.verbose:
        numeric = X.select_dtypes(include=[np.number])
        num_nans = int(numeric.isnull().sum().sum())
        # Total number of numeric cells, missing ones included, so the ratio
        # is the share of cells that need imputing.
        num_total = int(numeric.size)
        # Guard against frames without any numeric cell
        percentage_imputed = num_nans / num_total * 100 if num_total else 0.0
        print("Percentage Imputed: %.2f%%" % percentage_imputed)
        print("Note: Impute will always happen on prediction dataframe, otherwise rows are dropped, and will lead "
              "to missing predictions")

    # return self for scikit compatibility
    return self
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Return each element rounded to the given number of decimals.

    The underlying data is rounded and viewed as ``type(self)``; the mask of
    this array is carried over to the result.

    Refer to `numpy.around` for full documentation.

    See Also
    --------
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out)
    result = rounded.view(type(self))
    result._mask = self._mask
    result._update_from(self)
    if out is not None:
        # Explicit output: copy the mask over when the target can hold one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return result
def load_MNIST_images(filename):
    """
    Read the raw MNIST images from an IDX image file.

    The file starts with four big-endian 32-bit integers (magic number,
    number of images, rows, columns) followed by one unsigned byte per pixel.

    :param filename: input data file
    :returns: float64 matrix of shape (rows * cols, number of images); each
        column is one flattened image with pixel values divided by 255
    """
    # Binary mode: the file holds raw bytes, not text
    with open(filename, "rb") as f:
        header = np.fromfile(f, dtype=np.dtype('>i4'), count=4)
        # The magic number is read to advance past it but is not validated
        _magic, num_images, num_rows, num_cols = header.tolist()
        images = np.fromfile(f, dtype=np.ubyte)

    # One row per image in the file; transpose to one column per image
    images = images.reshape((num_images, num_rows * num_cols)).transpose()
    return images.astype(np.float64) / 255
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], np.bool_) # not x[0] because it is unordered
failures = []
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Return each element rounded to the given number of decimals.

    The underlying data is rounded and viewed as ``type(self)``; the mask of
    this array is carried over to the result.

    Refer to `numpy.around` for full documentation.

    See Also
    --------
    ndarray.around : corresponding function for ndarrays
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out)
    result = rounded.view(type(self))
    if result.ndim > 0:
        result._mask = self._mask
        result._update_from(self)
    elif self._mask:
        # A masked scalar rounds to the masked constant
        result = masked
    if out is not None:
        # Explicit output: copy the mask over when the target can hold one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return result
def get_numeric_subclasses(cls=numpy.number, ignore=None):
    """
    Return subclasses of `cls` in the numpy scalar hierarchy.

    Only classes that map to a dtype number not seen before are returned,
    so each unique data type appears once. `ignore` is the list of dtype
    numbers already seen; it is extended in place as classes are collected.

    The hierarchy can be seen here:
        http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html
    """
    seen = [] if ignore is None else ignore
    found = []
    as_dtype = numpy.dtype(cls)
    type_number = as_dtype.num
    if type_number not in seen:
        # Safety check: we should be able to represent 0 with this data type.
        numpy.array(0, dtype=as_dtype)
        found.append(cls)
        seen.append(type_number)
    # Depth-first walk; the shared `seen` list prevents duplicates
    for child in cls.__subclasses__():
        found.extend(get_numeric_subclasses(child, ignore=seen))
    return found
def largest(*args):
    """
    Return the [elementwise] largest of a variable number of arguments.

    Like python's max. Exactly two arguments are compared directly with
    `switch`; any other count is stacked and reduced along axis 0.
    """
    if len(args) != 2:
        # NOTE(review): `max` is called with an `axis` keyword, so it is
        # presumably this module's own reduction rather than the builtin.
        return max(stack(args), axis=0)
    first, second = args
    return switch(first > second, first, second)
##########################
# Comparison
##########################
def reshape(x, newshape, ndim=None):
    """Apply a `Reshape` op to `x`.

    When `ndim` is given it is used as-is. Otherwise `newshape` is converted
    with `as_tensor_variable`, must be one-dimensional, and `ndim` is taken
    from its length.

    Raises TypeError if the converted `newshape` is not a vector, and
    ValueError if its length cannot be determined.
    """
    if ndim is not None:
        # Caller supplied the number of dimensions: no inference needed
        return Reshape(ndim)(x, newshape)

    newshape = as_tensor_variable(newshape)
    if newshape.ndim != 1:
        raise TypeError(
            "New shape in reshape must be a vector or a list/tuple of"
            " scalar. Got %s after conversion to a vector." % newshape)
    try:
        ndim = get_vector_length(newshape)
    except ValueError:
        raise ValueError(
            "The length of the provided shape (%s) cannot "
            "be automatically determined, so Theano is not able "
            "to know what the number of dimensions of the reshaped "
            "variable will be. You can provide the 'ndim' keyword "
            "argument to 'reshape' to avoid this problem." % newshape)
    return Reshape(ndim)(x, newshape)