def break_info(self, range=None):
if range is None:
range = self.dimension()
# for discrete, limits != range
limits = self.limits
major = self.get_breaks(limits)
minor = []
if major is None:
major = labels = []
else:
labels = self.get_labels(major)
major = pd.Categorical(major.keys())
major = self.map(major)
return {'range': range,
'labels': labels,
'major': major,
'minor': minor}
python类Categorical()的实例源码
def map(self, data, layout):
if not len(data):
data['PANEL'] = pd.Categorical(
[],
categories=layout['PANEL'].cat.categories,
ordered=True)
return data
facet_vals = eval_facet_vars(data, self.vars, self.plot.environment)
data, facet_vals = add_missing_facets(data, layout,
self.vars, facet_vals)
# assign each point to a panel
keys = join_keys(facet_vals, layout, self.vars)
data['PANEL'] = match(keys['x'], keys['y'], start=1)
data = data.sort_values('PANEL', kind='mergesort')
# matching dtype
data['PANEL'] = pd.Categorical(
data['PANEL'],
categories=layout['PANEL'].cat.categories,
ordered=True)
data.reset_index(drop=True, inplace=True)
return data
def from_categorical(cls, categorical, missing_value=None):
"""
Create a LabelArray from a pandas categorical.
Parameters
----------
categorical : pd.Categorical
The categorical object to convert.
missing_value : bytes, unicode, or None, optional
The missing value to use for this LabelArray.
Returns
-------
la : LabelArray
The LabelArray representation of this categorical.
"""
return LabelArray(
categorical,
missing_value,
categorical.categories,
)
def as_categorical(self, name=None):
"""
Coerce self into a pandas categorical.
This is only defined on 1D arrays, since that's all pandas supports.
"""
if len(self.shape) > 1:
raise ValueError("Can't convert a 2D array to a categorical.")
with ignore_pandas_nan_categorical_warning():
return pd.Categorical.from_codes(
self.as_int_array(),
# We need to make a copy because pandas >= 0.17 fails if this
# buffer isn't writeable.
self.categories.copy(),
ordered=False,
name=name,
)
def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
is_dask = isinstance(X, dd.DataFrame)
if is_dask:
X = X.categorize()
X = X.copy() if hasattr(X, 'copy') else X
categories = self.cat_cols_
for k in categories:
cat = (categories.get(k, None)
if hasattr(categories, 'get')
else None)
ordered = self.ordered.get(k, False)
# can't use Categorical constructor since dask compat
if not is_dask:
X[k] = pd.Categorical(X[k])
if cat:
X[k] = X[k].cat.set_categories(cat)
if ordered:
X[k] = X[k].cat.as_ordered()
return X
def inverse_transform(self, X):
non_cat = pd.DataFrame(X[:, :len(self.non_cat_columns_)],
columns=self.non_cat_columns_)
cats = []
for col in self.cat_columns_:
slice_ = self.cat_blocks_[col]
categories = self.categories_map_[col]
ordered = self.ordered_map_[col]
codes = X[:, slice_].argmax(1)
series = pd.Series(pd.Categorical.from_codes(
codes, categories, ordered=ordered
), name=col)
cats.append(series)
df = pd.concat([non_cat] + cats, axis=1)[self.columns_]
return df
def _get_table(self, column, is_size=True):
cols = list(range(5))
cols.append(self.header.index(column))
header = [self.header[c] for c in cols]
rows = [
[row[c] for c in cols]
for row in self.rows
]
if is_size:
for row in rows:
row[5] = parse_size(row[5])
table = pd.DataFrame.from_records(rows, columns=header)
table = table.rename(columns={
'prog' : 'Program',
'prog2' : 'Program2',
'threads' : 'Threads',
'dataset' : 'Dataset',
'qcut' : 'Quality',
})
table['Threads'] = pd.to_numeric(table['Threads'])
table['Dataset'] = pd.Categorical(table['Dataset'])
table['Program'] = pd.Categorical(table['Program'])
table['Program2'] = pd.Categorical(table['Program2'])
return table
test_category.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_reindex_dtype(self):
res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(['a', 'c'
])
tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(
Categorical(['a', 'c']))
tm.assert_index_equal(res, CategoricalIndex(
['a', 'a', 'c'], categories=['a', 'c']), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
res, indexer = CategoricalIndex(
['a', 'b', 'c', 'a'
], categories=['a', 'b', 'c', 'd']).reindex(['a', 'c'])
tm.assert_index_equal(res, Index(
['a', 'a', 'c'], dtype='object'), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
res, indexer = CategoricalIndex(
['a', 'b', 'c', 'a'],
categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c']))
tm.assert_index_equal(res, CategoricalIndex(
['a', 'a', 'c'], categories=['a', 'c']), exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
test_tseries.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_categorical(self):
# GH 8974
from pandas import Categorical, Series
arr = Categorical(list('abc'))
result = lib.infer_dtype(arr)
self.assertEqual(result, 'categorical')
result = lib.infer_dtype(Series(arr))
self.assertEqual(result, 'categorical')
arr = Categorical(list('abc'), categories=['cegfab'], ordered=True)
result = lib.infer_dtype(arr)
self.assertEqual(result, 'categorical')
result = lib.infer_dtype(Series(arr))
self.assertEqual(result, 'categorical')
test_style.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def setUp(self):
np.random.seed(24)
self.s = DataFrame({'A': np.random.permutation(range(6))})
self.df = DataFrame({'A': [0, 1], 'B': np.random.randn(2)})
self.f = lambda x: x
self.g = lambda x: x
def h(x, foo='bar'):
return pd.Series(['color: %s' % foo], index=x.index, name=x.name)
self.h = h
self.styler = Styler(self.df)
self.attrs = pd.DataFrame({'A': ['color: red', 'color: blue']})
self.dataframes = [
self.df,
pd.DataFrame({'f': [1., 2.], 'o': ['a', 'b'],
'c': pd.Categorical(['a', 'b'])})
]
test_to_csv.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def test_to_csv_from_csv_categorical(self):
# CSV with categoricals should result in the same output as when one
# would add a "normal" Series/DataFrame.
s = Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
s2 = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
res = StringIO()
s.to_csv(res)
exp = StringIO()
s2.to_csv(exp)
self.assertEqual(res.getvalue(), exp.getvalue())
df = DataFrame({"s": s})
df2 = DataFrame({"s": s2})
res = StringIO()
df.to_csv(res)
exp = StringIO()
df2.to_csv(exp)
self.assertEqual(res.getvalue(), exp.getvalue())
test_reshape.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 35
收藏 0
点赞 0
评论 0
def test_dataframe_dummies_with_categorical(self):
df = self.df
df['cat'] = pd.Categorical(['x', 'y', 'y'])
result = get_dummies(df, sparse=self.sparse)
expected = DataFrame({'C': [1, 2, 3],
'A_a': [1., 0, 1],
'A_b': [0., 1, 0],
'B_b': [1., 1, 0],
'B_c': [0., 0, 1],
'cat_x': [1., 0, 0],
'cat_y': [0., 1, 1]})
expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c', 'cat_x', 'cat_y'
]]
assert_frame_equal(result, expected)
# GH12402 Add a new parameter `drop_first` to avoid collinearity
test_generic.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def test_unexpected_keyword(self): # GH8597
df = DataFrame(np.random.randn(5, 2), columns=['jim', 'joe'])
ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
ts = df['joe'].copy()
ts[2] = np.nan
with assertRaisesRegexp(TypeError, 'unexpected keyword'):
df.drop('joe', axis=1, in_place=True)
with assertRaisesRegexp(TypeError, 'unexpected keyword'):
df.reindex([1, 0], inplace=True)
with assertRaisesRegexp(TypeError, 'unexpected keyword'):
ca.fillna(0, inplace=True)
with assertRaisesRegexp(TypeError, 'unexpected keyword'):
ts.fillna(0, in_place=True)
# See gh-12301
test_generic.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def test_describe_typefiltering_category_bool(self):
df = DataFrame({'A_cat': pd.Categorical(['foo', 'foo', 'bar'] * 8),
'B_str': ['a', 'b', 'c', 'd'] * 6,
'C_bool': [True] * 12 + [False] * 12,
'D_num': np.arange(24.) + .5,
'E_ts': tm.makeTimeSeries()[:24].index})
desc = df.describe()
expected_cols = ['D_num']
expected = DataFrame(dict((k, df[k].describe())
for k in expected_cols),
columns=expected_cols)
assert_frame_equal(desc, expected)
desc = df.describe(include=["category"])
self.assertTrue(desc.columns.tolist() == ["A_cat"])
# 'all' includes numpy-dtypes + category
desc1 = df.describe(include="all")
desc2 = df.describe(include=[np.generic, "category"])
assert_frame_equal(desc1, desc2)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def test_setitem(self):
# int/positional
c = self.factor.copy()
c[0] = 'b'
self.assertEqual(c[0], 'b')
c[-1] = 'a'
self.assertEqual(c[-1], 'a')
# boolean
c = self.factor.copy()
indexer = np.zeros(len(c), dtype='bool')
indexer[0] = True
indexer[-1] = True
c[indexer] = 'c'
expected = Categorical.from_array(['c', 'b', 'b', 'a',
'a', 'c', 'c', 'c'], ordered=True)
self.assert_categorical_equal(c, expected)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 31
收藏 0
点赞 0
评论 0
def test_constructor_unsortable(self):
# it works!
arr = np.array([1, 2, 3, datetime.now()], dtype='O')
factor = Categorical.from_array(arr, ordered=False)
self.assertFalse(factor.ordered)
if compat.PY3:
self.assertRaises(
TypeError, lambda: Categorical.from_array(arr, ordered=True))
else:
# this however will raise as cannot be sorted (on PY3 or older
# numpies)
if LooseVersion(np.__version__) < "1.10":
self.assertRaises(
TypeError,
lambda: Categorical.from_array(arr, ordered=True))
else:
Categorical.from_array(arr, ordered=True)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def test_is_equal_dtype(self):
# test dtype comparisons between cats
c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False)
c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False)
c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True)
self.assertTrue(c1.is_dtype_equal(c1))
self.assertTrue(c2.is_dtype_equal(c2))
self.assertTrue(c3.is_dtype_equal(c3))
self.assertFalse(c1.is_dtype_equal(c2))
self.assertFalse(c1.is_dtype_equal(c3))
self.assertFalse(c1.is_dtype_equal(Index(list('aabca'))))
self.assertFalse(c1.is_dtype_equal(c1.astype(object)))
self.assertTrue(c1.is_dtype_equal(CategoricalIndex(c1)))
self.assertFalse(c1.is_dtype_equal(
CategoricalIndex(c1, categories=list('cab'))))
self.assertFalse(c1.is_dtype_equal(CategoricalIndex(c1, ordered=True)))
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 34
收藏 0
点赞 0
评论 0
def test_constructor_with_generator(self):
# This was raising an Error in isnull(single_val).any() because isnull
# returned a scalar for a generator
xrange = range
exp = Categorical([0, 1, 2])
cat = Categorical((x for x in [0, 1, 2]))
self.assertTrue(cat.equals(exp))
cat = Categorical(xrange(3))
self.assertTrue(cat.equals(exp))
# This uses xrange internally
from pandas.core.index import MultiIndex
MultiIndex.from_product([range(5), ['a', 'b', 'c']])
# check that categories accept generators and sequences
cat = pd.Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
self.assertTrue(cat.equals(exp))
cat = pd.Categorical([0, 1, 2], categories=xrange(3))
self.assertTrue(cat.equals(exp))
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_empty_print(self):
factor = Categorical([], ["a", "b", "c"])
expected = ("[], Categories (3, object): [a, b, c]")
# hack because array_repr changed in numpy > 1.6.x
actual = repr(factor)
self.assertEqual(actual, expected)
self.assertEqual(expected, actual)
factor = Categorical([], ["a", "b", "c"], ordered=True)
expected = ("[], Categories (3, object): [a < b < c]")
actual = repr(factor)
self.assertEqual(expected, actual)
factor = Categorical([], [])
expected = ("[], Categories (0, object): []")
self.assertEqual(expected, repr(factor))
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def test_categories_assigments(self):
s = pd.Categorical(["a", "b", "c", "a"])
exp = np.array([1, 2, 3, 1])
s.categories = [1, 2, 3]
self.assert_numpy_array_equal(s.__array__(), exp)
self.assert_numpy_array_equal(s.categories, np.array([1, 2, 3]))
# lengthen
def f():
s.categories = [1, 2, 3, 4]
self.assertRaises(ValueError, f)
# shorten
def f():
s.categories = [1, 2]
self.assertRaises(ValueError, f)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_ordered_api(self):
# GH 9347
cat1 = pd.Categorical(["a", "c", "b"], ordered=False)
self.assertTrue(cat1.categories.equals(Index(['a', 'b', 'c'])))
self.assertFalse(cat1.ordered)
cat2 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'],
ordered=False)
self.assertTrue(cat2.categories.equals(Index(['b', 'c', 'a'])))
self.assertFalse(cat2.ordered)
cat3 = pd.Categorical(["a", "c", "b"], ordered=True)
self.assertTrue(cat3.categories.equals(Index(['a', 'b', 'c'])))
self.assertTrue(cat3.ordered)
cat4 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'],
ordered=True)
self.assertTrue(cat4.categories.equals(Index(['b', 'c', 'a'])))
self.assertTrue(cat4.ordered)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def test_remove_categories(self):
cat = Categorical(["a", "b", "c", "a"], ordered=True)
old = cat.copy()
new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"],
ordered=True)
# first inplace == False
res = cat.remove_categories("c")
self.assert_categorical_equal(cat, old)
self.assert_categorical_equal(res, new)
res = cat.remove_categories(["c"])
self.assert_categorical_equal(cat, old)
self.assert_categorical_equal(res, new)
# inplace == True
res = cat.remove_categories("c", inplace=True)
self.assert_categorical_equal(cat, new)
self.assertIsNone(res)
# removal is not in categories
def f():
cat.remove_categories(["c"])
self.assertRaises(ValueError, f)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def test_shift(self):
# GH 9416
cat = pd.Categorical(['a', 'b', 'c', 'd', 'a'])
# shift forward
sp1 = cat.shift(1)
xp1 = pd.Categorical([np.nan, 'a', 'b', 'c', 'd'])
self.assert_categorical_equal(sp1, xp1)
self.assert_categorical_equal(cat[:-1], sp1[1:])
# shift back
sn2 = cat.shift(-2)
xp2 = pd.Categorical(['c', 'd', 'a', np.nan, np.nan],
categories=['a', 'b', 'c', 'd'])
self.assert_categorical_equal(sn2, xp2)
self.assert_categorical_equal(cat[2:], sn2[:-2])
# shift by zero
self.assert_categorical_equal(cat, cat.shift(0))
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def test_dtypes(self):
# GH8143
index = ['cat', 'obj', 'num']
cat = pd.Categorical(['a', 'b', 'c'])
obj = pd.Series(['a', 'b', 'c'])
num = pd.Series([1, 2, 3])
df = pd.concat([pd.Series(cat), obj, num], axis=1, keys=index)
result = df.dtypes == 'object'
expected = Series([False, True, False], index=index)
tm.assert_series_equal(result, expected)
result = df.dtypes == 'int64'
expected = Series([False, False, True], index=index)
tm.assert_series_equal(result, expected)
result = df.dtypes == 'category'
expected = Series([True, False, False], index=index)
tm.assert_series_equal(result, expected)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def test_codes_dtypes(self):
# GH 8453
result = Categorical(['foo', 'bar', 'baz'])
self.assertTrue(result.codes.dtype == 'int8')
result = Categorical(['foo%05d' % i for i in range(400)])
self.assertTrue(result.codes.dtype == 'int16')
result = Categorical(['foo%05d' % i for i in range(40000)])
self.assertTrue(result.codes.dtype == 'int32')
# adding cats
result = Categorical(['foo', 'bar', 'baz'])
self.assertTrue(result.codes.dtype == 'int8')
result = result.add_categories(['foo%05d' % i for i in range(400)])
self.assertTrue(result.codes.dtype == 'int16')
# removing cats
result = result.remove_categories(['foo%05d' % i for i in range(300)])
self.assertTrue(result.codes.dtype == 'int8')
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def test_reshaping(self):
p = tm.makePanel()
p['str'] = 'foo'
df = p.to_frame()
df['category'] = df['str'].astype('category')
result = df['category'].unstack()
c = Categorical(['foo'] * len(p.major_axis))
expected = DataFrame({'A': c.copy(),
'B': c.copy(),
'C': c.copy(),
'D': c.copy()},
columns=Index(list('ABCD'), name='minor'),
index=p.major_axis.set_names('major'))
tm.assert_frame_equal(result, expected)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def test_reindex(self):
index = pd.date_range('20000101', periods=3)
# reindexing to an invalid Categorical
s = Series(['a', 'b', 'c'], dtype='category')
result = s.reindex(index)
expected = Series(Categorical(values=[np.nan, np.nan, np.nan],
categories=['a', 'b', 'c']))
expected.index = index
tm.assert_series_equal(result, expected)
# partial reindexing
expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b',
'c']))
expected.index = [1, 2]
result = s.reindex([1, 2])
tm.assert_series_equal(result, expected)
expected = Series(Categorical(
values=['c', np.nan], categories=['a', 'b', 'c']))
expected.index = [2, 3]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def test_nan_handling(self):
# Nans are represented as -1 in labels
s = Series(Categorical(["a", "b", np.nan, "a"]))
self.assert_numpy_array_equal(s.cat.categories, np.array(["a", "b"]))
self.assert_numpy_array_equal(s.values.codes, np.array([0, 1, -1, 0]))
# If categories have nan included, the label should point to that
# instead
with tm.assert_produces_warning(FutureWarning):
s2 = Series(Categorical(
["a", "b", np.nan, "a"], categories=["a", "b", np.nan]))
self.assert_numpy_array_equal(s2.cat.categories, np.array(
["a", "b", np.nan], dtype=np.object_))
self.assert_numpy_array_equal(s2.values.codes, np.array([0, 1, 2, 0]))
# Changing categories should also make the replaced category np.nan
s3 = Series(Categorical(["a", "b", "c", "a"]))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
s3.cat.categories = ["a", "b", np.nan]
self.assert_numpy_array_equal(s3.cat.categories, np.array(
["a", "b", np.nan], dtype=np.object_))
self.assert_numpy_array_equal(s3.values.codes, np.array([0, 1, 2, 0]))
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 31
收藏 0
点赞 0
评论 0
def test_sequence_like(self):
# GH 7839
# make sure can iterate
df = DataFrame({"id": [1, 2, 3, 4, 5, 6],
"raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
df['grade'] = Categorical(df['raw_grade'])
# basic sequencing testing
result = list(df.grade.values)
expected = np.array(df.grade.values).tolist()
tm.assert_almost_equal(result, expected)
# iteration
for t in df.itertuples(index=False):
str(t)
for row, s in df.iterrows():
str(s)
for c, col in df.iteritems():
str(s)
test_categorical.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def test_describe(self):
# Categoricals should not show up together with numerical columns
result = self.cat.describe()
self.assertEqual(len(result.columns), 1)
# In a frame, describe() for the cat should be the same as for string
# arrays (count, unique, top, freq)
cat = Categorical(["a", "b", "b", "b"], categories=['a', 'b', 'c'],
ordered=True)
s = Series(cat)
result = s.describe()
expected = Series([4, 2, "b", 3],
index=['count', 'unique', 'top', 'freq'])
tm.assert_series_equal(result, expected)
cat = pd.Series(pd.Categorical(["a", "b", "c", "c"]))
df3 = pd.DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]})
res = df3.describe()
self.assert_numpy_array_equal(res["cat"].values, res["s"].values)