python类Categorical()的实例源码

scale.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def break_info(self, range=None):
        """
        Collect the range, breaks and labels of this scale.

        Parameters
        ----------
        range : optional
            Range to report. When ``None``, the result of
            ``self.dimension()`` is used instead.

        Returns
        -------
        out : dict
            Dictionary with the keys ``'range'``, ``'labels'``,
            ``'major'`` and ``'minor'``. ``'minor'`` is always an
            empty list. When ``self.get_breaks`` returns ``None``,
            ``'major'`` and ``'labels'`` are two distinct empty lists.
        """
        if range is None:
            range = self.dimension()

        # for discrete, limits != range
        major = self.get_breaks(self.limits)
        if major is None:
            # Two separate list objects: previously both keys shared a
            # single list, so mutating one silently changed the other.
            major, labels = [], []
        else:
            labels = self.get_labels(major)
            # The keys of the breaks are mapped through the scale
            major = self.map(pd.Categorical(major.keys()))

        return {'range': range,
                'labels': labels,
                'major': major,
                'minor': []}
facet_wrap.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def map(self, data, layout):
        """
        Add a ``PANEL`` column to `data`, assigning each row to a panel.

        Parameters
        ----------
        data : dataframe
            Data to be assigned to panels.
        layout : dataframe
            Layout whose ``PANEL`` column is categorical; its categories
            are reused for the ``PANEL`` column of the result.

        Returns
        -------
        data : dataframe
            Data with an ordered categorical ``PANEL`` column. Empty
            input is returned as is (with an empty ``PANEL`` column);
            otherwise the rows are stably sorted by panel and the index
            is reset.
        """
        def as_panel(values):
            # matching dtype: same categories as the layout, ordered
            return pd.Categorical(
                values,
                categories=layout['PANEL'].cat.categories,
                ordered=True)

        if len(data) == 0:
            data['PANEL'] = as_panel([])
            return data

        facet_vals = eval_facet_vars(data, self.vars, self.plot.environment)
        data, facet_vals = add_missing_facets(
            data, layout, self.vars, facet_vals)

        # assign each point to a panel
        keys = join_keys(facet_vals, layout, self.vars)
        data['PANEL'] = match(keys['x'], keys['y'], start=1)
        data = data.sort_values('PANEL', kind='mergesort')
        data['PANEL'] = as_panel(data['PANEL'])

        data.reset_index(drop=True, inplace=True)
        return data
labelarray.py 文件源码 项目:catalyst 作者: enigmampc 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def from_categorical(cls, categorical, missing_value=None):
        """
        Build a LabelArray out of a pandas categorical.

        Parameters
        ----------
        categorical : pd.Categorical
            The categorical object to convert.
        missing_value : bytes, unicode, or None, optional
            The missing value to use for the resulting LabelArray.

        Returns
        -------
        la : LabelArray
            The LabelArray representation of ``categorical``, built with
            the categorical's own categories.
        """
        # NOTE(review): the result is always a plain ``LabelArray``;
        # ``cls`` is not consulted.
        categories = categorical.categories
        return LabelArray(categorical, missing_value, categories)
labelarray.py 文件源码 项目:catalyst 作者: enigmampc 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def as_categorical(self, name=None):
        """
        Convert this array into a pandas categorical.

        Only arrays with a single dimension can be converted, since
        that's all pandas supports.

        Parameters
        ----------
        name : optional
            Forwarded unchanged to ``pd.Categorical.from_codes``.

        Raises
        ------
        ValueError
            If the array has more than one dimension.
        """
        has_extra_dims = len(self.shape) > 1
        if has_extra_dims:
            raise ValueError("Can't convert a 2D array to a categorical.")

        with ignore_pandas_nan_categorical_warning():
            codes = self.as_int_array()
            # A copy is required because pandas >= 0.17 fails if this
            # buffer isn't writeable.
            categories = self.categories.copy()
            return pd.Categorical.from_codes(
                codes,
                categories,
                ordered=False,
                name=name,
            )
preprocessing.py 文件源码 项目:sktransformers 作者: TomAugspurger 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
        """
        Convert the columns listed in ``self.cat_cols_`` to categoricals.

        Parameters
        ----------
        X : pd.DataFrame or dask DataFrame
            Input data. A dask DataFrame is first passed through
            ``categorize()``. The input is copied when it has a ``copy``
            method, so the caller's object is not assigned to.
        y : ignored
            Unused by this method.

        Returns
        -------
        X : DataFrame
            Copy of the input in which every column named in
            ``self.cat_cols_`` is categorical. If ``self.cat_cols_`` is a
            mapping, a non-empty value is applied as the column's
            categories. Columns flagged in ``self.ordered`` are made
            ordered.
        """
        is_dask = isinstance(X, dd.DataFrame)
        if is_dask:
            X = X.categorize()

        X = X.copy() if hasattr(X, 'copy') else X
        categories = self.cat_cols_
        # Whether explicit categories can be looked up does not change
        # between columns, so check it once.
        is_mapping = hasattr(categories, 'get')
        for k in categories:
            cat = categories.get(k, None) if is_mapping else None
            ordered = self.ordered.get(k, False)
            # can't use Categorical constructor since dask compat
            if not is_dask:
                X[k] = pd.Categorical(X[k])
            # Explicit None/length test: the truth value of an Index or
            # ndarray of categories is ambiguous and raises ValueError.
            if cat is not None and len(cat) > 0:
                X[k] = X[k].cat.set_categories(cat)
            if ordered:
                X[k] = X[k].cat.as_ordered()
        return X
preprocessing.py 文件源码 项目:sktransformers 作者: TomAugspurger 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def inverse_transform(self, X):
        """
        Rebuild a DataFrame from an encoded 2D array.

        The leading ``len(self.non_cat_columns_)`` columns of `X` are
        taken as they are. For every column in ``self.cat_columns_``, the
        block of `X` selected by ``self.cat_blocks_`` is reduced with a
        row-wise ``argmax`` and the resulting codes are turned back into
        a categorical using ``self.categories_map_`` and
        ``self.ordered_map_``.

        Returns
        -------
        df : pd.DataFrame
            Frame with the columns ordered as in ``self.columns_``.
        """
        n_plain = len(self.non_cat_columns_)
        pieces = [pd.DataFrame(X[:, :n_plain],
                               columns=self.non_cat_columns_)]

        def decode(col):
            block = self.cat_blocks_[col]
            levels = self.categories_map_[col]
            is_ordered = self.ordered_map_[col]
            # position of the largest value in the block is the code
            codes = X[:, block].argmax(1)
            values = pd.Categorical.from_codes(
                codes, levels, ordered=is_ordered)
            return pd.Series(values, name=col)

        pieces.extend(decode(col) for col in self.cat_columns_)
        return pd.concat(pieces, axis=1)[self.columns_]
show_job_info.py 文件源码 项目:atropos 作者: jdidion 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def _get_table(self, column, is_size=True):
        """
        Build a DataFrame from the first five fields of every row plus
        the field named `column`.

        Parameters
        ----------
        column : str
            Name, as found in ``self.header``, of the sixth field.
        is_size : bool
            When true, the sixth field of every row is converted with
            ``parse_size``.

        Returns
        -------
        table : pd.DataFrame
            Frame with renamed columns, a numeric ``Threads`` column and
            categorical ``Dataset``, ``Program`` and ``Program2`` columns.
        """
        wanted = list(range(5)) + [self.header.index(column)]
        header = [self.header[i] for i in wanted]
        records = [[record[i] for i in wanted] for record in self.rows]
        if is_size:
            for record in records:
                record[5] = parse_size(record[5])

        new_names = { 
            'prog' : 'Program',
            'prog2' : 'Program2',
            'threads' : 'Threads',
            'dataset' : 'Dataset',
            'qcut' : 'Quality',
        }
        table = pd.DataFrame.from_records(records, columns=header)
        table = table.rename(columns=new_names)
        table['Threads'] = pd.to_numeric(table['Threads'])
        for name in ('Dataset', 'Program', 'Program2'):
            table[name] = pd.Categorical(table[name])
        return table
test_category.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_reindex_dtype(self):
        """
        Check the index type and the indexer returned by
        ``CategoricalIndex.reindex`` for list and Categorical targets,
        with and without an unused category on the source index.
        """
        labels = ['a', 'b', 'c', 'a']
        wider = ['a', 'b', 'c', 'd']

        def check(source, target, expected):
            res, indexer = source.reindex(target)
            tm.assert_index_equal(res, expected, exact=True)
            tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

        # list target
        check(CategoricalIndex(labels),
              ['a', 'c'],
              Index(['a', 'a', 'c']))

        # Categorical target
        check(CategoricalIndex(labels),
              Categorical(['a', 'c']),
              CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']))

        # list target, source has an extra category
        check(CategoricalIndex(labels, categories=wider),
              ['a', 'c'],
              Index(['a', 'a', 'c'], dtype='object'))

        # Categorical target, source has an extra category
        check(CategoricalIndex(labels, categories=wider),
              Categorical(['a', 'c']),
              CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']))
test_tseries.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_categorical(self):
        """
        ``lib.infer_dtype`` reports ``'categorical'`` for a Categorical
        and for a Series wrapping one.
        """
        # GH 8974
        from pandas import Categorical, Series

        default_cats = Categorical(list('abc'))
        # NOTE(review): ``['cegfab']`` is a one-element list, i.e. a single
        # category named 'cegfab' - confirm this is what was intended.
        explicit_cats = Categorical(list('abc'), categories=['cegfab'],
                                    ordered=True)

        for arr in (default_cats, explicit_cats):
            for candidate in (arr, Series(arr)):
                self.assertEqual(lib.infer_dtype(candidate), 'categorical')
test_style.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def setUp(self):
        """
        Create the fixtures shared by the tests: seeded random frames,
        three styling callables, a ``Styler`` and a list of frames.
        """
        np.random.seed(24)
        # The two draws below must stay in this order, since both
        # consume the generator seeded above.
        shuffled = np.random.permutation(range(6))
        noise = np.random.randn(2)

        self.s = DataFrame({'A': shuffled})
        self.df = DataFrame({'A': [0, 1], 'B': noise})
        self.f = lambda x: x
        self.g = lambda x: x

        def h(x, foo='bar'):
            return pd.Series(['color: %s' % foo], index=x.index, name=x.name)

        self.h = h
        self.styler = Styler(self.df)
        self.attrs = pd.DataFrame({'A': ['color: red', 'color: blue']})

        mixed_dtypes = pd.DataFrame({'f': [1., 2.], 'o': ['a', 'b'],
                                     'c': pd.Categorical(['a', 'b'])})
        self.dataframes = [self.df, mixed_dtypes]
test_to_csv.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_to_csv_from_csv_categorical(self):
        """
        A categorical Series/DataFrame is written to CSV exactly like
        its non-categorical counterpart.
        """
        def csv_text(obj):
            buf = StringIO()
            obj.to_csv(buf)
            return buf.getvalue()

        # CSV with categoricals should result in the same output as when one
        # would add a "normal" Series/DataFrame.
        s = Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
        s2 = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
        self.assertEqual(csv_text(s), csv_text(s2))

        df = DataFrame({"s": s})
        df2 = DataFrame({"s": s2})
        self.assertEqual(csv_text(df), csv_text(df2))
test_reshape.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def test_dataframe_dummies_with_categorical(self):
        """
        ``get_dummies`` expands a categorical column of the fixture
        frame into one indicator column per category.
        """
        frame = self.df
        # Assigned on the fixture frame itself, not on a copy.
        frame['cat'] = pd.Categorical(['x', 'y', 'y'])
        result = get_dummies(frame, sparse=self.sparse)

        column_order = ['C', 'A_a', 'A_b', 'B_b', 'B_c', 'cat_x', 'cat_y']
        indicators = DataFrame({'C': [1, 2, 3],
                                'A_a': [1., 0, 1],
                                'A_b': [0., 1, 0],
                                'B_b': [1., 1, 0],
                                'B_c': [0., 0, 1],
                                'cat_x': [1., 0, 0],
                                'cat_y': [0., 1, 1]})
        expected = indicators[column_order]
        assert_frame_equal(result, expected)

    # GH12402 Add a new parameter `drop_first` to avoid collinearity
test_generic.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_unexpected_keyword(self):  # GH8597
        """
        Each of the calls below passes a keyword and is expected to
        raise ``TypeError`` mentioning 'unexpected keyword'.
        """
        df = DataFrame(np.random.randn(5, 2), columns=['jim', 'joe'])
        ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
        ts = df['joe'].copy()
        ts[2] = np.nan

        offending_calls = [
            lambda: df.drop('joe', axis=1, in_place=True),
            lambda: df.reindex([1, 0], inplace=True),
            lambda: ca.fillna(0, inplace=True),
            lambda: ts.fillna(0, in_place=True),
        ]
        for call in offending_calls:
            with assertRaisesRegexp(TypeError, 'unexpected keyword'):
                call()

    # See gh-12301
test_generic.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def test_describe_typefiltering_category_bool(self):
        """
        ``DataFrame.describe`` on a frame mixing category, string, bool,
        numeric and timestamp columns: default call, ``include`` with
        "category", and ``include="all"``.
        """
        columns = {'A_cat': pd.Categorical(['foo', 'foo', 'bar'] * 8),
                   'B_str': ['a', 'b', 'c', 'd'] * 6,
                   'C_bool': [True] * 12 + [False] * 12,
                   'D_num': np.arange(24.) + .5,
                   'E_ts': tm.makeTimeSeries()[:24].index}
        df = DataFrame(columns)

        # default: only the numeric column is described
        desc = df.describe()
        expected_cols = ['D_num']
        per_column = {k: df[k].describe() for k in expected_cols}
        expected = DataFrame(per_column, columns=expected_cols)
        assert_frame_equal(desc, expected)

        desc = df.describe(include=["category"])
        self.assertTrue(desc.columns.tolist() == ["A_cat"])

        # 'all' includes numpy-dtypes + category
        with_all = df.describe(include="all")
        with_explicit = df.describe(include=[np.generic, "category"])
        assert_frame_equal(with_all, with_explicit)
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def test_setitem(self):
        """
        Item assignment on a copy of ``self.factor`` by integer position
        and by boolean mask.
        """
        # int/positional
        cat = self.factor.copy()
        for position, value in ((0, 'b'), (-1, 'a')):
            cat[position] = value
            self.assertEqual(cat[position], value)

        # boolean
        cat = self.factor.copy()
        mask = np.zeros(len(cat), dtype='bool')
        # select only the first and the last element
        mask[0] = True
        mask[-1] = True
        cat[mask] = 'c'

        expected = Categorical.from_array(
            ['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], ordered=True)
        self.assert_categorical_equal(cat, expected)
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def test_constructor_unsortable(self):
        """
        Build categoricals from an object array mixing ints and a
        datetime: unordered always works; ordered raises ``TypeError``
        on PY3 or with numpy older than 1.10.
        """
        # it works!
        arr = np.array([1, 2, 3, datetime.now()], dtype='O')
        unordered = Categorical.from_array(arr, ordered=False)
        self.assertFalse(unordered.ordered)

        def build_ordered():
            return Categorical.from_array(arr, ordered=True)

        # this however will raise as cannot be sorted (on PY3 or older
        # numpies); the numpy version is only inspected when not on PY3
        expect_failure = (compat.PY3 or
                          LooseVersion(np.__version__) < "1.10")
        if expect_failure:
            self.assertRaises(TypeError, build_ordered)
        else:
            build_ordered()
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_is_equal_dtype(self):
        """
        ``Categorical.is_dtype_equal`` against itself, against
        categoricals with other category order / orderedness, against
        non-categorical objects and against ``CategoricalIndex``.
        """
        # test dtype comparisons between cats
        values = list('aabca')
        c1 = Categorical(values, categories=list('abc'), ordered=False)
        c2 = Categorical(values, categories=list('cab'), ordered=False)
        c3 = Categorical(values, categories=list('cab'), ordered=True)

        # every categorical matches itself
        for cat in (c1, c2, c3):
            self.assertTrue(cat.is_dtype_equal(cat))

        comparisons = [
            (c2, False),
            (c3, False),
            (Index(list('aabca')), False),
            (c1.astype(object), False),
            (CategoricalIndex(c1), True),
            (CategoricalIndex(c1, categories=list('cab')), False),
            (CategoricalIndex(c1, ordered=True), False),
        ]
        for other, should_match in comparisons:
            check = self.assertTrue if should_match else self.assertFalse
            check(c1.is_dtype_equal(other))
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def test_constructor_with_generator(self):
        """
        ``Categorical`` accepts generators and ranges, both as values
        and as categories.
        """
        # This was raising an Error in isnull(single_val).any() because isnull
        # returned a scalar for a generator
        xrange = range

        expected = Categorical([0, 1, 2])

        for values in ((x for x in [0, 1, 2]), xrange(3)):
            cat = Categorical(values)
            self.assertTrue(cat.equals(expected))

        # This uses xrange internally
        from pandas.core.index import MultiIndex
        MultiIndex.from_product([range(5), ['a', 'b', 'c']])

        # check that categories accept generators and sequences
        for categories in ((x for x in [0, 1, 2]), xrange(3)):
            cat = pd.Categorical([0, 1, 2], categories=categories)
            self.assertTrue(cat.equals(expected))
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_empty_print(self):
        """
        Check ``repr`` of categoricals that hold no values: unordered,
        ordered, and without any category.

        Each case is asserted exactly once; the original asserted the
        first case twice with the arguments swapped.
        """
        cases = [
            (["a", "b", "c"], {},
             "[], Categories (3, object): [a, b, c]"),
            (["a", "b", "c"], {'ordered': True},
             "[], Categories (3, object): [a < b < c]"),
            ([], {},
             "[], Categories (0, object): []"),
        ]
        for categories, kwargs, expected in cases:
            factor = Categorical([], categories, **kwargs)
            self.assertEqual(expected, repr(factor))
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_categories_assigments(self):
        """
        Assigning ``categories`` renames the categories in place; a new
        list of a different length raises ``ValueError``.
        """
        cat = pd.Categorical(["a", "b", "c", "a"])
        expected_values = np.array([1, 2, 3, 1])
        cat.categories = [1, 2, 3]
        self.assert_numpy_array_equal(cat.__array__(), expected_values)
        self.assert_numpy_array_equal(cat.categories, np.array([1, 2, 3]))

        # lengthen
        self.assertRaises(ValueError, setattr, cat, 'categories',
                          [1, 2, 3, 4])

        # shorten
        self.assertRaises(ValueError, setattr, cat, 'categories', [1, 2])
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def test_ordered_api(self):
        """
        The ``ordered`` flag and the category order of a Categorical
        follow the constructor arguments.
        """
        # GH 9347
        cases = [
            ({'ordered': False}, ['a', 'b', 'c']),
            ({'categories': ['b', 'c', 'a'], 'ordered': False},
             ['b', 'c', 'a']),
            ({'ordered': True}, ['a', 'b', 'c']),
            ({'categories': ['b', 'c', 'a'], 'ordered': True},
             ['b', 'c', 'a']),
        ]
        for kwargs, expected_categories in cases:
            cat = pd.Categorical(["a", "c", "b"], **kwargs)
            self.assertTrue(cat.categories.equals(Index(expected_categories)))
            if kwargs['ordered']:
                self.assertTrue(cat.ordered)
            else:
                self.assertFalse(cat.ordered)
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_remove_categories(self):
        """
        ``remove_categories`` returns a new categorical by default,
        modifies in place with ``inplace=True`` and raises
        ``ValueError`` for a category that is not present.
        """
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        untouched = cat.copy()
        without_c = Categorical(["a", "b", np.nan, "a"],
                                categories=["a", "b"], ordered=True)

        # first inplace == False
        for removals in ("c", ["c"]):
            res = cat.remove_categories(removals)
            self.assert_categorical_equal(cat, untouched)
            self.assert_categorical_equal(res, without_c)

        # inplace == True
        res = cat.remove_categories("c", inplace=True)
        self.assert_categorical_equal(cat, without_c)
        self.assertIsNone(res)

        # removal is not in categories
        self.assertRaises(ValueError, cat.remove_categories, ["c"])
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_shift(self):
        """
        ``Categorical.shift`` forward, backward and by zero.
        """
        # GH 9416
        cat = pd.Categorical(['a', 'b', 'c', 'd', 'a'])

        # shift forward
        forward = cat.shift(1)
        expected_forward = pd.Categorical([np.nan, 'a', 'b', 'c', 'd'])
        self.assert_categorical_equal(forward, expected_forward)
        self.assert_categorical_equal(cat[:-1], forward[1:])

        # shift back
        backward = cat.shift(-2)
        expected_backward = pd.Categorical(
            ['c', 'd', 'a', np.nan, np.nan],
            categories=['a', 'b', 'c', 'd'])
        self.assert_categorical_equal(backward, expected_backward)
        self.assert_categorical_equal(cat[2:], backward[:-2])

        # shift by zero
        self.assert_categorical_equal(cat, cat.shift(0))
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_dtypes(self):
        """
        Comparing ``DataFrame.dtypes`` with a dtype name flags exactly
        the columns of that dtype.
        """
        # GH8143
        index = ['cat', 'obj', 'num']
        cat = pd.Categorical(['a', 'b', 'c'])
        obj = pd.Series(['a', 'b', 'c'])
        num = pd.Series([1, 2, 3])
        df = pd.concat([pd.Series(cat), obj, num], axis=1, keys=index)

        cases = [
            ('object', [False, True, False]),
            ('int64', [False, False, True]),
            ('category', [True, False, False]),
        ]
        for dtype_name, flags in cases:
            result = df.dtypes == dtype_name
            expected = Series(flags, index=index)
            tm.assert_series_equal(result, expected)
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test_codes_dtypes(self):
        """
        The integer dtype of ``codes`` follows the number of
        categories, also after adding and removing categories.
        """
        def labels(count):
            return ['foo%05d' % i for i in range(count)]

        # GH 8453
        result = Categorical(['foo', 'bar', 'baz'])
        self.assertTrue(result.codes.dtype == 'int8')

        for count, dtype_name in ((400, 'int16'), (40000, 'int32')):
            result = Categorical(labels(count))
            self.assertTrue(result.codes.dtype == dtype_name)

        # adding cats
        result = Categorical(['foo', 'bar', 'baz'])
        self.assertTrue(result.codes.dtype == 'int8')
        result = result.add_categories(labels(400))
        self.assertTrue(result.codes.dtype == 'int16')

        # removing cats
        result = result.remove_categories(labels(300))
        self.assertTrue(result.codes.dtype == 'int8')
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_reshaping(self):
        """
        Unstacking a categorical column of a frame built from a panel
        gives one categorical column per minor-axis label.
        """
        panel = tm.makePanel()
        panel['str'] = 'foo'
        frame = panel.to_frame()
        frame['category'] = frame['str'].astype('category')
        result = frame['category'].unstack()

        template = Categorical(['foo'] * len(panel.major_axis))
        # every column gets its own copy of the categorical
        data = {name: template.copy() for name in ('A', 'B', 'C', 'D')}
        expected = DataFrame(data,
                             columns=Index(list('ABCD'), name='minor'),
                             index=panel.major_axis.set_names('major'))
        tm.assert_frame_equal(result, expected)
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def test_reindex(self):
        """
        Reindexing a categorical Series keeps the categories and fills
        labels that are not in the original index with NaN.
        """
        index = pd.date_range('20000101', periods=3)
        s = Series(['a', 'b', 'c'], dtype='category')

        def check(labels, values):
            expected = Series(Categorical(values=values,
                                          categories=['a', 'b', 'c']))
            expected.index = labels
            result = s.reindex(labels)
            tm.assert_series_equal(result, expected)

        # reindexing to an invalid Categorical
        check(index, [np.nan, np.nan, np.nan])

        # partial reindexing
        check([1, 2], ['b', 'c'])
        check([2, 3], ['c', np.nan])
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_nan_handling(self):
        """
        Check the categories and codes of categorical Series containing
        NaN: as a plain missing value, as an explicit category passed to
        the constructor, and as a category assigned afterwards. The last
        two cases are expected to emit a ``FutureWarning``.
        """

        # Nans are represented as -1 in labels
        s = Series(Categorical(["a", "b", np.nan, "a"]))
        self.assert_numpy_array_equal(s.cat.categories, np.array(["a", "b"]))
        self.assert_numpy_array_equal(s.values.codes, np.array([0, 1, -1, 0]))

        # If categories have nan included, the label should point to that
        # instead
        # Only the construction is wrapped in the warning check; the
        # assertions on the result run outside of it.
        with tm.assert_produces_warning(FutureWarning):
            s2 = Series(Categorical(
                ["a", "b", np.nan, "a"], categories=["a", "b", np.nan]))
        self.assert_numpy_array_equal(s2.cat.categories, np.array(
            ["a", "b", np.nan], dtype=np.object_))
        self.assert_numpy_array_equal(s2.values.codes, np.array([0, 1, 2, 0]))

        # Changing categories should also make the replaced category np.nan
        s3 = Series(Categorical(["a", "b", "c", "a"]))
        # Here the warning check is run with check_stacklevel=False.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            s3.cat.categories = ["a", "b", np.nan]
        self.assert_numpy_array_equal(s3.cat.categories, np.array(
            ["a", "b", np.nan], dtype=np.object_))
        self.assert_numpy_array_equal(s3.values.codes, np.array([0, 1, 2, 0]))
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def test_sequence_like(self):
        """
        A frame holding a categorical column can be turned into a list
        and iterated by tuples, by rows and by columns, and every item
        produced can be converted to a string.
        """
        # GH 7839
        # make sure can iterate
        df = DataFrame({"id": [1, 2, 3, 4, 5, 6],
                        "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
        df['grade'] = Categorical(df['raw_grade'])

        # basic sequencing testing
        result = list(df.grade.values)
        expected = np.array(df.grade.values).tolist()
        tm.assert_almost_equal(result, expected)

        # iteration
        for t in df.itertuples(index=False):
            str(t)

        for _, row in df.iterrows():
            str(row)

        # Stringify the column being visited. This loop used to call
        # ``str`` on the last row left over from the loop above, so the
        # columns themselves were never converted.
        for _, col in df.iteritems():
            str(col)
test_categorical.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_describe(self):
        """
        ``describe`` on the fixture ``self.cat``, on a categorical
        Series, and on a frame pairing a categorical column with an
        equal string column.
        """
        # Categoricals should not show up together with numerical columns
        described = self.cat.describe()
        self.assertEqual(len(described.columns), 1)

        # In a frame, describe() for the cat should be the same as for string
        # arrays (count, unique, top, freq)
        ordered_cat = Categorical(["a", "b", "b", "b"],
                                  categories=['a', 'b', 'c'],
                                  ordered=True)
        summary = Series(ordered_cat).describe()
        expected = Series([4, 2, "b", 3],
                          index=['count', 'unique', 'top', 'freq'])
        tm.assert_series_equal(summary, expected)

        cat_series = pd.Series(pd.Categorical(["a", "b", "c", "c"]))
        frame = pd.DataFrame({"cat": cat_series, "s": ["a", "b", "c", "c"]})
        res = frame.describe()
        self.assert_numpy_array_equal(res["cat"].values, res["s"].values)


问题


面经


文章

微信
公众号

扫码关注公众号