Example source code for the Python class MultiIndex()

Source file: test_alpha_rarefaction.py (project: q2-diversity, author: qiime2)
def test_one_iteration_no_metadata(self):
        columns = pd.MultiIndex.from_product([[1, 200], [1]],
                                             names=['depth', 'iter'])
        data = pd.DataFrame(data=[[1, 2], [1, 2], [1, 2]],
                            columns=columns, index=['S1', 'S2', 'S3'])

        # No counts provided because no metadata
        obs = _compute_summary(data, 'sample-id')

        d = [['S1', 1,   1, 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             ['S1', 200, 1, 2., 2., 2., 2., 2., 2., 2., 2., 2.],
             ['S2', 1,   1, 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             ['S2', 200, 1, 2., 2., 2., 2., 2., 2., 2., 2., 2.],
             ['S3', 1,   1, 1., 1., 1., 1., 1., 1., 1., 1., 1.],
             ['S3', 200, 1, 2., 2., 2., 2., 2., 2., 2., 2., 2.]]
        exp = pd.DataFrame(data=d, columns=['sample-id', 'depth', 'count',
                                            'min', '2%', '9%', '25%', '50%',
                                            '75%', '91%', '98%', 'max'])
        pdt.assert_frame_equal(exp, obs)
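A quick way to see the column layout this test assumes is to build the same index interactively. The following is a minimal sketch (not part of the test suite): MultiIndex.from_product creates one column per (depth, iter) combination, in input order.

import pandas as pd

columns = pd.MultiIndex.from_product([[1, 200], [1]], names=['depth', 'iter'])
df = pd.DataFrame([[1, 2], [1, 2], [1, 2]],
                  columns=columns, index=['S1', 'S2', 'S3'])

print(df.columns.tolist())  # [(1, 1), (200, 1)]
print(df[1])                # selecting depth 1 leaves a frame keyed by 'iter'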
Source file: test_alpha_rarefaction.py (project: q2-diversity, author: qiime2)
def test_two_iterations_no_metadata(self):
        columns = pd.MultiIndex.from_product([[1, 200], [1, 2]],
                                             names=['depth', 'iter'])
        data = pd.DataFrame(data=[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
                            columns=columns, index=['S1', 'S2', 'S3'])

        # No counts provided because no metadata
        obs = _compute_summary(data, 'sample-id')

        d = [['S1', 1,   1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2.],
             ['S1', 200, 1, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4.],
             ['S2', 1,   1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2.],
             ['S2', 200, 1, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4.],
             ['S3', 1,   1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2.],
             ['S3', 200, 1, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4.]]
        exp = pd.DataFrame(data=d, columns=['sample-id', 'depth', 'count',
                                            'min', '2%', '9%', '25%', '50%',
                                            '75%', '91%', '98%', 'max'])
        pdt.assert_frame_equal(exp, obs)
Source file: test_alpha_rarefaction.py (project: q2-diversity, author: qiime2)
def test_unique_metadata_groups(self):
        columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
                                             (200, 2), ('pet', '')],
                                            names=['depth', 'iter'])
        data = pd.DataFrame(data=[[1, 2, 3, 4, 'russ'], [5, 6, 7, 8, 'milo'],
                                  [9, 10, 11, 12, 'peanut']],
                            columns=columns, index=['S1', 'S2', 'S3'])

        obs = _reindex_with_metadata('pet', ['pet'], data)

        exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                names=['depth', 'iter'])
        exp_ind = pd.Index(['milo', 'peanut', 'russ'], name='pet')
        exp = pd.DataFrame(data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
                           columns=exp_col, index=exp_ind)

        pdt.assert_frame_equal(exp, obs[0])

        exp = pd.DataFrame(data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
                           columns=exp_col, index=exp_ind)

        pdt.assert_frame_equal(exp, obs[1])
Source file: test_alpha_rarefaction.py (project: q2-diversity, author: qiime2)
def test_multiple_categories(self):
        columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
                                             (200, 2), ('pet', ''),
                                             ('toy', '')],
                                            names=['depth', 'iter'])
        data = pd.DataFrame(data=[[1, 2, 3, 4, 'russ', 'stick'],
                                  [5, 6, 7, 8, 'milo', 'yeti'],
                                  [9, 10, 11, 12, 'peanut', 'stick']],
                            columns=columns, index=['S1', 'S2', 'S3'])

        obs = _reindex_with_metadata('pet', ['pet', 'toy'], data)

        exp_col = pd.MultiIndex(levels=[[1, 200, 'pet', 'toy'], [1, 2, '']],
                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                names=['depth', 'iter'])
        exp_ind = pd.Index(['milo', 'peanut', 'russ'], name='pet')
        exp = pd.DataFrame(data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
                           columns=exp_col, index=exp_ind)

        pdt.assert_frame_equal(exp, obs[0])

        exp = pd.DataFrame(data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
                           columns=exp_col, index=exp_ind)

        pdt.assert_frame_equal(exp, obs[1])

        obs = _reindex_with_metadata('toy', ['pet', 'toy'], data)

        exp_ind = pd.Index(['stick', 'yeti'], name='toy')
        exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
                           columns=exp_col, index=exp_ind)

        pdt.assert_frame_equal(exp, obs[0])

        exp = pd.DataFrame(data=[[2, 2, 2, 2], [1, 1, 1, 1]],
                           columns=exp_col, index=exp_ind)

        pdt.assert_frame_equal(exp, obs[1])
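The expected frames above use the old-style pd.MultiIndex(levels=..., labels=...) constructor; pandas 0.24 renamed the labels argument to codes. The same sequence of column tuples can be built in a version-independent way with from_arrays, as in the sketch below (it does not reproduce the unused 'pet'/'' level entries that the expected index carries, so it is an illustration rather than a drop-in replacement for the test fixture):

import pandas as pd

# Same tuples as MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
#                           labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
exp_col = pd.MultiIndex.from_arrays([[1, 1, 200, 200], [1, 2, 1, 2]],
                                    names=['depth', 'iter'])
print(exp_col.tolist())  # [(1, 1), (1, 2), (200, 1), (200, 2)]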
Source file: dataset.py (project: kaggle-seizure-prediction, author: sics-lm)
def normalize_segment_names(dataframe, inplace=False):
    """
    Makes the segment index of the dataframe have names which correspond to the original .mat segment names.
    :param dataframe: The dataframe with segment names
    :param inplace: If True, the segment index will be changed in place in the given data frame.
    :return: A DataFrame where the segment name part of the index has been canonicalized. If inplace is True, the
             original dataframe is returned, otherwise a copy is returned.
    """

    index_values = dataframe.index.get_values()
    fixed_values = [(fileutils.get_segment_name(filename), frame) for filename, frame in index_values]
    if not inplace:
        dataframe = dataframe.copy()
    dataframe.index = pd.MultiIndex.from_tuples(fixed_values, names=dataframe.index.names)
    return dataframe
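A hedged usage sketch (the file layout and the behaviour of fileutils.get_segment_name are assumptions, not taken from the project): only the first element of each (filename, frame) tuple is rewritten, so the index stays a two-level MultiIndex with its original level names.

import pandas as pd

def get_segment_name(filename):
    # Assumed stand-in for fileutils.get_segment_name: drop the directory prefix.
    return filename.split('/')[-1]

index = pd.MultiIndex.from_tuples(
    [('data/Dog_1_test_segment_0001.mat', 0),
     ('data/Dog_1_test_segment_0001.mat', 1)],
    names=['segment', 'frame'])
df = pd.DataFrame({'feature': [0.1, 0.2]}, index=index)

fixed = [(get_segment_name(name), frame) for name, frame in df.index]
df.index = pd.MultiIndex.from_tuples(fixed, names=df.index.names)
print(df.index.get_level_values('segment')[0])  # Dog_1_test_segment_0001.mat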
Source file: dataset.py (project: kaggle-seizure-prediction, author: sics-lm)
def reshape_frames(dataframe, frame_length=12):
    """
    Returns a new dataframe with the given frame length.
    :param dataframe: A pandas DataFrame with one window per row.
    :param frame_length: The desired number of windows for each feature frame. Must divide the number of windows in
                         *dataframe* evenly.
    :return: A new pandas DataFrame with the desired window frame width. The columns of the new data frame will be
             a MultiIndex so that future concatenation of data frames aligns properly.
    """

    # Assert that the length of the data frame is divisible by
    # frame_length
    n_windows, window_width = dataframe.shape

    if n_windows % frame_length != 0:
        raise ValueError("The dataframe has {} windows which"
                         " is not divisible by the frame"
                         " length {}".format(n_windows, frame_length))
    values = dataframe.values
    n_frames = n_windows // frame_length  # integer division; divisibility checked above
    frame_width = window_width * frame_length
    window_columns = dataframe.columns
    column_index = pd.MultiIndex.from_product([range(frame_length),
                                               window_columns],
                                              names=['window', 'feature'])
    reshaped_frame = pd.DataFrame(data=values.reshape(n_frames,
                                                      frame_width),
                                  columns=column_index)
    # sortlevel returns a sorted copy rather than sorting in place, so keep the result
    reshaped_frame = reshaped_frame.sortlevel(axis=1)
    return reshaped_frame
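For illustration only (feature names invented): reshaping 4 windows of 2 features into frames of 2 windows yields a 2 x 4 frame whose columns are a (window, feature) MultiIndex, so later concatenations line up by window position and feature name.

import numpy as np
import pandas as pd

windows = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['f0', 'f1'])

frame_length = 2
n_windows, window_width = windows.shape
column_index = pd.MultiIndex.from_product([range(frame_length), windows.columns],
                                          names=['window', 'feature'])
reshaped = pd.DataFrame(windows.values.reshape(n_windows // frame_length,
                                               window_width * frame_length),
                        columns=column_index)

print(reshaped.shape)              # (2, 4)
print(reshaped[0]['f0'].tolist())  # values of feature f0 in window 0: [0, 4]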
Source file: earnings_estimates.py (project: catalyst, author: enigmampc)
def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
        """
        Filters for releases that are on or after each simulation date and
        determines the next quarter by picking out the upcoming release for
        each date in the index.

        Parameters
        ----------
        stacked_last_per_qtr : pd.DataFrame
            A DataFrame with index of calendar dates, sid, and normalized
            quarters with each row being the latest estimate for the row's
            index values, sorted by event date.

        Returns
        -------
        next_releases_per_date_index : pd.MultiIndex
            An index of calendar dates, sid, and normalized quarters, for only
            the rows that have a next event.
        """
        next_releases_per_date = stacked_last_per_qtr.loc[
            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] >=
            stacked_last_per_qtr.index.get_level_values(SIMULATION_DATES)
        ].groupby(
            level=[SIMULATION_DATES, SID_FIELD_NAME],
            as_index=False,
            # Here we take advantage of the fact that `stacked_last_per_qtr` is
            # sorted by event date.
        ).nth(0)
        return next_releases_per_date.index
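The groupby idiom above can be seen in isolation: grouping on a subset of MultiIndex levels and taking .nth(0) keeps the first row of each group together with its full index, which is why the method can simply return .index. A minimal sketch with made-up level names and data:

import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [('2017-01-02', 1, 0), ('2017-01-02', 1, 1), ('2017-01-02', 2, 0)],
    names=['date', 'sid', 'qtr'])
estimates = pd.DataFrame({'estimate': [1.0, 1.5, 2.0]}, index=idx)

# First row per (date, sid) group; all three index levels are preserved.
first = estimates.groupby(level=['date', 'sid'], as_index=False).nth(0)
print(first.index.tolist())
# [('2017-01-02', 1, 0), ('2017-01-02', 2, 0)]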
Source file: earnings_estimates.py (project: catalyst, author: enigmampc)
def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
        """
        Filters for releases that are on or after each simulation date and
        determines the previous quarter by picking out the most recent
        release relative to each date in the index.

        Parameters
        ----------
        stacked_last_per_qtr : pd.DataFrame
            A DataFrame with index of calendar dates, sid, and normalized
            quarters with each row being the latest estimate for the row's
            index values, sorted by event date.

        Returns
        -------
        previous_releases_per_date_index : pd.MultiIndex
            An index of calendar dates, sid, and normalized quarters, for only
            the rows that have a previous event.
        """
        previous_releases_per_date = stacked_last_per_qtr.loc[
            stacked_last_per_qtr[EVENT_DATE_FIELD_NAME] <=
            stacked_last_per_qtr.index.get_level_values(SIMULATION_DATES)
        ].groupby(
            level=[SIMULATION_DATES, SID_FIELD_NAME],
            as_index=False,
            # Here we take advantage of the fact that `stacked_last_per_qtr` is
            # sorted by event date.
        ).nth(-1)
        return previous_releases_per_date.index
Source file: __init__.py (project: jupyter-handsontables, author: techmuch)
def validate(self, obj, value):
        value = super(PandasDataFrame, self).validate(obj, value)
        if self.get_metadata('lexsort'):
            if isinstance(value.columns, pd.MultiIndex):
                value = value.sortlevel(0, axis=1)
        return value
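DataFrame.sortlevel has since been deprecated and removed from pandas; on current versions the equivalent lexsort of the column index is sort_index. A rough sketch of the same normalization:

import pandas as pd

columns = pd.MultiIndex.from_tuples([('b', 2), ('a', 1)], names=['grp', 'n'])
value = pd.DataFrame([[1, 2]], columns=columns)

# Modern equivalent of value.sortlevel(0, axis=1)
value = value.sort_index(axis=1, level=0)
print(value.columns.tolist())  # [('a', 1), ('b', 2)]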
Source file: metrics_test.py (project: meterstick, author: google)
def testTwoDimensionalDistribution(self):
    df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                       "Y": [1, 2, 0, 1, 1, 1, 1],
                       "Z": [1, 0, 0, 0, 0, 0, 0]})
    weights = np.array([1, 1, 1, 1, 1, 1, 1])
    metric = metrics.Distribution("X", ["Y", "Z"])
    output = metric(df, weights)
    correct = pd.DataFrame(
        np.array([1 / 14., 1 / 14., 1 / 14., 11 / 14.]),
        columns=[""],
        index=pd.MultiIndex(levels=[[0, 1, 2], [0, 1]],
                            labels=[[1, 2, 0, 1], [1, 0, 0, 0]],
                            names=["Y", "Z"]))
    self.assertTrue(output.equals(correct))
Source file: core_test.py (project: meterstick, author: google)
def testShuffledDataframeRelativeToJackknife(self):
    # Same as test above, but also testing that reordering the data doesn't
    # change results, up to order.
    df = pd.DataFrame({"X": range(11),
                       "Y": np.concatenate((np.zeros(6), np.ones(5))),
                       "Z": np.concatenate((np.zeros(3), np.ones(8)))})

    metric = metrics.Distribution("X", ["Z"])
    se_method = standard_errors.Jackknife()
    output = core.Analyze(df.iloc[np.random.permutation(11)]).relative_to(
        comparisons.AbsoluteDifference("Y", 0)).with_standard_errors(
            se_method).calculate(metric).run()
    output = (output.
              reset_index().
              sort_values(by=["Y", "Z"]).
              set_index(["Y", "Z"]))

    correct = pd.DataFrame(
        np.array([[-0.2, 0.18100283490],
                  [0.2, 0.18100283490]]),
        columns=["X Distribution Absolute Difference",
                 "X Distribution Absolute Difference Jackknife SE"],
        index=pd.MultiIndex(levels=[[1.], [0., 1.]],
                            labels=[[0, 0], [0, 1]],
                            names=["Y", "Z"]))
    correct = (correct.
               reset_index().
               sort_values(by=["Y", "Z"]).
               set_index(["Y", "Z"]))

    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
Source file: core.py (project: weightedcalcs, author: jsvine)
def groupby_deco(func):
    def func_wrapper(self, thing, *args, **kwargs):
        if isinstance(thing, pd.core.groupby.DataFrameGroupBy):
            agg = thing.apply(lambda x: func(self, x, *args, **kwargs))
            is_series = isinstance(agg, pd.core.series.Series)
            has_multiindex = isinstance(agg.index, pd.MultiIndex)
            if is_series and has_multiindex:
                return agg.unstack()
            else:
                return agg
        return func(self, thing, *args, **kwargs)
    return func_wrapper
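The decorator relies on a pandas behaviour worth spelling out: applying a scalar-returning function to a DataFrameGroupBy with more than one key yields a Series with a MultiIndex, and unstack() pivots the innermost level back into columns. A small sketch of that last step with made-up data:

import pandas as pd

df = pd.DataFrame({'region': ['n', 'n', 's', 's'],
                   'year':   [2020, 2021, 2020, 2021],
                   'value':  [1.0, 2.0, 3.0, 4.0],
                   'weight': [1.0, 1.0, 2.0, 2.0]})

# Weighted mean per (region, year): a Series with a two-level MultiIndex.
agg = df.groupby(['region', 'year']).apply(
    lambda g: (g['value'] * g['weight']).sum() / g['weight'].sum())

print(isinstance(agg.index, pd.MultiIndex))  # True
print(agg.unstack())                         # 'year' values become columns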
Source file: common.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def _isnull_new(obj):
    if lib.isscalar(obj):
        return lib.checknull(obj)
    # hack (for now) because MI registers as ndarray
    elif isinstance(obj, pd.MultiIndex):
        raise NotImplementedError("isnull is not defined for MultiIndex")
    elif isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
        return _isnull_ndarraylike(obj)
    elif isinstance(obj, ABCGeneric):
        return obj._constructor(obj._data.isnull(func=isnull))
    elif isinstance(obj, list) or hasattr(obj, '__array__'):
        return _isnull_ndarraylike(np.asarray(obj))
    else:
        return obj is None
Source file: test_timeseries.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_get_level_values_box(self):
        from pandas import MultiIndex

        dates = date_range('1/1/2000', periods=4)
        levels = [dates, [0, 1]]
        labels = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]

        index = MultiIndex(levels=levels, labels=labels)

        self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp))
Source file: test_base.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def setUp(self):
        self.indices = dict(unicodeIndex=tm.makeUnicodeIndex(100),
                            strIndex=tm.makeStringIndex(100),
                            dateIndex=tm.makeDateIndex(100),
                            periodIndex=tm.makePeriodIndex(100),
                            tdIndex=tm.makeTimedeltaIndex(100),
                            intIndex=tm.makeIntIndex(100),
                            rangeIndex=tm.makeIntIndex(100),
                            floatIndex=tm.makeFloatIndex(100),
                            boolIndex=Index([True, False]),
                            catIndex=tm.makeCategoricalIndex(100),
                            empty=Index([]),
                            tuples=MultiIndex.from_tuples(lzip(
                                ['foo', 'bar', 'baz'], [1, 2, 3])))
        self.setup_indices()
Source file: test_base.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_construction_list_mixed_tuples(self):
        # 10697
        # if we are constructing from a mixed list of tuples, make sure that we
        # are independent of the sorting order
        idx1 = Index([('A', 1), 'B'])
        self.assertIsInstance(idx1, Index)
        self.assertNotIsInstance(idx1, MultiIndex)
        idx2 = Index(['B', ('A', 1)])
        self.assertIsInstance(idx2, Index)
        self.assertNotIsInstance(idx2, MultiIndex)
Source file: test_base.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_str_attribute(self):
        # GH9068
        methods = ['strip', 'rstrip', 'lstrip']
        idx = Index([' jack', 'jill ', ' jesse ', 'frank'])
        for method in methods:
            expected = Index([getattr(str, method)(x) for x in idx.values])
            tm.assert_index_equal(
                getattr(Index.str, method)(idx.str), expected)

        # create a few instances that are not able to use .str accessor
        indices = [Index(range(5)), tm.makeDateIndex(10),
                   MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]),
                   PeriodIndex(start='2000', end='2010', freq='A')]
        for idx in indices:
            with self.assertRaisesRegexp(AttributeError,
                                         'only use .str accessor'):
                idx.str.repeat(2)

        idx = Index(['a b c', 'd e', 'f'])
        expected = Index([['a', 'b', 'c'], ['d', 'e'], ['f']])
        tm.assert_index_equal(idx.str.split(), expected)
        tm.assert_index_equal(idx.str.split(expand=False), expected)

        expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan),
                                           ('f', np.nan, np.nan)])
        tm.assert_index_equal(idx.str.split(expand=True), expected)

        # test boolean case, should return np.array instead of boolean Index
        idx = Index(['a1', 'a2', 'b1', 'b2'])
        expected = np.array([True, True, False, False])
        tm.assert_numpy_array_equal(idx.str.startswith('a'), expected)
        self.assertIsInstance(idx.str.startswith('a'), np.ndarray)
        s = Series(range(4), index=idx)
        expected = Series(range(2), index=['a1', 'a2'])
        tm.assert_series_equal(s[s.index.str.startswith('a')], expected)
Source file: test_base.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self):
        # GH7774
        idx = pd.Index(list('abc'))

        def get_reindex_type(target):
            return idx.reindex(target)[0].dtype.type

        self.assertEqual(get_reindex_type(pd.Int64Index([])), np.int64)
        self.assertEqual(get_reindex_type(pd.Float64Index([])), np.float64)
        self.assertEqual(get_reindex_type(pd.DatetimeIndex([])), np.datetime64)

        reindexed = idx.reindex(pd.MultiIndex(
            [pd.Int64Index([]), pd.Float64Index([])], [[], []]))[0]
        self.assertEqual(reindexed.levels[0].dtype.type, np.int64)
        self.assertEqual(reindexed.levels[1].dtype.type, np.float64)
Source file: test_reshape.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_pivot_index_none(self):
        # gh-3962
        data = {
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }

        frame = DataFrame(data).set_index('index')
        result = frame.pivot(columns='columns', values='values')
        expected = DataFrame({
            'One': {'A': 1., 'B': 2., 'C': 3.},
            'Two': {'A': 1., 'B': 2., 'C': 3.}
        })

        expected.index.name, expected.columns.name = 'index', 'columns'
        assert_frame_equal(result, expected)

        # omit values
        result = frame.pivot(columns='columns')

        expected.columns = pd.MultiIndex.from_tuples([('values', 'One'),
                                                      ('values', 'Two')],
                                                     names=[None, 'columns'])
        expected.index.name = 'index'
        assert_frame_equal(result, expected, check_names=False)
        self.assertEqual(result.index.name, 'index',)
        self.assertEqual(result.columns.names, (None, 'columns'))
        expected.columns = expected.columns.droplevel(0)

        data = {
            'index': range(7),
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }

        result = frame.pivot(columns='columns', values='values')

        expected.columns.name = 'columns'
        assert_frame_equal(result, expected)
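The "omit values" branch is the interesting MultiIndex case: when values is not passed, pivot keeps every remaining column and adds the original column name as an outer column level. A minimal sketch with toy data (not the test fixture):

import pandas as pd

frame = pd.DataFrame({'index': ['A', 'B', 'A', 'B'],
                      'columns': ['One', 'One', 'Two', 'Two'],
                      'values': [1., 2., 3., 4.]}).set_index('index')

result = frame.pivot(columns='columns')
print(result.columns.tolist())
# [('values', 'One'), ('values', 'Two')], a (field, column label) MultiIndex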

