def _set_display_options(self, dataframe, display_schema):
"""
Replaces the dimension options with those that the user has specified manually e.g. change 'm' to 'mobile'
"""
dataframe = dataframe.copy()
for key, dimension in display_schema['dimensions'].items():
if 'display_options' in dimension:
display_values = [dimension['display_options'].get(value, value)
for value in dataframe.index.get_level_values(key).unique()]
if not display_values:
continue
            if isinstance(dataframe.index, pd.MultiIndex):
                # set_levels(..., inplace=True) is gone in modern pandas; rebind instead.
                dataframe.index = dataframe.index.set_levels(display_values, level=key)
            else:
                # Preserve the dimension key as the index name.
                dataframe.index = pd.Index(display_values, name=key)
return dataframe
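A minimal usage sketch of the method above, with a hypothetical display_schema and a plain-indexed frame (the owning class is not shown, so the call is illustrative):

import pandas as pd

# Hypothetical schema: maps raw dimension values to display labels.
display_schema = {
    'dimensions': {
        'device': {'display_options': {'m': 'mobile', 'd': 'desktop'}},
    },
}
df = pd.DataFrame({'clicks': [10, 20]},
                  index=pd.Index(['m', 'd'], name='device'))
# renderer._set_display_options(df, display_schema).index
# -> Index(['mobile', 'desktop'], name='device')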
def test_three_iterations_no_metadata(self):
columns = pd.MultiIndex.from_product([[1, 200], [1, 2, 3]],
names=['depth', 'iter'])
data = pd.DataFrame(data=[[1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6],
[1, 2, 3, 4, 5, 6]],
columns=columns, index=['S1', 'S2', 'S3'])
# No counts provided because no metadata
obs = _compute_summary(data, 'sample-id')
d = [['S1', 1, 1, 1., 1.04, 1.18, 1.5, 2., 2.5, 2.82, 2.96, 3.],
['S1', 200, 1, 4., 4.04, 4.18, 4.5, 5., 5.5, 5.82, 5.96, 6.],
['S2', 1, 1, 1., 1.04, 1.18, 1.5, 2., 2.5, 2.82, 2.96, 3.],
['S2', 200, 1, 4., 4.04, 4.18, 4.5, 5., 5.5, 5.82, 5.96, 6.],
['S3', 1, 1, 1., 1.04, 1.18, 1.5, 2., 2.5, 2.82, 2.96, 3.],
['S3', 200, 1, 4., 4.04, 4.18, 4.5, 5., 5.5, 5.82, 5.96, 6.]]
exp = pd.DataFrame(data=d, columns=['sample-id', 'depth', 'count',
'min', '2%', '9%', '25%', '50%',
'75%', '91%', '98%', 'max'])
pdt.assert_frame_equal(exp, obs)
def test_two_iterations_with_metadata_where_values_are_identical(self):
columns = pd.MultiIndex.from_product([[1, 200], [1, 2]],
names=['depth', 'iter'])
data = pd.DataFrame(data=[[3, 6, 9, 9]], columns=columns,
index=['milo'])
counts = pd.DataFrame(data=[[3, 3, 3, 3]], columns=columns,
index=['milo'])
obs = _compute_summary(data, 'pet', counts=counts)
d = [
['milo', 1, 3., 3.06, 3.27, 3.75, 4.5, 5.25, 5.73, 5.94, 6., 3],
['milo', 200, 9., 9., 9., 9., 9., 9., 9., 9., 9., 3],
]
exp = pd.DataFrame(data=d, columns=['pet', 'depth', 'min', '2%', '9%',
'25%', '50%', '75%', '91%', '98%',
'max', 'count'])
pdt.assert_frame_equal(exp, obs)
def test_some_duplicates_in_category(self):
columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
(200, 2), ('pet', '')],
names=['depth', 'iter'])
data = pd.DataFrame(data=[[1, 2, 3, 4, 'russ'], [5, 6, 7, 8, 'milo'],
[9, 10, 11, 12, 'russ']],
columns=columns, index=['S1', 'S2', 'S3'])
obs = _reindex_with_metadata('pet', ['pet'], data)
exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['depth', 'iter'])
exp_ind = pd.Index(['milo', 'russ'], name='pet')
exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
columns=exp_col, index=exp_ind)
pdt.assert_frame_equal(exp, obs[0])
exp = pd.DataFrame(data=[[1, 1, 1, 1], [2, 2, 2, 2]],
columns=exp_col, index=exp_ind)
pdt.assert_frame_equal(exp, obs[1])
def test_all_identical(self):
columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
(200, 2), ('pet', '')],
names=['depth', 'iter'])
data = pd.DataFrame(data=[[1, 2, 3, 4, 'russ'], [5, 6, 7, 8, 'russ'],
[9, 10, 11, 12, 'russ']],
columns=columns, index=['S1', 'S2', 'S3'])
obs = _reindex_with_metadata('pet', ['pet'], data)
exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['depth', 'iter'])
exp_ind = pd.Index(['russ'], name='pet')
exp = pd.DataFrame(data=[[5, 6, 7, 8]],
columns=exp_col, index=exp_ind)
pdt.assert_frame_equal(exp, obs[0])
exp = pd.DataFrame(data=[[3, 3, 3, 3]],
columns=exp_col, index=exp_ind)
pdt.assert_frame_equal(exp, obs[1])
def write_data(self, result_dict):
for key, result in six.iteritems(result_dict):
is_null = False
if isinstance(result, pd.DataFrame):
if result.isnull().any().any():
is_null = True
elif isinstance(result, pd.Series):
if result.isnull().any():
is_null = True
else:
raise ValueError("PandasHDFDataHandler doesn't support type "
"{} (in key {})".format(type(result), key))
            if is_null:
                raise ValueError("data {} has NaN".format(key))
with SimpleTimer("Writing generated data {} to hdf5 file"
.format(key),
end_in_new_line=False):
if (isinstance(result, pd.DataFrame)
and isinstance(result.index, pd.MultiIndex)
and isinstance(result.columns, pd.MultiIndex)):
self.hdf_store.put(key, result)
else:
self.hdf_store.put(key, result, format='table')
self.hdf_store.flush(fsync=True)
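The NaN guard above reduces twice because DataFrame.isnull() is elementwise; a quick standalone check of that pattern:

import numpy as np
import pandas as pd

clean = pd.DataFrame({'a': [1.0, 2.0]})
dirty = pd.DataFrame({'a': [1.0, np.nan]})
# .any().any() collapses the elementwise mask over both axes to one bool.
assert not clean.isnull().any().any()
assert dirty.isnull().any().any()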
def merge_interictal_preictal(interictal, preictal):
"""
    Merges the *interictal* and *preictal* data frames into a single data frame. Also sorts the multilevel index.
:param interictal: A data frame containing the interictal samples.
:param preictal: A data frame containing the preictal samples.
:return: A data frame containing both interictal and preictal data. The multilevel index of the data frame
is sorted.
"""
logging.info("Merging interictal and preictal datasets")
try:
        # sortlevel() was removed in pandas 1.0; sort_index(level=...) is the replacement.
        preictal.sort_index(level='segment', inplace=True)
        if isinstance(preictal.columns, pd.MultiIndex):
            preictal.sort_index(axis=1, inplace=True)
        interictal.sort_index(level='segment', inplace=True)
        if isinstance(interictal.columns, pd.MultiIndex):
            interictal.sort_index(axis=1, inplace=True)
    except TypeError:
        logging.warning("TypeError when trying to merge interictal and preictal sets.")
    dataset = pd.concat((interictal, preictal))
    dataset.sort_index(level='segment', inplace=True)
return dataset
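A minimal sketch of the merge, assuming frames indexed by a 'segment' level such as the loaders below produce:

import numpy as np
import pandas as pd

# Assumes merge_interictal_preictal from above is in scope.
idx = pd.MultiIndex.from_product([[0, 1], range(3)],
                                 names=('segment', 'start_sample'))
interictal = pd.DataFrame({'f0': np.zeros(6), 'Preictal': 0}, index=idx)
preictal = pd.DataFrame({'f0': np.ones(6), 'Preictal': 1}, index=idx)
merged = merge_interictal_preictal(interictal, preictal)
# 12 rows, sorted on the 'segment' index level.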
def test_k_fold_segment_split():
""" Test function for the k-fold segment split """
interictal_classes = np.zeros(120)
preictal_classes = np.ones(120)
classes = np.concatenate((interictal_classes, preictal_classes,))
segments = np.arange(12)
i = np.arange(240)
index = pd.MultiIndex.from_product([segments, np.arange(20)], names=('segment', 'start_sample'))
dataframe = pd.DataFrame({'Preictal': classes, 'i': i}, index=index)
# With a 6-fold cross validator, we expect each held-out fold to contain exactly 2 segments, one from each class
cv1 = SegmentCrossValidator(dataframe, n_folds=6, shuffle=True, random_state=42)
cv2 = SegmentCrossValidator(dataframe, n_folds=6, shuffle=True, random_state=42)
for (training_fold1, test_fold1), (training_fold2, test_fold2) in zip(cv1, cv2):
        assert np.all(training_fold1 == training_fold2) and np.all(test_fold1 == test_fold2)
def load_preictal_dataframes(feature_folder, sliding_frames=False, **kwargs):
"""
Convenience function for loading preictal dataframes. Sets the 'Preictal' column to 1.
:param feature_folder: The folder to load the feature data from.
:param sliding_frames: If True, the data frame will be extended using sliding frames over the feature windows.
:param kwargs: keyword arguments to use for loading the features.
:return: A DataFrame of preictal data with a 'Preictal' column set to 1.
"""
preictal = load_feature_files(feature_folder,
class_name="preictal",
sliding_frames=sliding_frames,
**kwargs)
preictal['Preictal'] = 1
    preictal.sort_index(level='segment', inplace=True)
    if isinstance(preictal.columns, pd.MultiIndex):
        preictal.sort_index(axis=1, inplace=True)
return preictal
def load_interictal_dataframes(feature_folder, sliding_frames=False, **kwargs):
"""
Convenience function for loading interictal dataframes. Sets the 'Preictal' column to 0.
:param feature_folder: The folder to load the feature data from.
:param sliding_frames: If True, the data frame will be extended using sliding frames over the feature windows.
:param kwargs: keyword arguments to use for loading the features.
:return: A DataFrame of interictal data with a 'Preictal' column set to 0.
"""
    interictal = load_feature_files(feature_folder,
                                    class_name="interictal",
                                    sliding_frames=sliding_frames,
                                    **kwargs)
    interictal['Preictal'] = 0
    interictal.sort_index(level='segment', inplace=True)
    if isinstance(interictal.columns, pd.MultiIndex):
        interictal.sort_index(axis=1, inplace=True)
return interictal
def create_sliding_frames(dataframe, frame_length=12):
"""
Wrapper for the extend_data_with_sliding_frames function which works with numpy arrays.
This version does the data-frame conversion for us.
:param dataframe: The dataframe to extend.
:param frame_length: The frame length to use in the resulting extended data frame.
:return: A new data frame where the original dataframe has been extended with sliding frames.
"""
extended_array = extend_data_with_sliding_frames(dataframe.values)
# We should preserve the columns of the dataframe, otherwise
# concatenating different dataframes along the row-axis will give
# wrong results
window_columns = dataframe.columns
column_index = pd.MultiIndex.from_product([range(frame_length),
window_columns],
names=['window', 'feature'])
return pd.DataFrame(data=extended_array,
columns=column_index)
def testTwoDimensionalCumulativeDistribution(self):
df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
"Y": [1, 2, 0, 1, 1, 1, 1],
"Z": [1, 0, 0, 0, 0, 0, 0]})
weights = np.array([1, 1, 1, 1, 1, 1, 1])
metric = metrics.CumulativeDistribution("X", ["Y", "Z"])
output = metric(df, weights)
correct = pd.DataFrame(
np.array([1 / 14., 12 / 14., 13 / 14., 1.]),
columns=[""],
index=pd.MultiIndex(levels=[[0, 1, 2], [0, 1]],
                            codes=[[0, 1, 1, 2], [0, 0, 1, 0]],
names=["Y", "Z"]))
self.assertTrue(all(output.index == correct.index) and
all(output.columns == correct.columns) and
all(abs(output.values - correct.values) < 1e-10))
def testShuffledTwoDimensionalCumulativeDistribution(self):
df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
"Y": [1, 2, 0, 1, 1, 1, 1],
"Z": [1, 0, 0, 0, 0, 0, 0]})
weights = np.array([1, 1, 1, 1, 1, 1, 1])
metric = metrics.CumulativeDistribution("X", ["Y", "Z"])
output = metric(df.iloc[np.random.permutation(7)], weights)
correct = pd.DataFrame(
np.array([1 / 14., 12 / 14., 13 / 14., 1.]),
columns=[""],
index=pd.MultiIndex(levels=[[0, 1, 2], [0, 1]],
                            codes=[[0, 1, 1, 2], [0, 0, 1, 0]],
names=["Y", "Z"]))
self.assertTrue(all(output.index == correct.index) and
all(output.columns == correct.columns) and
all(abs(output.values - correct.values) < 1e-10))
def testRelativeToSplitJackknife(self):
data = pd.DataFrame(
{"X": [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8],
"Y": [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3],
"Z": [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]})
metric = metrics.Sum("X")
comparison = comparisons.AbsoluteDifference("Z", 0)
se_method = standard_errors.Jackknife()
output = core.Analyze(data).split_by("Y").relative_to(
comparison).with_standard_errors(se_method).calculate(metric).run()
rowindex = pd.MultiIndex(
levels=[[1, 2, 3], [1]],
        codes=[[0, 1, 2], [0, 0, 0]],
names=["Y", "Z"])
correct = pd.DataFrame(
np.array([[-3.0, np.sqrt(5 * np.var([0, -1, -2, -3, -4, -5]))],
[-3.0, np.sqrt(5 * np.var([3, 2, 1, -8, -7, -6]))],
[-3.0, np.sqrt(5 * np.var([6, 5, 4, -11, -10, -9]))]]),
columns=("sum(X) Absolute Difference",
"sum(X) Absolute Difference Jackknife SE"),
index=rowindex)
self.assertTrue(output.equals(correct))
def testDataframeRelativeTo(self):
df = pd.DataFrame({"X": range(11),
"Y": np.concatenate((np.zeros(6), np.ones(5))),
"Z": np.concatenate((np.zeros(3), np.ones(8)))})
metric = metrics.Distribution("X", ["Z"])
output = core.Analyze(df).relative_to(comparisons.AbsoluteDifference(
"Y", 0)).calculate(metric).run()
correct = pd.DataFrame(
np.array([-0.2, 0.2]),
columns=["X Distribution Absolute Difference"],
index=pd.MultiIndex(levels=[[1.], [0., 1.]],
                            codes=[[0, 0], [0, 1]],
names=["Y", "Z"]))
self.assertTrue(all(output.index == correct.index) and
all(output.columns == correct.columns) and
np.all(abs(output.values - correct.values) < 1e-10))
def testSplitDataframe(self):
df = pd.DataFrame({"X": range(11),
"Y": np.concatenate((np.zeros(6), np.ones(5))),
"Z": np.concatenate((np.zeros(3), np.ones(8)))})
metric = metrics.Distribution("X", ["Z"])
output = core.Analyze(df).split_by(["Y"]).calculate(metric).run()
correct = pd.DataFrame(
np.array([0.2, 0.8, 0.0, 1.0]),
columns=["X Distribution"],
index=pd.MultiIndex(levels=[[0.0, 1.0], [0.0, 1.0]],
                            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=["Y", "Z"]))
self.assertTrue(all(output.index == correct.index) and
all(output.columns == correct.columns) and
np.all(abs(output.values - correct.values) < 1e-10))
common.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def _isnull_old(obj):
"""Detect missing values. Treat None, NaN, INF, -INF as null.
Parameters
----------
arr: ndarray or object value
Returns
-------
boolean ndarray or boolean
"""
if lib.isscalar(obj):
return lib.checknull_old(obj)
# hack (for now) because MI registers as ndarray
elif isinstance(obj, pd.MultiIndex):
raise NotImplementedError("isnull is not defined for MultiIndex")
elif isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
return _isnull_ndarraylike_old(obj)
elif isinstance(obj, ABCGeneric):
return obj._constructor(obj._data.isnull(func=_isnull_old))
elif isinstance(obj, list) or hasattr(obj, '__array__'):
return _isnull_ndarraylike_old(np.asarray(obj))
else:
return obj is None
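_isnull_old is the internal path behind pandas' inf-as-null mode; the public behaviour it implements can be reproduced through the use_inf_as_na option (deprecated in pandas 2.1 and removed in 3.0, so this sketch assumes an older pandas):

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, np.inf])
print(pd.isna(s).tolist())          # [False, True, False]
with pd.option_context('mode.use_inf_as_na', True):
    print(pd.isna(s).tolist())      # [False, True, True]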
test_base.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_equals_op_multiindex(self):
# GH9785
# test comparisons of multiindex
from pandas.compat import StringIO
df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1])
tm.assert_numpy_array_equal(df.index == df.index,
np.array([True, True]))
mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)])
tm.assert_numpy_array_equal(df.index == mi1, np.array([True, True]))
mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)])
tm.assert_numpy_array_equal(df.index == mi2, np.array([True, False]))
mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
df.index == mi3
index_a = Index(['foo', 'bar', 'baz'])
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
df.index == index_a
tm.assert_numpy_array_equal(index_a == mi3,
np.array([False, False, False]))
test_reshape.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_stack_ints(self):
df = DataFrame(
np.random.randn(30, 27),
columns=MultiIndex.from_tuples(
list(itertools.product(range(3), repeat=3))
)
)
assert_frame_equal(
df.stack(level=[1, 2]),
df.stack(level=1).stack(level=1)
)
assert_frame_equal(
df.stack(level=[-2, -1]),
df.stack(level=1).stack(level=1)
)
df_named = df.copy()
df_named.columns.set_names(range(3), inplace=True)
assert_frame_equal(
df_named.stack(level=[1, 2]),
df_named.stack(level=1).stack(level=1)
)
test_reshape.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_unstack_level_binding(self):
# GH9856
mi = pd.MultiIndex(
levels=[[u('foo'), u('bar')], [u('one'), u('two')],
[u('a'), u('b')]],
            codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]],
names=[u('first'), u('second'), u('third')])
s = pd.Series(0, index=mi)
result = s.unstack([1, 2]).stack(0)
expected_mi = pd.MultiIndex(
levels=[['foo', 'bar'], ['one', 'two']],
            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['first', 'second'])
expected = pd.DataFrame(np.array([[np.nan, 0],
[0, np.nan],
[np.nan, 0],
[0, np.nan]],
dtype=np.float64),
index=expected_mi,
columns=pd.Index(['a', 'b'], name='third'))
assert_frame_equal(result, expected)
test_reshape.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_unstack_to_series(self):
# check reversibility
data = self.frame.unstack()
self.assertTrue(isinstance(data, Series))
undo = data.unstack().T
assert_frame_equal(undo, self.frame)
# check NA handling
        data = DataFrame({'x': [1, 2, np.nan], 'y': [3.0, 4, np.nan]})
data.index = Index(['a', 'b', 'c'])
result = data.unstack()
midx = MultiIndex(levels=[['x', 'y'], ['a', 'b', 'c']],
                          codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
        expected = Series([1, 2, np.nan, 3, 4, np.nan], index=midx)
assert_series_equal(result, expected)
# check composability of unstack
old_data = data.copy()
for _ in range(4):
data = data.unstack()
assert_frame_equal(old_data, data)
def bar_fueltype_and_country_totals(dfs, keys, figsize=(12, 8)):
df = lookup(dfs, keys)
countries = df.columns.levels[0] if isinstance(df.columns, pd.MultiIndex) else df.columns
n = len(countries)
subplots = gather_nrows_ncols(n)
fig, ax = plt.subplots(*subplots, figsize=figsize)
    if sum(subplots) > 2:
ax_iter = ax.flat
else:
ax_iter = np.array(ax).flat
for country in countries:
ax = next(ax_iter)
df[country].plot.bar(ax=ax, sharex=True, rot=55, legend=None)
        ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 2))
ax.set_title(country)
fig.tight_layout(pad=0.5)
return fig, ax
def _get_header_iterable(self):
"""Reformats all but the last header rows."""
df_clean = self.df.loc[:, self.df.columns.get_level_values(0) != ORG_ROW_NAMES]
if isinstance(df_clean.columns, pd.MultiIndex):
transpose_tuples = zip(*df_clean.columns.tolist())
header_values = []
for i, t in enumerate(transpose_tuples):
if i < len(transpose_tuples) - 1:
# Not the last column, aggregate repeated items, e.g. [['aa', 'aa', 'aa'], ['bb', 'bb', 'bb']]
header_values.append([list(g) for _, g in itertools.groupby(t)])
else:
# For the last column keep all elements in single list, e.g. ['a', 'b', 'c', 'a', 'b', 'c']
header_values.append(list(t))
return header_values
else:
return [df_clean.columns.tolist()]
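The grouping step on its own, with two hypothetical header rows:

import itertools

columns = [('aa', 'a'), ('aa', 'b'), ('bb', 'a'), ('bb', 'b')]
rows = list(zip(*columns))
# All but the last row: collapse runs of repeated labels.
print([list(g) for _, g in itertools.groupby(rows[0])])  # [['aa', 'aa'], ['bb', 'bb']]
# Last row stays flat.
print(list(rows[1]))  # ['a', 'b', 'a', 'b']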
def _perform_operation(self, dataframe, key, schema, value_func, operation):
# Check for references
references = (dataframe.columns.get_level_values(0).tolist()
if isinstance(dataframe.columns, pd.MultiIndex)
else [None])
for reference in references:
metric_df = value_func(dataframe, schema, reference=reference)
operation_key = ('{}_{}'.format(metric_df.name, key)
if reference is None
else (reference, '{}_{}'.format(metric_df.name[1], key)))
if isinstance(dataframe.index, pd.MultiIndex):
unstack_levels = list(range(1, len(dataframe.index.levels)))
dataframe[operation_key] = metric_df.groupby(level=unstack_levels).apply(operation)
else:
dataframe[operation_key] = operation(metric_df)
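The MultiIndex branch groups on every index level except the outermost, so the operation runs along the outer level within each inner group; a minimal sketch of that pattern, with cumsum standing in for the operation:

import pandas as pd

idx = pd.MultiIndex.from_product([['2020', '2021'], ['a', 'b']],
                                 names=['year', 'dim'])
metric = pd.Series([1.0, 2.0, 3.0, 4.0], index=idx)
inner_levels = list(range(1, metric.index.nlevels))
# Within each 'dim' group the values accumulate across 'year'.
print(metric.groupby(level=inner_levels).apply(lambda s: s.cumsum()))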
def _render_data(self, dataframe, display_schema):
n = len(dataframe.index.levels) if isinstance(dataframe.index, pd.MultiIndex) else 1
dimensions = list(display_schema['dimensions'].items())
row_dimensions, column_dimensions = dimensions[:n], dimensions[n:]
data = []
for idx, df_row in dataframe.iterrows():
row = {}
if not isinstance(idx, tuple):
idx = (idx,)
for key, value in self._render_dimension_data(idx, row_dimensions):
row[key] = value
for key, value in self._render_metric_data(df_row, column_dimensions,
display_schema['metrics'], display_schema.get('references')):
row[key] = value
data.append(row)
return data
def create_multi_index(arr):
    '''Build a pandas.MultiIndex over arr.coords from a DataArray.
    Parameters
    ----------
    arr: xarray.DataArray
    Returns
    -------
    index: pandas.MultiIndex with names taken from arr.dims
           and levels taken from arr.coords
    '''
np_arrs = tuple(getattr(arr, dim).values for dim in arr.dims)
index = pd.MultiIndex.from_product(np_arrs, names=arr.dims)
return index
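A usage sketch, assuming xarray is installed and create_multi_index from above is in scope:

import numpy as np
import xarray as xr

arr = xr.DataArray(np.zeros((2, 3)), dims=('x', 'y'),
                   coords={'x': [0, 1], 'y': ['a', 'b', 'c']})
index = create_multi_index(arr)
# MultiIndex([(0, 'a'), (0, 'b'), ..., (1, 'c')], names=['x', 'y'])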
def setUp(self):
samples = []
t = 1.0
for i in range(20):
sample = []
sample.append(t)
t += 1.1
sample.append(t)
t += 0.2
sample.append(t)
t += 1.5
sample.append(t)
t += 0.1
samples.append(sample)
self.samples = pd.DataFrame(
data=samples,
columns=pd.MultiIndex(
levels=[['fn1', 'fn2'], ['begin', 'end']],
                codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
)
)
def read_seurat_hdf5(hdf5_file):
import h5py
with h5py.File(hdf5_file, 'r') as handle:
        # Dataset.value was removed in h5py 3.0; [()] reads the full dataset.
        cols = handle.get("seurat_matrix/columns")[()]
        rows = handle.get("seurat_matrix/rows")[()]
        df = handle.get("seurat_matrix/matrix")[()]
        seurat_matrix = pd.DataFrame(df, index=cols, columns=rows).T
        # add sample annotations as MultiIndex columns; list comprehensions
        # instead of map(), which is a lazy iterator in Python 3
        parts = seurat_matrix.columns.str.split("|")
        condition = [x[0] for x in parts]
        replicate = [x[1] for x in parts]
        cell = [x[2] for x in parts]
        grna = [x[3] for x in parts]
        gene = [x[1] if len(x) > 1 else x[0][:4]
                for x in pd.Series(grna).str.split("_")]
        seurat_matrix.columns = pd.MultiIndex.from_arrays(
            [condition, replicate, cell, grna, gene],
            names=['condition', 'replicate', 'cell', 'grna', 'gene'])
return seurat_matrix
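The column-splitting step can be checked in isolation with hypothetical column names of the form condition|replicate|cell|grna:

import pandas as pd

df = pd.DataFrame([[1, 2]], columns=['stim|r1|AAAC|CTRL_scrambled',
                                     'stim|r1|AAAG|TP53_g1'])
parts = df.columns.str.split("|")
df.columns = pd.MultiIndex.from_arrays(
    [[p[i] for p in parts] for i in range(4)],
    names=['condition', 'replicate', 'cell', 'grna'])
print(df.columns.get_level_values('grna').tolist())
# ['CTRL_scrambled', 'TP53_g1']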
def test_observed_otus(self):
t = biom.Table(np.array([[150, 100, 100], [50, 100, 100]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
obs = _compute_rarefaction_data(feature_table=t,
min_depth=1,
max_depth=200,
steps=2,
iterations=1,
phylogeny=None,
metrics=['observed_otus'])
exp_ind = pd.MultiIndex.from_product(
[[1, 200], [1]],
names=['depth', 'iter'])
exp = pd.DataFrame(data=[[1, 2], [1, 2], [1, 2]],
columns=exp_ind,
index=['S1', 'S2', 'S3'])
pdt.assert_frame_equal(obs['observed_otus'], exp)
def test_multiple_metrics(self):
t = biom.Table(np.array([[150, 100, 100], [50, 100, 100]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
obs = _compute_rarefaction_data(feature_table=t,
min_depth=1,
max_depth=200,
steps=2,
iterations=1,
phylogeny=None,
metrics=['observed_otus', 'shannon'])
exp_ind = pd.MultiIndex.from_product(
[[1, 200], [1]],
names=['depth', 'iter'])
exp = pd.DataFrame(data=[[1, 2], [1, 2], [1, 2]],
columns=exp_ind,
index=['S1', 'S2', 'S3'])
pdt.assert_frame_equal(obs['observed_otus'], exp)
exp = pd.DataFrame(data=[[0., 0.811278124459], [0., 1.], [0., 1.]],
columns=exp_ind,
index=['S1', 'S2', 'S3'])
pdt.assert_frame_equal(obs['shannon'], exp)