def _set_display_options(self, dataframe, display_schema):
"""
Replaces the dimension options with those that the user has specified manually e.g. change 'm' to 'mobile'
"""
dataframe = dataframe.copy()
for key, dimension in display_schema['dimensions'].items():
if 'display_options' in dimension:
display_values = [dimension['display_options'].get(value, value)
for value in dataframe.index.get_level_values(key).unique()]
if not display_values:
continue
            if isinstance(dataframe.index, pd.MultiIndex):
                # set_levels(..., inplace=True) is gone in modern pandas; rebind instead.
                dataframe.index = dataframe.index.set_levels(display_values, level=key)
            else:
                # Preserve the dimension key as the index name.
                dataframe.index = pd.Index(display_values, name=key)
return dataframe
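A minimal usage sketch of the method above, with a hypothetical display_schema and a plain-indexed frame (the owning class is not shown, so the call is illustrative):

import pandas as pd

# Hypothetical schema: maps raw dimension values to display labels.
display_schema = {
    'dimensions': {
        'device': {'display_options': {'m': 'mobile', 'd': 'desktop'}},
    },
}
df = pd.DataFrame({'clicks': [10, 20]},
                  index=pd.Index(['m', 'd'], name='device'))
# renderer._set_display_options(df, display_schema).index
# -> Index(['mobile', 'desktop'], name='device')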
def test_three_iterations_no_metadata(self):
columns = pd.MultiIndex.from_product([[1, 200], [1, 2, 3]],
names=['depth', 'iter'])
data = pd.DataFrame(data=[[1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6],
[1, 2, 3, 4, 5, 6]],
columns=columns, index=['S1', 'S2', 'S3'])
# No counts provided because no metadata
obs = _compute_summary(data, 'sample-id')
d = [['S1', 1, 1, 1., 1.04, 1.18, 1.5, 2., 2.5, 2.82, 2.96, 3.],
['S1', 200, 1, 4., 4.04, 4.18, 4.5, 5., 5.5, 5.82, 5.96, 6.],
['S2', 1, 1, 1., 1.04, 1.18, 1.5, 2., 2.5, 2.82, 2.96, 3.],
['S2', 200, 1, 4., 4.04, 4.18, 4.5, 5., 5.5, 5.82, 5.96, 6.],
['S3', 1, 1, 1., 1.04, 1.18, 1.5, 2., 2.5, 2.82, 2.96, 3.],
['S3', 200, 1, 4., 4.04, 4.18, 4.5, 5., 5.5, 5.82, 5.96, 6.]]
exp = pd.DataFrame(data=d, columns=['sample-id', 'depth', 'count',
'min', '2%', '9%', '25%', '50%',
'75%', '91%', '98%', 'max'])
pdt.assert_frame_equal(exp, obs)
def test_two_iterations_with_metadata_where_values_are_identical(self):
columns = pd.MultiIndex.from_product([[1, 200], [1, 2]],
names=['depth', 'iter'])
data = pd.DataFrame(data=[[3, 6, 9, 9]], columns=columns,
index=['milo'])
counts = pd.DataFrame(data=[[3, 3, 3, 3]], columns=columns,
index=['milo'])
obs = _compute_summary(data, 'pet', counts=counts)
d = [
['milo', 1, 3., 3.06, 3.27, 3.75, 4.5, 5.25, 5.73, 5.94, 6., 3],
['milo', 200, 9., 9., 9., 9., 9., 9., 9., 9., 9., 3],
]
exp = pd.DataFrame(data=d, columns=['pet', 'depth', 'min', '2%', '9%',
'25%', '50%', '75%', '91%', '98%',
'max', 'count'])
pdt.assert_frame_equal(exp, obs)
def test_some_duplicates_in_category(self):
columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
(200, 2), ('pet', '')],
names=['depth', 'iter'])
data = pd.DataFrame(data=[[1, 2, 3, 4, 'russ'], [5, 6, 7, 8, 'milo'],
[9, 10, 11, 12, 'russ']],
columns=columns, index=['S1', 'S2', 'S3'])
obs = _reindex_with_metadata('pet', ['pet'], data)
exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['depth', 'iter'])
exp_ind = pd.Index(['milo', 'russ'], name='pet')
exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
columns=exp_col, index=exp_ind)
pdt.assert_frame_equal(exp, obs[0])
exp = pd.DataFrame(data=[[1, 1, 1, 1], [2, 2, 2, 2]],
columns=exp_col, index=exp_ind)
pdt.assert_frame_equal(exp, obs[1])
def test_all_identical(self):
columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
(200, 2), ('pet', '')],
names=['depth', 'iter'])
data = pd.DataFrame(data=[[1, 2, 3, 4, 'russ'], [5, 6, 7, 8, 'russ'],
[9, 10, 11, 12, 'russ']],
columns=columns, index=['S1', 'S2', 'S3'])
obs = _reindex_with_metadata('pet', ['pet'], data)
exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['depth', 'iter'])
exp_ind = pd.Index(['russ'], name='pet')
exp = pd.DataFrame(data=[[5, 6, 7, 8]],
columns=exp_col, index=exp_ind)
pdt.assert_frame_equal(exp, obs[0])
exp = pd.DataFrame(data=[[3, 3, 3, 3]],
columns=exp_col, index=exp_ind)
pdt.assert_frame_equal(exp, obs[1])
def write_data(self, result_dict):
for key, result in six.iteritems(result_dict):
is_null = False
if isinstance(result, pd.DataFrame):
if result.isnull().any().any():
is_null = True
elif isinstance(result, pd.Series):
if result.isnull().any():
is_null = True
else:
raise ValueError("PandasHDFDataHandler doesn't support type "
"{} (in key {})".format(type(result), key))
            if is_null:
                raise ValueError("data {} has NaN".format(key))
with SimpleTimer("Writing generated data {} to hdf5 file"
.format(key),
end_in_new_line=False):
if (isinstance(result, pd.DataFrame)
and isinstance(result.index, pd.MultiIndex)
and isinstance(result.columns, pd.MultiIndex)):
self.hdf_store.put(key, result)
else:
self.hdf_store.put(key, result, format='table')
self.hdf_store.flush(fsync=True)
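The NaN guard above reduces twice because DataFrame.isnull() is elementwise; a quick standalone check of that pattern:

import numpy as np
import pandas as pd

clean = pd.DataFrame({'a': [1.0, 2.0]})
dirty = pd.DataFrame({'a': [1.0, np.nan]})
# .any().any() collapses the elementwise mask over both axes to one bool.
assert not clean.isnull().any().any()
assert dirty.isnull().any().any()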
def merge_interictal_preictal(interictal, preictal):
"""
    Merges the *interictal* and *preictal* data frames into a single data frame. Also sorts the multilevel index.
:param interictal: A data frame containing the interictal samples.
:param preictal: A data frame containing the preictal samples.
:return: A data frame containing both interictal and preictal data. The multilevel index of the data frame
is sorted.
"""
logging.info("Merging interictal and preictal datasets")
try:
        # sortlevel() was removed in pandas 1.0; sort_index(level=...) is the replacement.
        preictal.sort_index(level='segment', inplace=True)
        if isinstance(preictal.columns, pd.MultiIndex):
            preictal.sort_index(axis=1, inplace=True)
        interictal.sort_index(level='segment', inplace=True)
        if isinstance(interictal.columns, pd.MultiIndex):
            interictal.sort_index(axis=1, inplace=True)
    except TypeError:
        logging.warning("TypeError when trying to merge interictal and preictal sets.")
    dataset = pd.concat((interictal, preictal))
    dataset.sort_index(level='segment', inplace=True)
return dataset
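A minimal sketch of the merge, assuming frames indexed by a 'segment' level such as the loaders below produce:

import numpy as np
import pandas as pd

# Assumes merge_interictal_preictal from above is in scope.
idx = pd.MultiIndex.from_product([[0, 1], range(3)],
                                 names=('segment', 'start_sample'))
interictal = pd.DataFrame({'f0': np.zeros(6), 'Preictal': 0}, index=idx)
preictal = pd.DataFrame({'f0': np.ones(6), 'Preictal': 1}, index=idx)
merged = merge_interictal_preictal(interictal, preictal)
# 12 rows, sorted on the 'segment' index level.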
def test_k_fold_segment_split():
""" Test function for the k-fold segment split """
interictal_classes = np.zeros(120)
preictal_classes = np.ones(120)
classes = np.concatenate((interictal_classes, preictal_classes,))
segments = np.arange(12)
i = np.arange(240)
index = pd.MultiIndex.from_product([segments, np.arange(20)], names=('segment', 'start_sample'))
dataframe = pd.DataFrame({'Preictal': classes, 'i': i}, index=index)
# With a 6-fold cross validator, we expect each held-out fold to contain exactly 2 segments, one from each class
cv1 = SegmentCrossValidator(dataframe, n_folds=6, shuffle=True, random_state=42)
cv2 = SegmentCrossValidator(dataframe, n_folds=6, shuffle=True, random_state=42)
for (training_fold1, test_fold1), (training_fold2, test_fold2) in zip(cv1, cv2):
        assert np.all(training_fold1 == training_fold2) and np.all(test_fold1 == test_fold2)
def load_preictal_dataframes(feature_folder, sliding_frames=False, **kwargs):
"""
Convenience function for loading preictal dataframes. Sets the 'Preictal' column to 1.
:param feature_folder: The folder to load the feature data from.
:param sliding_frames: If True, the data frame will be extended using sliding frames over the feature windows.
:param kwargs: keyword arguments to use for loading the features.
:return: A DataFrame of preictal data with a 'Preictal' column set to 1.
"""
preictal = load_feature_files(feature_folder,
class_name="preictal",
sliding_frames=sliding_frames,
**kwargs)
preictal['Preictal'] = 1
    preictal.sort_index(level='segment', inplace=True)
    if isinstance(preictal.columns, pd.MultiIndex):
        preictal.sort_index(axis=1, inplace=True)
return preictal
def load_interictal_dataframes(feature_folder, sliding_frames=False, **kwargs):
"""
Convenience function for loading interictal dataframes. Sets the 'Preictal' column to 0.
:param feature_folder: The folder to load the feature data from.
:param sliding_frames: If True, the data frame will be extended using sliding frames over the feature windows.
:param kwargs: keyword arguments to use for loading the features.
:return: A DataFrame of interictal data with a 'Preictal' column set to 0.
"""
    interictal = load_feature_files(feature_folder,
                                    class_name="interictal",
                                    sliding_frames=sliding_frames,
                                    **kwargs)
    interictal['Preictal'] = 0
    interictal.sort_index(level='segment', inplace=True)
    if isinstance(interictal.columns, pd.MultiIndex):
        interictal.sort_index(axis=1, inplace=True)
return interictal
def create_sliding_frames(dataframe, frame_length=12):
"""
Wrapper for the extend_data_with_sliding_frames function which works with numpy arrays.
This version does the data-frame conversion for us.
:param dataframe: The dataframe to extend.
:param frame_length: The frame length to use in the resulting extended data frame.
:return: A new data frame where the original dataframe has been extended with sliding frames.
"""
extended_array = extend_data_with_sliding_frames(dataframe.values)
# We should preserve the columns of the dataframe, otherwise
# concatenating different dataframes along the row-axis will give
# wrong results
window_columns = dataframe.columns
column_index = pd.MultiIndex.from_product([range(frame_length),
window_columns],
names=['window', 'feature'])
return pd.DataFrame(data=extended_array,
columns=column_index)
def testTwoDimensionalCumulativeDistribution(self):
df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
"Y": [1, 2, 0, 1, 1, 1, 1],
"Z": [1, 0, 0, 0, 0, 0, 0]})
weights = np.array([1, 1, 1, 1, 1, 1, 1])
metric = metrics.CumulativeDistribution("X", ["Y", "Z"])
output = metric(df, weights)
correct = pd.DataFrame(
np.array([1 / 14., 12 / 14., 13 / 14., 1.]),
columns=[""],
index=pd.MultiIndex(levels=[[0, 1, 2], [0, 1]],
                            codes=[[0, 1, 1, 2], [0, 0, 1, 0]],
names=["Y", "Z"]))
self.assertTrue(all(output.index == correct.index) and
all(output.columns == correct.columns) and
all(abs(output.values - correct.values) < 1e-10))
def testShuffledTwoDimensionalCumulativeDistribution(self):
df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
"Y": [1, 2, 0, 1, 1, 1, 1],
"Z": [1, 0, 0, 0, 0, 0, 0]})
weights = np.array([1, 1, 1, 1, 1, 1, 1])
metric = metrics.CumulativeDistribution("X", ["Y", "Z"])
output = metric(df.iloc[np.random.permutation(7)], weights)
correct = pd.DataFrame(
np.array([1 / 14., 12 / 14., 13 / 14., 1.]),
columns=[""],
index=pd.MultiIndex(levels=[[0, 1, 2], [0, 1]],
                            codes=[[0, 1, 1, 2], [0, 0, 1, 0]],
names=["Y", "Z"]))
self.assertTrue(all(output.index == correct.index) and
all(output.columns == correct.columns) and
all(abs(output.values - correct.values) < 1e-10))
def testRelativeToSplitJackknife(self):
data = pd.DataFrame(
{"X": [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8],
"Y": [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3],
"Z": [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]})
metric = metrics.Sum("X")
comparison = comparisons.AbsoluteDifference("Z", 0)
se_method = standard_errors.Jackknife()
output = core.Analyze(data).split_by("Y").relative_to(
comparison).with_standard_errors(se_method).calculate(metric).run()
rowindex = pd.MultiIndex(
levels=[[1, 2, 3], [1]],
        codes=[[0, 1, 2], [0, 0, 0]],
names=["Y", "Z"])
correct = pd.DataFrame(
np.array([[-3.0, np.sqrt(5 * np.var([0, -1, -2, -3, -4, -5]))],
[-3.0, np.sqrt(5 * np.var([3, 2, 1, -8, -7, -6]))],
[-3.0, np.sqrt(5 * np.var([6, 5, 4, -11, -10, -9]))]]),
columns=("sum(X) Absolute Difference",
"sum(X) Absolute Difference Jackknife SE"),
index=rowindex)
self.assertTrue(output.equals(correct))
def testDataframeRelativeTo(self):
df = pd.DataFrame({"X": range(11),
"Y": np.concatenate((np.zeros(6), np.ones(5))),
"Z": np.concatenate((np.zeros(3), np.ones(8)))})
metric = metrics.Distribution("X", ["Z"])
output = core.Analyze(df).relative_to(comparisons.AbsoluteDifference(
"Y", 0)).calculate(metric).run()
correct = pd.DataFrame(
np.array([-0.2, 0.2]),
columns=["X Distribution Absolute Difference"],
index=pd.MultiIndex(levels=[[1.], [0., 1.]],
                            codes=[[0, 0], [0, 1]],
names=["Y", "Z"]))
self.assertTrue(all(output.index == correct.index) and
all(output.columns == correct.columns) and
np.all(abs(output.values - correct.values) < 1e-10))
def testSplitDataframe(self):
df = pd.DataFrame({"X": range(11),
"Y": np.concatenate((np.zeros(6), np.ones(5))),
"Z": np.concatenate((np.zeros(3), np.ones(8)))})
metric = metrics.Distribution("X", ["Z"])
output = core.Analyze(df).split_by(["Y"]).calculate(metric).run()
correct = pd.DataFrame(
np.array([0.2, 0.8, 0.0, 1.0]),
columns=["X Distribution"],
index=pd.MultiIndex(levels=[[0.0, 1.0], [0.0, 1.0]],
                            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=["Y", "Z"]))
self.assertTrue(all(output.index == correct.index) and
all(output.columns == correct.columns) and
np.all(abs(output.values - correct.values) < 1e-10))
common.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def _isnull_old(obj):
"""Detect missing values. Treat None, NaN, INF, -INF as null.
Parameters
----------
arr: ndarray or object value
Returns
-------
boolean ndarray or boolean
"""
if lib.isscalar(obj):
return lib.checknull_old(obj)
# hack (for now) because MI registers as ndarray
elif isinstance(obj, pd.MultiIndex):
raise NotImplementedError("isnull is not defined for MultiIndex")
elif isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
return _isnull_ndarraylike_old(obj)
elif isinstance(obj, ABCGeneric):
return obj._constructor(obj._data.isnull(func=_isnull_old))
elif isinstance(obj, list) or hasattr(obj, '__array__'):
return _isnull_ndarraylike_old(np.asarray(obj))
else:
return obj is None
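_isnull_old is the internal path behind pandas' inf-as-null mode; the public behaviour it implements can be reproduced through the use_inf_as_na option (deprecated in pandas 2.1 and removed in 3.0, so this sketch assumes an older pandas):

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, np.inf])
print(pd.isna(s).tolist())          # [False, True, False]
with pd.option_context('mode.use_inf_as_na', True):
    print(pd.isna(s).tolist())      # [False, True, True]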
test_base.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_equals_op_multiindex(self):
# GH9785
# test comparisons of multiindex
from pandas.compat import StringIO
df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1])
tm.assert_numpy_array_equal(df.index == df.index,
np.array([True, True]))
mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)])
tm.assert_numpy_array_equal(df.index == mi1, np.array([True, True]))
mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)])
tm.assert_numpy_array_equal(df.index == mi2, np.array([True, False]))
mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
df.index == mi3
index_a = Index(['foo', 'bar', 'baz'])
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
df.index == index_a
tm.assert_numpy_array_equal(index_a == mi3,
np.array([False, False, False]))
test_reshape.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_stack_ints(self):
df = DataFrame(
np.random.randn(30, 27),
columns=MultiIndex.from_tuples(
list(itertools.product(range(3), repeat=3))
)
)
assert_frame_equal(
df.stack(level=[1, 2]),
df.stack(level=1).stack(level=1)
)
assert_frame_equal(
df.stack(level=[-2, -1]),
df.stack(level=1).stack(level=1)
)
df_named = df.copy()
df_named.columns.set_names(range(3), inplace=True)
assert_frame_equal(
df_named.stack(level=[1, 2]),
df_named.stack(level=1).stack(level=1)
)
test_reshape.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_unstack_level_binding(self):
# GH9856
mi = pd.MultiIndex(
levels=[[u('foo'), u('bar')], [u('one'), u('two')],
[u('a'), u('b')]],
            codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]],
names=[u('first'), u('second'), u('third')])
s = pd.Series(0, index=mi)
result = s.unstack([1, 2]).stack(0)
expected_mi = pd.MultiIndex(
levels=[['foo', 'bar'], ['one', 'two']],
            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['first', 'second'])
expected = pd.DataFrame(np.array([[np.nan, 0],
[0, np.nan],
[np.nan, 0],
[0, np.nan]],
dtype=np.float64),
index=expected_mi,
columns=pd.Index(['a', 'b'], name='third'))
assert_frame_equal(result, expected)
test_reshape.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_unstack_to_series(self):
# check reversibility
data = self.frame.unstack()
self.assertTrue(isinstance(data, Series))
undo = data.unstack().T
assert_frame_equal(undo, self.frame)
# check NA handling
        data = DataFrame({'x': [1, 2, np.nan], 'y': [3.0, 4, np.nan]})
data.index = Index(['a', 'b', 'c'])
result = data.unstack()
midx = MultiIndex(levels=[['x', 'y'], ['a', 'b', 'c']],
                          codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
        expected = Series([1, 2, np.nan, 3, 4, np.nan], index=midx)
assert_series_equal(result, expected)
# check composability of unstack
old_data = data.copy()
for _ in range(4):
data = data.unstack()
assert_frame_equal(old_data, data)
def bar_fueltype_and_country_totals(dfs, keys, figsize=(12, 8)):
df = lookup(dfs, keys)
countries = df.columns.levels[0] if isinstance(df.columns, pd.MultiIndex) else df.columns
n = len(countries)
subplots = gather_nrows_ncols(n)
fig, ax = plt.subplots(*subplots, figsize=figsize)
    if sum(subplots) > 2:
ax_iter = ax.flat
else:
ax_iter = np.array(ax).flat
for country in countries:
ax = next(ax_iter)
df[country].plot.bar(ax=ax, sharex=True, rot=55, legend=None)
        ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 2))
ax.set_title(country)
fig.tight_layout(pad=0.5)
return fig, ax
def _get_header_iterable(self):
"""Reformats all but the last header rows."""
df_clean = self.df.loc[:, self.df.columns.get_level_values(0) != ORG_ROW_NAMES]
if isinstance(df_clean.columns, pd.MultiIndex):
transpose_tuples = zip(*df_clean.columns.tolist())
header_values = []
for i, t in enumerate(transpose_tuples):
if i < len(transpose_tuples) - 1:
# Not the last column, aggregate repeated items, e.g. [['aa', 'aa', 'aa'], ['bb', 'bb', 'bb']]
header_values.append([list(g) for _, g in itertools.groupby(t)])
else:
# For the last column keep all elements in single list, e.g. ['a', 'b', 'c', 'a', 'b', 'c']
header_values.append(list(t))
return header_values
else:
return [df_clean.columns.tolist()]
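The grouping step on its own, with two hypothetical header rows:

import itertools

columns = [('aa', 'a'), ('aa', 'b'), ('bb', 'a'), ('bb', 'b')]
rows = list(zip(*columns))
# All but the last row: collapse runs of repeated labels.
print([list(g) for _, g in itertools.groupby(rows[0])])  # [['aa', 'aa'], ['bb', 'bb']]
# Last row stays flat.
print(list(rows[1]))  # ['a', 'b', 'a', 'b']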
def _perform_operation(self, dataframe, key, schema, value_func, operation):
# Check for references
references = (dataframe.columns.get_level_values(0).tolist()
if isinstance(dataframe.columns, pd.MultiIndex)
else [None])
for reference in references:
metric_df = value_func(dataframe, schema, reference=reference)
operation_key = ('{}_{}'.format(metric_df.name, key)
if reference is None
else (reference, '{}_{}'.format(metric_df.name[1], key)))
if isinstance(dataframe.index, pd.MultiIndex):
unstack_levels = list(range(1, len(dataframe.index.levels)))
dataframe[operation_key] = metric_df.groupby(level=unstack_levels).apply(operation)
else:
dataframe[operation_key] = operation(metric_df)
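The MultiIndex branch groups on every index level except the outermost, so the operation runs along the outer level within each inner group; a minimal sketch of that pattern, with cumsum standing in for the operation:

import pandas as pd

idx = pd.MultiIndex.from_product([['2020', '2021'], ['a', 'b']],
                                 names=['year', 'dim'])
metric = pd.Series([1.0, 2.0, 3.0, 4.0], index=idx)
inner_levels = list(range(1, metric.index.nlevels))
# Within each 'dim' group the values accumulate across 'year'.
print(metric.groupby(level=inner_levels).apply(lambda s: s.cumsum()))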
def _render_data(self, dataframe, display_schema):
n = len(dataframe.index.levels) if isinstance(dataframe.index, pd.MultiIndex) else 1
dimensions = list(display_schema['dimensions'].items())
row_dimensions, column_dimensions = dimensions[:n], dimensions[n:]
data = []
for idx, df_row in dataframe.iterrows():
row = {}
if not isinstance(idx, tuple):
idx = (idx,)
for key, value in self._render_dimension_data(idx, row_dimensions):
row[key] = value
for key, value in self._render_metric_data(df_row, column_dimensions,
display_schema['metrics'], display_schema.get('references')):
row[key] = value
data.append(row)
return data
def create_multi_index(arr):
    '''Build a pandas.MultiIndex over arr.coords from a DataArray.
    Parameters
    ----------
    arr: xarray.DataArray
    Returns
    -------
    index: pandas.MultiIndex with names taken from arr.dims
           and levels taken from arr.coords
    '''
np_arrs = tuple(getattr(arr, dim).values for dim in arr.dims)
index = pd.MultiIndex.from_product(np_arrs, names=arr.dims)
return index
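A usage sketch, assuming xarray is installed and create_multi_index from above is in scope:

import numpy as np
import xarray as xr

arr = xr.DataArray(np.zeros((2, 3)), dims=('x', 'y'),
                   coords={'x': [0, 1], 'y': ['a', 'b', 'c']})
index = create_multi_index(arr)
# MultiIndex([(0, 'a'), (0, 'b'), ..., (1, 'c')], names=['x', 'y'])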
def setUp(self):
samples = []
t = 1.0
for i in range(20):
sample = []
sample.append(t)
t += 1.1
sample.append(t)
t += 0.2
sample.append(t)
t += 1.5
sample.append(t)
t += 0.1
samples.append(sample)
self.samples = pd.DataFrame(
data=samples,
columns=pd.MultiIndex(
levels=[['fn1', 'fn2'], ['begin', 'end']],
                codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
)
)
def read_seurat_hdf5(hdf5_file):
import h5py
with h5py.File(hdf5_file, 'r') as handle:
        # Dataset.value was removed in h5py 3.0; [()] reads the full dataset.
        cols = handle.get("seurat_matrix/columns")[()]
        rows = handle.get("seurat_matrix/rows")[()]
        df = handle.get("seurat_matrix/matrix")[()]
        seurat_matrix = pd.DataFrame(df, index=cols, columns=rows).T
        # add sample annotations as MultiIndex columns; list comprehensions
        # instead of map(), which is a lazy iterator in Python 3
        parts = seurat_matrix.columns.str.split("|")
        condition = [x[0] for x in parts]
        replicate = [x[1] for x in parts]
        cell = [x[2] for x in parts]
        grna = [x[3] for x in parts]
        gene = [x[1] if len(x) > 1 else x[0][:4]
                for x in pd.Series(grna).str.split("_")]
        seurat_matrix.columns = pd.MultiIndex.from_arrays(
            [condition, replicate, cell, grna, gene],
            names=['condition', 'replicate', 'cell', 'grna', 'gene'])
return seurat_matrix
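The column-splitting step can be checked in isolation with hypothetical column names of the form condition|replicate|cell|grna:

import pandas as pd

df = pd.DataFrame([[1, 2]], columns=['stim|r1|AAAC|CTRL_scrambled',
                                     'stim|r1|AAAG|TP53_g1'])
parts = df.columns.str.split("|")
df.columns = pd.MultiIndex.from_arrays(
    [[p[i] for p in parts] for i in range(4)],
    names=['condition', 'replicate', 'cell', 'grna'])
print(df.columns.get_level_values('grna').tolist())
# ['CTRL_scrambled', 'TP53_g1']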
def test_observed_otus(self):
t = biom.Table(np.array([[150, 100, 100], [50, 100, 100]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
obs = _compute_rarefaction_data(feature_table=t,
min_depth=1,
max_depth=200,
steps=2,
iterations=1,
phylogeny=None,
metrics=['observed_otus'])
exp_ind = pd.MultiIndex.from_product(
[[1, 200], [1]],
names=['depth', 'iter'])
exp = pd.DataFrame(data=[[1, 2], [1, 2], [1, 2]],
columns=exp_ind,
index=['S1', 'S2', 'S3'])
pdt.assert_frame_equal(obs['observed_otus'], exp)
def test_multiple_metrics(self):
t = biom.Table(np.array([[150, 100, 100], [50, 100, 100]]),
['O1', 'O2'],
['S1', 'S2', 'S3'])
obs = _compute_rarefaction_data(feature_table=t,
min_depth=1,
max_depth=200,
steps=2,
iterations=1,
phylogeny=None,
metrics=['observed_otus', 'shannon'])
exp_ind = pd.MultiIndex.from_product(
[[1, 200], [1]],
names=['depth', 'iter'])
exp = pd.DataFrame(data=[[1, 2], [1, 2], [1, 2]],
columns=exp_ind,
index=['S1', 'S2', 'S3'])
pdt.assert_frame_equal(obs['observed_otus'], exp)
exp = pd.DataFrame(data=[[0., 0.811278124459], [0., 1.], [0., 1.]],
columns=exp_ind,
index=['S1', 'S2', 'S3'])
pdt.assert_frame_equal(obs['shannon'], exp)