def intersect_and_sort_samples(sample_metadata, feature_table):
    '''Return both tables restricted to shared samples, in matching row order.

    Samples are matched on the row index of each table. Rows of the metadata
    table keep their original relative order; the feature table is reordered
    so that its rows line up with the returned metadata rows.

    Parameters
    ----------
    sample_metadata : pd.DataFrame
        Contingency table with rows, columns = samples, metadata.
    feature_table : pd.DataFrame
        Contingency table with rows, columns = samples, features.

    Returns
    -------
    sample_metadata, feature_table : pd.DataFrame, pd.DataFrame
        New tables (the inputs are not modified) with unshared samples
        removed and rows ordered equivalently.

    Raises
    ------
    ValueError
        If no shared samples are found.

    Notes
    -----
    NOTE(review): this presumably expects sample IDs to be unique within each
    table; with duplicated index labels the final ``.loc`` alignment would
    repeat rows -- confirm against callers.
    '''
    # np.intersect1d yields the sorted, de-duplicated set of common sample IDs.
    shared_samples = np.intersect1d(sample_metadata.index,
                                    feature_table.index)
    if shared_samples.size == 0:
        raise ValueError('There are no shared samples between the feature '
                         'table and the sample metadata. Ensure that you have '
                         'passed the correct files.')

    n_shared = shared_samples.size
    if n_shared == sample_metadata.shape[0] == feature_table.shape[0]:
        # Every row of both tables is shared: skip the mask computation and
        # just copy so the caller's frames are never aliased.
        s_metadata = sample_metadata.copy()
        s_features = feature_table.copy()
    else:
        # np.isin replaces np.in1d, which is deprecated as of NumPy 2.0; for
        # 1-D input such as an index the resulting boolean mask is identical.
        metadata_mask = np.isin(sample_metadata.index, shared_samples)
        feature_mask = np.isin(feature_table.index, shared_samples)
        s_metadata = sample_metadata.loc[metadata_mask, :].copy()
        s_features = feature_table.loc[feature_mask, :].copy()

    # Reorder the feature rows to follow the metadata row order.
    return s_metadata, s_features.loc[s_metadata.index, :]
评论列表
文章目录