def bioenv(output_dir: str, distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata) -> None:
    """Run BIOENV on the numeric metadata columns and render an HTML report.

    The metadata is reduced in three steps before the analysis: columns
    that cannot be interpreted as numeric are dropped, then samples with
    any missing value are dropped, then numeric columns with zero variance
    are dropped.  The distance matrix is filtered to the surviving samples
    and handed to ``skbio.stats.distance.bioenv`` together with the
    filtered metadata.

    Parameters
    ----------
    output_dir : str
        Directory passed to ``q2templates.render`` as the render target.
    distance_matrix : skbio.DistanceMatrix
        Distance matrix whose IDs are matched against the metadata index.
    metadata : qiime2.Metadata
        Metadata to test; converted with ``metadata.to_dataframe()``.

    Returns
    -------
    None
        The result table and the filtering summary are rendered into
        ``output_dir``.
    """
    # Convert metadata to numeric values where applicable, drop the
    # non-numeric columns, and then drop samples that contain NaNs.
    df = metadata.to_dataframe()
    df = df.apply(_to_numeric_or_unchanged)

    # Filter categorical (non-numeric) columns. ``dropna`` only removes
    # rows, so the set difference below is exactly the non-numeric columns.
    pre_filtered_cols = set(df.columns)
    df = df.select_dtypes([numpy.number]).dropna()
    filtered_categorical_cols = pre_filtered_cols - set(df.columns)

    # Filter zero-variance numerical columns.
    pre_filtered_cols = set(df.columns)
    df = df.loc[:, df.var() != 0]
    filtered_zero_variance_cols = pre_filtered_cols - set(df.columns)

    # Filter the distance matrix to exclude samples that were dropped from
    # the metadata, and keep track of how many samples survived the
    # filtering so that information can be presented to the user.
    # ``strict=False``: metadata IDs absent from the distance matrix are
    # skipped instead of raising.
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(df.index, strict=False)
    filtered_dm_length = distance_matrix.shape[0]

    result = skbio.stats.distance.bioenv(distance_matrix, df)
    result = q2templates.df_to_html(result)

    index = os.path.join(TEMPLATES, 'bioenv_assets', 'index.html')
    q2templates.render(index, output_dir, context={
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        # Sets have no defined iteration order; sort so the rendered
        # report lists the dropped columns reproducibly.
        'filtered_categorical_cols':
            ', '.join(sorted(filtered_categorical_cols)),
        'filtered_zero_variance_cols':
            ', '.join(sorted(filtered_zero_variance_cols)),
        'result': result})


def _to_numeric_or_unchanged(column):
    """Return ``column`` parsed as numeric, or ``column`` itself on failure.

    Replaces ``pd.to_numeric(column, errors='ignore')``, which pandas has
    deprecated, with the explicit try/except form that has the same result.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column
评论列表
文章目录