def pvals_to_long(pvals):
    """
    Convert a wide dataframe of signed p-values to long (tidy) form.

    Every non-missing (otu, dataset) entry yields two output rows: one
    holding the health-associated one-sided value and one holding the
    disease-associated one-sided value.

    Parameters
    ----------
    pvals : pandas DataFrame
        Genera in rows, studies in columns, signed pvalues in values.
        Positive indicates higher in disease, negative is higher in healthy.
        Missing entries are dropped. The input dataframe is not modified.

    Returns
    -------
    longpvals : pandas DataFrame
        Tidy dataframe with columns ``otu``, ``dataset``, ``signed_qvalue``,
        ``q`` (the one-sided value for that row's direction) and
        ``direction`` (``'healthy'`` or ``'disease'``). All ``'healthy'``
        rows come first, followed by the ``'disease'`` rows; index labels
        are therefore repeated once per direction.
    """
    # rename_axis returns a new object, so the caller's index name is left
    # untouched (assigning to pvals.index.name would change it in place).
    wide = pvals.rename_axis('otu').reset_index()
    healthy = pd.melt(wide, id_vars='otu', var_name='dataset',
                      value_name='signed_qvalue').dropna()

    # Convert all p-values to health-associated pvalue.
    # Original p-values were calculated from KW test, making them two-sided.
    # If the pvalue is negative (or zero), abs(p)/2 is the health-associated
    # pvalue; if it is positive, 1 - abs(p)/2 is the health-associated pvalue.
    def _to_healthy(signed):
        half = abs(signed) / 2.0
        return half if signed <= 0 else 1 - half

    healthy['q'] = healthy['signed_qvalue'].map(_to_healthy)
    healthy['direction'] = 'healthy'

    # The disease-associated value is the complement of the healthy one.
    # assign() builds a new frame, so `healthy` itself is not altered.
    disease = healthy.assign(direction='disease', q=1 - healthy['q'])

    return pd.concat((healthy, disease))
评论列表
文章目录