def calc_pvalues_wt(self, label):
    """
    Calculate uncorrected pvalue for each variant compared to wild type.

    Reads the ``/main/<label>/scores`` table from ``self.store`` and, for
    every condition in its first column level, computes per variant:

    * ``z`` -- ``|wt_score - score| / sqrt(wt_SE**2 + SE**2)``
    * ``pvalue_raw`` -- ``2 * stats.norm.sf(z)`` (two-sided)

    The result is written to ``/main/<label>/scores_pvalues_wt`` in table
    format. Nothing is returned.

    The calculation is skipped when ``self.check_store`` returns a truthy
    value for the output key (presumably meaning it already exists --
    confirm against ``check_store``), or when the scores table has no wild
    type row, in which case an info message is logged.

    :param label: element label used to build the store keys.
    """
    scores_key = "/main/{}/scores".format(label)
    pvalues_key = "/main/{}/scores_pvalues_wt".format(label)

    if self.check_store(pvalues_key):
        return

    idx = pd.IndexSlice

    # NOTE: the where-strings reference WILD_TYPE_VARIANT by name; keep the
    # select calls in this function so the name is resolved from here
    # (assumes self.store is a pandas HDFStore -- confirm).
    wt = self.store.select(scores_key, "index=WILD_TYPE_VARIANT")
    if len(wt) == 0:  # no wild type score
        logging.info("Failed to find wild type score, skipping wild type "
                     "p-value calculations", extra={'oname': self.name})
        return
    data = self.store.select(scores_key, "index!=WILD_TYPE_VARIANT")

    columns = pd.MultiIndex.from_product([sorted(self.child_names()),
                                          sorted(["z", "pvalue_raw"])],
                                         names=["condition", "value"])
    # Allocate as float (NaN-filled) rather than the default object dtype:
    # in-place .loc assignment keeps the frame's dtype, and object columns
    # holding floats cannot be written with format="table".
    result_df = pd.DataFrame(index=data.index, columns=columns, dtype=float)

    for cnd in data.columns.levels[0]:
        wt_score = wt.loc[WILD_TYPE_VARIANT, idx[cnd, 'score']]
        wt_se = wt.loc[WILD_TYPE_VARIANT, idx[cnd, 'SE']]

        z = np.absolute(wt_score - data.loc[:, idx[cnd, 'score']]) / \
            np.sqrt(wt_se ** 2 + data.loc[:, idx[cnd, 'SE']] ** 2)

        result_df.loc[:, idx[cnd, 'z']] = z
        result_df.loc[:, idx[cnd, 'pvalue_raw']] = 2 * stats.norm.sf(z)

    self.store.put(pvalues_key, result_df, format="table")
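# NOTE: stray web-page navigation labels ("Comment list", "Article contents")
# were pasted here after the function; they are not part of the code.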