def _gmm_zero_point(values):
    """Estimate a sample's zero point with a two-component Gaussian mixture.

    Returns the mean of the component with the smallest variance, fitted to
    the finite entries of ``values``. If the fit fails (for example because
    there are too few finite values), the plain mean of the finite entries
    is returned instead.
    """
    # GaussianMixture replaced the legacy GMM class, which was removed in
    # scikit-learn 0.20; fall back to the old name for very old installs.
    try:
        from sklearn.mixture import GaussianMixture as _Mixture
    except ImportError:
        from sklearn.mixture import GMM as _Mixture

    finite = np.asarray(values, dtype=float)
    # Drops NaN as well as +/-inf (log2(0) yields -inf).
    finite = finite[np.isfinite(finite)]
    try:
        model = _Mixture(n_components=2)
        # The estimator expects a 2-D (n_observations, n_features) array.
        model.fit(finite.reshape(-1, 1))
        # Modern estimators expose `covariances_`, the legacy one `covars_`.
        covs = getattr(model, 'covariances_', None)
        if covs is None:
            covs = model.covars_
        # With a single feature there is one variance per component, so the
        # flattened argmin is the index of the narrowest component.
        narrowest = int(np.argmin(np.ravel(covs)))
        return float(np.ravel(model.means_)[narrowest])
    except Exception:
        # Deliberate best-effort: any fitting failure degrades to the mean.
        return float(np.nanmean(finite))


def zero_center_normalize(df, samples, logInput=False, method='median'):
    """Log2-transform peptide abundances and center every sample at zero.

    A zero point is estimated for each column listed in ``samples`` and
    subtracted from that column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of peptide abundances. It is modified IN PLACE: the columns
        listed in ``samples`` are overwritten with the transformed values.
    samples : list of str
        Column names of the samples to normalize.
    logInput : bool, optional
        True if the abundances are already on a log scale; the log2
        transform is then skipped.
    method : {'median', 'average', 'GMM'}, optional
        How the zero point of each sample is estimated:

        * ``'median'``  - median, ignoring NaN;
        * ``'average'`` - mean, ignoring NaN;
        * ``'GMM'``     - mean of the smallest-variance component of a
          two-component Gaussian mixture fitted to the finite values
          (requires scikit-learn); falls back to the mean of the finite
          values when the fit fails.

        Only ``'GMM'`` drops infinite values before estimating; the other
        two methods ignore NaN only.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    AssertionError
        If ``method`` is not one of the supported names.
    ImportError
        If ``method`` is ``'GMM'`` and scikit-learn is not installed.
    """
    # Raised explicitly (rather than via `assert`) so the check survives
    # `python -O` while callers still see the same exception type.
    if method not in ('median', 'average', 'GMM'):
        raise AssertionError(
            'Zero centering method has to be among median, average or GMM!')

    if not logInput:
        # Convert abundances to log2 scale.
        df[samples] = df[samples].apply(np.log2)

    if method == 'average':
        norm_scale = np.nanmean(df[samples].values, axis=0)
    elif method == 'median':
        norm_scale = np.nanmedian(df[samples].values, axis=0)
    else:
        # One scalar per sample keeps norm_scale 1-D so that it broadcasts
        # across the columns in the subtraction below.
        norm_scale = np.array(
            [_gmm_zero_point(df[sp].values) for sp in samples])

    df[samples] = df[samples] - norm_scale
    return df
# Comment list
# Article table of contents