def normalize_simple(matrix, mask):
    """Convert counts to per-sample proportions, then z-score each row
    within each time-series.

    Each column is first divided by its own total, so every sample becomes
    a vector of proportions. Then, for every distinct value in ``mask``,
    each row is standardized with ``zscore`` using only the columns that
    belong to that series, not the entire data set.

    Parameters
    ----------
    matrix: np.matrix
        Time-series matrix of abundance counts. Rows are sequences, columns
        are samples/time-points. The input is not modified.
    mask: list or np.array
        List of objects with length matching the number of timepoints, where
        unique values delineate multiple time-series. If there is only one
        time-series in the data set, it's a list of identical objects.

    Returns
    -------
    normal_matrix: np.matrix
        Matrix where the columns (within-sample) have been converted to
        proportions (columns with a zero total become all zeros), and then
        each row has been z-scored separately within each time-series.
        NOTE(review): ``zscore`` comes from the enclosing module, presumably
        ``scipy.stats.zscore``; if so, a row that is constant within a
        series yields NaN there -- confirm this is acceptable to callers.
    """
    # A plain list compared with ``==`` is not guaranteed to broadcast
    # element-wise (for string or float labels it collapses to a single
    # False), which would silently skip the per-series step. Coerce once.
    mask = np.asarray(mask)

    # Columns whose total is zero produce 0/0 or x/0; those entries are
    # deliberately zeroed right after, so the warnings are just noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        normal_matrix = matrix / matrix.sum(0)
    normal_matrix[~np.isfinite(normal_matrix)] = 0

    for mask_val in np.unique(mask):
        # Column indices of the samples that make up this time-series.
        columns = np.where(mask == mask_val)[0]
        normal_matrix[:, columns] = np.apply_along_axis(
            zscore, 1, normal_matrix[:, columns]
        )
    return normal_matrix
# Comment list
# Table of contents