def _class_frequencies(X, y):
"""Count the number of non-zero values for each class y in sparse X."""
labels = np.unique(y)
if len(labels) > 2:
raise ValueError("Delta works only with binary classification problems")
# Indices for each type of labels in y
N1 = np.where(y == labels[0])[0]
N2 = np.where(y == labels[1])[0]
# Number of positive documents that each term appears on
df1 = np.bincount(X[N1].nonzero()[1], minlength=X.shape[1])
# Number of negative documents that each term appears on
df2 = np.bincount(X[N2].nonzero()[1], minlength=X.shape[1])
return N1.shape[0], df1, N2.shape[0], df2
评论列表
文章目录