def get_likelihood(term_document_matrix, label_index, smoothing=0):
    """Compute the per-class likelihood P(feature|class) from training samples.

    For every class, the rows of ``term_document_matrix`` that belong to the
    class are summed column-wise, ``smoothing`` is added to each column total,
    and the result is normalised so that it sums to 1.

    Args:
        term_document_matrix: 2-D matrix of counts whose rows are selected per
            class and whose columns are the features. Typically a scipy
            sparse matrix; a dense 2-D numpy array or scipy sparse array is
            handled as well.
        label_index: dict mapping each class label to the row indices of the
            samples in that class (presumably a list of ints -- anything
            accepted by ``term_document_matrix[index, :]``).
        smoothing: additive (Laplace) smoothing value added to every
            per-feature total before normalising.

    Returns:
        dict with the class label as key and a 1-D numpy array holding the
        conditional probability P(feature|class) for every feature as value.

    Note:
        If the smoothed counts of a class sum to zero (for example a class
        without samples and ``smoothing=0``), the normalisation divides by
        zero and the vector for that class is not a valid distribution.
    """
    likelihood = {}
    for label, index in label_index.items():
        # Column-wise totals over the rows of this class, plus smoothing.
        counts = term_document_matrix[index, :].sum(axis=0) + smoothing
        # Flatten with ravel() instead of taking [0]: scipy sparse matrices
        # return a (1, n) np.matrix here, but dense arrays and scipy sparse
        # arrays already return a 1-D result, where [0] would wrongly pick
        # a single scalar instead of the whole feature vector.
        counts = np.asarray(counts).ravel()
        total_count = counts.sum()
        likelihood[label] = counts / float(total_count)
    return likelihood
email_spam.py 文件源码
python
阅读 34
收藏 0
点赞 0
评论 0
评论列表
文章目录