TargetPlanner.py 文件源码

python
阅读 28 收藏 0 点赞 0 评论 0

项目:bnpy 作者: bnpy 项目源码 文件源码
def calc_underprediction_scores_per_word(model, Data, LP=None, **kwargs):
    ''' Find scalar score for each vocab word. Larger => worse prediction.

    For every vocab word, the score is the average amount by which the
    model's per-document word frequency falls short of the empirical
    frequency, averaged over documents that use the word more often than
    the median count among documents containing it. Words with fewer than
    10 such documents keep a raw score of zero.

    The raw scores are then shifted by their mean, clipped at zero,
    squared, and normalized to sum to one.

    Args
    ----
    model : object providing calc_local_params(Data); only used when
        LP is None, and passed through to calcWordFreqPerDoc_model.
    Data : dataset providing vocab_size and to_sparse_docword_matrix().
    LP : local parameters for Data, optional.
        Computed via model.calc_local_params(Data) when omitted.
    **kwargs : accepted for call compatibility, unused here.

    Returns
    -------
    score : 1D array, size Data.vocab_size
        Non-negative entries that sum to one. If no word obtains a
        positive score, a uniform vector is returned instead of the NaN
        values that dividing by a zero total would produce.
    '''
    if LP is None:
        LP = model.calc_local_params(Data)
    DocWordFreq_emp = calcWordFreqPerDoc_empirical(Data)
    DocWordFreq_model = calcWordFreqPerDoc_model(model, LP)
    # Keep only the shortfall: entries where the model's frequency is
    # below the empirical one. Over-prediction is clipped to zero.
    uError = np.maximum(DocWordFreq_emp - DocWordFreq_model, 0)
    # For each word, identify set of relevant documents
    DocWordMat = Data.to_sparse_docword_matrix().toarray()
    vocab_size = Data.vocab_size
    score = np.zeros(vocab_size)
    if vocab_size == 0:
        # Nothing to score; avoid mean/normalization over an empty array.
        return score
    # TODO: only consider words with many docs overall
    # range (not xrange) so the loop runs on both Python 2 and Python 3.
    for vID in range(vocab_size):
        countPerDoc = DocWordMat[:, vID]
        positiveCounts = countPerDoc[countPerDoc > 0]
        if positiveCounts.size == 0:
            # Word never occurs: median of an empty selection would be NaN
            # (with a RuntimeWarning). Its score simply stays zero.
            continue
        typicalWordCount = np.median(positiveCounts)
        candidateDocs = np.flatnonzero(countPerDoc > typicalWordCount)
        if len(candidateDocs) < 10:
            continue
        score[vID] = np.mean(uError[candidateDocs, vID])
    # Only give positive probability to words with above average score
    score = score - np.mean(score)
    score = np.maximum(score, 0)
    score = score * score  # make more peaked!
    total = score.sum()
    if total == 0:
        # Every entry is exactly zero (all raw scores equal, e.g. no word
        # had enough candidate docs). Dividing would yield NaN everywhere,
        # so fall back to a uniform distribution over the vocabulary.
        return np.ones(vocab_size) / vocab_size
    score /= total
    return score
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号