softtfidf.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:LLString 作者: mitll 项目源码 文件源码
def compute_VwS(self,s):
        """Compute the unnormalised weights V'(w,S) and their L2 norm for a string.

        Follows the V(w,S) definition from Cohen et al.'s IJCAI03 paper: each
        token's weight is log(term frequency + 1) multiplied by an IDF factor.

        Args:
            s: The string to tokenise and weight.

        Returns:
            A 3-tuple ``(vocab, weights, norm)`` where ``vocab`` is the
            vectorizer's ``vocabulary_`` mapping for ``s``, ``weights`` maps
            each token in ``vocab`` to its weight, and ``norm`` is the square
            root of the sum of the squared weights.
        """
        # Tokenise the single string and count how often each token occurs.
        vectorizer = CountVectorizer(min_df = 0.0, token_pattern=u'(?u)\\b\\w+\\b')
        counts = vectorizer.fit_transform([s])
        counts = counts.tocsr()
        vocab = vectorizer.vocabulary_

        weights = {}
        squared_total = 0
        for token, column in vocab.items():
            # Tokens missing from the corpus vocabulary fall back to OOV_IDF_VAL.
            if token in self.CORPUS_VOCAB:
                idf = self.LOG_IDF[self.CORPUS_VOCAB[token]]
            else:
                idf = self.OOV_IDF_VAL
            weight = math.log(counts[0, column] + 1) * idf
            weights[token] = weight
            # Accumulate in vocabulary order; the norm is taken after the loop.
            squared_total += weight**2

        norm = math.sqrt(squared_total)
        return (vocab, weights, norm)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号