train_svm_model.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:NETL-Automatic-Topic-Labelling- 作者: sb1992 项目源码 文件源码
def get_lt_ranks(lab_list,num):
    """Rank candidate labels by letter-trigram similarity to a topic.

    For every label in ``lab_list`` a normalised letter-trigram frequency
    vector is built and compared, via cosine similarity, with the trigram
    distribution that ``get_topic_lt`` returns for ``topic_list[num]``.

    Args:
        lab_list: Iterable of candidate label strings.
        num: Index/key into the module-level ``topic_list``; it must also be
            convertible with ``int()``.

    Returns:
        A list of ``(label, rank, int(num))`` tuples in the same order as
        ``lab_list``. ``rank`` is the 0-based position of the label's
        similarity when the similarities are sorted in ascending order, so
        the most similar label receives the largest rank.
    """
    topic_ls = get_topic_lt(topic_list[num])
    val_list = []
    for item in lab_list:
        # Letter trigrams of the candidate label.
        trigrams = [item[i:i+3] for i in range(0, len(item) - 2)]
        label_cnt = Counter(trigrams)
        # Float start value keeps the normalisation in true division.
        total = sum(label_cnt.values(), 0.0)
        label_freq = dict((key, cnt / total) for key, cnt in label_cnt.items())
        # NOTE(review): a label shorter than three characters has no trigrams,
        # so its vector below is all zeros -- confirm how the cosine call
        # should be treated for such labels.

        # Union of both vocabularies. A set union is used because dict key
        # views cannot be concatenated with ``+`` on Python 3.
        tot_keys = list(set(topic_ls.keys()) | set(label_freq.keys()))
        listtopic = [topic_ls[k] if k in topic_ls else 0.0 for k in tot_keys]
        listlabel = [label_freq.get(k, 0.0) for k in tot_keys]

        # scipy's ``cosine`` is a distance; 1 - distance gives the similarity.
        val = 1 - cosine(np.array(listtopic), np.array(listlabel))
        val_list.append((item, val))

    # argsort of argsort turns the similarity scores into their ranks.
    scores = np.array([pair[1] for pair in val_list])
    ranks = scores.argsort().argsort()
    return [(pair[0], ranks[i], int(num)) for i, pair in enumerate(val_list)]

# Generates letter trigram feature
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号