def get_lt_ranks(lab_list, num):
    """Rank candidate labels by letter-trigram similarity to a topic.

    For every label in ``lab_list`` a relative-frequency distribution over
    its letter trigrams is built and compared, via cosine similarity, with
    the trigram weights returned by ``get_topic_lt(topic_list[num])``.

    Args:
        lab_list: Iterable of candidate label strings.
        num: Index into the module-level ``topic_list``; it is also passed
            through ``int()`` and echoed in every result tuple.

    Returns:
        A list of ``(label, rank, int(num))`` tuples in the same order as
        ``lab_list``. ``rank`` is the label's position when the labels are
        sorted by ascending similarity (0 = least similar).
    """
    # NOTE(review): assumes get_topic_lt returns a dict-like mapping of
    # trigram -> weight for the topic -- confirm against its definition.
    topic_ls = get_topic_lt(topic_list[num])
    val_list = []

    for item in lab_list:
        # Letter trigrams for the candidate label.
        trigrams = [item[i:i + 3] for i in range(0, len(item) - 2)]
        counts = Counter(trigrams)

        if not counts:
            # Labels shorter than three characters have no trigrams. Cosine
            # similarity is undefined for an all-zero vector, so score them
            # 0.0 instead of letting an undefined value distort the ranking.
            val_list.append((item, 0.0))
            continue

        total = sum(counts.values(), 0.0)
        label_dist = {tri: cnt / total for tri, cnt in counts.items()}

        # Set union instead of ``keys() + keys()``: dict views cannot be
        # concatenated with ``+`` on Python 3.
        tot_keys = set(topic_ls) | set(label_dist)

        # Align both distributions on the shared vocabulary, using 0.0 for
        # trigrams missing on either side.
        listtopic = [topic_ls.get(elem, 0.0) for elem in tot_keys]
        listlabel = [label_dist.get(elem, 0.0) for elem in tot_keys]

        # ``cosine`` is a distance; 1 - distance is the cosine similarity.
        val = 1 - cosine(np.array(listtopic), np.array(listlabel))
        val_list.append((item, val))

    # argsort of argsort turns the scores into 0-based ascending ranks.
    scores = np.array([score for _, score in val_list])
    ranks = scores.argsort().argsort()

    topic_id = int(num)
    return [
        (label, ranks[i], topic_id)
        for i, (label, _) in enumerate(val_list)
    ]
# Generates letter trigram feature
train_svm_model.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录