def get_best_label(label_list, num):
    """Rank candidate labels for one topic by letter-trigram similarity.

    Each label is turned into a relative-frequency distribution over its
    letter trigrams and compared, via cosine similarity, against the
    trigram profile of the topic at index ``num``.

    Args:
        label_list: Iterable of candidate label strings. Duplicates are
            collapsed because scores are keyed by the label itself.
        num: Index into the module-level ``topic_list``.

    Returns:
        The labels with the highest similarity, best first, truncated to
        ``int(args.num_unsup_labels)`` entries (``args`` is module-level).

    NOTE(review): ``get_topic_lg`` is defined elsewhere; it is presumed to
    return a mapping from letter trigram to a numeric weight for the
    topic -- confirm against its definition.
    """
    topic_ls = get_topic_lg(topic_list[num])
    val_dict = {}
    for item in label_list:
        # Letter trigrams of the label, normalised to relative frequencies.
        trigrams = [item[i:i + 3] for i in range(len(item) - 2)]
        total = float(len(trigrams))
        label_cnt = {
            tri: count / total for tri, count in Counter(trigrams).items()
        }

        # Union of both vocabularies. A set union is used instead of
        # ``keys() + keys()`` because dict views cannot be concatenated
        # on Python 3.
        tot_keys = list(set(topic_ls).union(label_cnt))
        topic_vec = np.array(
            [topic_ls.get(key, 0.0) for key in tot_keys], dtype=float
        )
        label_vec = np.array(
            [label_cnt.get(key, 0.0) for key in tot_keys], dtype=float
        )

        if topic_vec.any() and label_vec.any():
            # scipy's ``cosine`` is a distance; convert it to a similarity.
            val = 1 - cosine(topic_vec, label_vec)
        else:
            # A label shorter than three characters (or an empty topic
            # profile) yields a zero vector, for which cosine is undefined
            # (NaN). NaN would make the ranking below order-dependent, so
            # such labels are scored as completely dissimilar.
            val = 0.0
        val_dict[item] = val

    # Highest similarity first.
    list_sorted = sorted(val_dict.items(), key=lambda x: x[1], reverse=True)
    return [i[0] for i in list_sorted[:int(args.num_unsup_labels)]]
unsupervised_labels.py 文件源码
python
阅读 93
收藏 0
点赞 0
评论 0
评论列表
文章目录