def calc_dis_jarccard2(neighs, neighs2):
    """Aggregate word-level Jaccard similarities between two neighbor lists.

    Every id ``i`` in ``neighs`` is paired with every id ``j`` in ``neighs2``.
    Pairs with ``i == j`` and pairs where either id is missing from the
    module-level ``index_q`` mapping are skipped. For each remaining pair the
    two texts looked up in ``index_q`` are lower-cased, split on whitespace
    into token sets and scored with ``dist_utils._jaccard_coef``.

    Args:
        neighs: Iterable of ids used as keys into ``index_q``.
        neighs2: Iterable of ids used as keys into ``index_q``. It is
            traversed repeatedly, so it should be a re-iterable container.

    Returns:
        A list of five values: ``np.mean``, ``np.std``, ``np.max``,
        ``np.min`` and ``np.median`` of the collected similarities, in that
        order. When no pair produced a similarity every entry is ``-1``;
        an aggregate that fails to compute is also reported as ``-1``.
    """
    sim_fea = []
    for i in neighs:
        if i not in index_q:
            continue
        # Tokenized lazily so the text of ``i`` is only touched once a usable
        # partner exists, and then only once instead of once per pair.
        s1 = None
        for j in neighs2:
            if i == j or j not in index_q:
                continue
            if s1 is None:
                s1 = set(index_q[i].lower().split())
            s2 = set(index_q[j].lower().split())
            sim_fea.append(dist_utils._jaccard_coef(s1, s2))

    aggregation_mode = ("mean", "std", "max", "min", "median")
    aggregator = [getattr(np, m) for m in aggregation_mode]

    # Without this early return np.mean/np.std/np.median of an empty list
    # yield nan (plus a RuntimeWarning) rather than the -1 sentinel.
    if not sim_fea:
        return [-1] * len(aggregator)

    score = []
    for agg in aggregator:
        try:
            s = agg(sim_fea)
        except Exception:
            # Best-effort: keep the -1 sentinel for an aggregate that cannot
            # be computed, without swallowing KeyboardInterrupt/SystemExit.
            s = -1
        score.append(s)
    return score
# sps.spearmanr(train_fea,train['is_duplicate'])[0]
generate_neighbor_dis.py 文件源码
python
阅读 41
收藏 0
点赞 0
评论 0
评论列表
文章目录