import numpy as np
from collections import Counter
from scipy.stats import entropy
# Progbar import path may differ across Keras versions; this is the classic one.
from keras.utils.generic_utils import Progbar

# `cut_zeros` (strip zero padding) and `single_generate` (sample hypotheses for
# one premise) are helpers defined elsewhere in this project.


def diversity(dev, gen_test, beam_size, hypo_len, noise_size, per_premise, samples):
    # Evaluate `samples` premises spread evenly across the dev set.
    step = len(dev[0]) // samples  # integer division so the indices stay ints
    sind = [i * step for i in range(samples)]
    p = Progbar(per_premise * samples)
    for i in sind:
        hypos = []
        unique_words = []
        hypo_list = []
        premise = dev[0][i]
        prem_list = set(cut_zeros(list(premise)))
        # Generate hypotheses for this premise until `per_premise` are collected.
        while len(hypos) < per_premise:
            label = np.argmax(dev[2][i])
            words = single_generate(premise, label, gen_test, beam_size, hypo_len, noise_size)
            hypos += [str(ex) for ex in words]
            unique_words += [int(w) for ex in words for w in ex if w > 0]
            hypo_list += [set(cut_zeros(list(ex))) for ex in words]

        # Jaccard similarities: each hypothesis vs. the premise, and all hypothesis pairs.
        jacks = []
        prem_jacks = []
        for u in range(len(hypo_list)):
            sim_prem = len(hypo_list[u] & prem_list) / float(len(hypo_list[u] | prem_list))
            prem_jacks.append(sim_prem)
            for v in range(u + 1, len(hypo_list)):
                sim = len(hypo_list[u] & hypo_list[v]) / float(len(hypo_list[u] | hypo_list[v]))
                jacks.append(sim)
        avg_dist_hypo = 1 - np.mean(jacks)       # mean pairwise Jaccard distance between hypotheses
        avg_dist_prem = 1 - np.mean(prem_jacks)  # mean Jaccard distance to the premise

        # Entropy of the hypothesis distribution and of the generated-word distribution.
        d = entropy(list(Counter(hypos).values()))
        w = entropy(list(Counter(unique_words).values()))
        p.add(len(hypos), [('diversity', d), ('word_entropy', w),
                           ('avg_dist_hypo', avg_dist_hypo), ('avg_dist_prem', avg_dist_prem)])

    # Progbar keeps a (weighted sum, count) pair per metric; return the averages.
    arrd = p.sum_values['diversity']
    arrw = p.sum_values['word_entropy']
    arrj = p.sum_values['avg_dist_hypo']
    arrp = p.sum_values['avg_dist_prem']
    return arrd[0] / arrd[1], arrw[0] / arrw[1], arrj[0] / arrj[1], arrp[0] / arrp[1]
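
To make the metrics concrete, here is a small self-contained sketch, not taken from the original code: the token ids in `premise` and `hyps` are made up, and the helper-free set construction stands in for `cut_zeros`. It computes the same four quantities on toy data so the Jaccard and entropy definitions can be checked in isolation.

# Toy sketch of the metrics above (illustrative data only).
import numpy as np
from collections import Counter
from scipy.stats import entropy

premise = [4, 9, 17, 0, 0]                               # zero-padded premise token ids
hyps = [[4, 9, 21, 0], [4, 22, 23, 0], [4, 9, 21, 0]]    # three generated hypotheses

prem_set = set(t for t in premise if t > 0)
hyp_sets = [set(t for t in h if t > 0) for h in hyps]

# Jaccard similarity of each hypothesis to the premise, and of every hypothesis pair.
prem_jacks = [len(s & prem_set) / float(len(s | prem_set)) for s in hyp_sets]
pair_jacks = [len(hyp_sets[u] & hyp_sets[v]) / float(len(hyp_sets[u] | hyp_sets[v]))
              for u in range(len(hyp_sets)) for v in range(u + 1, len(hyp_sets))]

print('avg_dist_prem ', 1 - np.mean(prem_jacks))
print('avg_dist_hypo ', 1 - np.mean(pair_jacks))
print('diversity     ', entropy(list(Counter(str(h) for h in hyps).values())))
print('word_entropy  ', entropy(list(Counter(t for h in hyps for t in h if t > 0).values())))

Because the first and third hypotheses are identical, the hypothesis entropy and the average pairwise distance both drop, which is exactly the behaviour the `diversity` function is meant to surface.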