def cohens_kappa(results, workers):
    """
    Compute the average pairwise Cohen's Kappa over all workers that answered
    at least 5 HITs, and flag workers whose agreement with the others is low.

    A pair of workers is only scored when they share at least 5 HITs (HIT keys
    that are null according to ``pandas.isnull`` are ignored). Pairs whose
    score is NaN are skipped. A worker whose mean pairwise Kappa is below 0.1
    is reported on stdout and excluded from the final average; a worker with
    no valid pairwise score at all is never flagged.

    :param results: mapping indexed as ``results[hit_key][worker_id]``; the
        first element of each answer (``answer[0]``) must be ``True`` or
        ``False`` and is the label being compared.
    :param workers: iterable of worker ids to consider.
    :return: tuple ``(kappa, workers_to_remove)`` where ``kappa`` is the mean
        of the pairwise scores between the workers that were kept (NaN when no
        pair could be scored) and ``workers_to_remove`` is the set of flagged
        worker ids.
    """
    min_hits = 5          # minimum answers per worker and shared HITs per pair
    min_mean_kappa = 0.1  # workers below this mean agreement are flagged
    label_index = {True: 1, False: 0}

    # Collect each worker's answers, keeping only workers with enough of them.
    answers_per_worker = {}
    for worker_id in workers:
        answers = {key: results[key][worker_id]
                   for key in results.keys()
                   if worker_id in results[key]}
        if len(answers) >= min_hits:
            answers_per_worker[worker_id] = answers

    curr_workers = list(answers_per_worker)
    pairwise_kappa = {worker_id: {} for worker_id in curr_workers}

    # Compute pairwise Kappa. Each unordered pair is visited once because the
    # score is stored in both directions below.
    for position, worker1 in enumerate(curr_workers):
        answers1 = answers_per_worker[worker1]
        for worker2 in curr_workers[position + 1:]:
            answers2 = answers_per_worker[worker2]
            mutual_hits = [hit for hit in answers1
                           if hit in answers2 and not pandas.isnull(hit)]
            if len(mutual_hits) < min_hits:
                continue
            # Both label vectors are built from the same list so that the
            # positions line up HIT by HIT.
            worker1_labels = np.array([label_index[answers1[key][0]] for key in mutual_hits])
            worker2_labels = np.array([label_index[answers2[key][0]] for key in mutual_hits])
            curr_kappa = cohen_kappa_score(worker1_labels, worker2_labels)
            if not math.isnan(curr_kappa):
                pairwise_kappa[worker1][worker2] = curr_kappa
                pairwise_kappa[worker2][worker1] = curr_kappa

    # Remove worker answers with low agreement to others. Workers without any
    # valid pairwise score are kept: there is no evidence against them, and
    # averaging an empty list would only yield NaN plus a RuntimeWarning.
    workers_to_remove = set()
    for worker, kappas in pairwise_kappa.items():
        if kappas and np.mean(list(kappas.values())) < min_mean_kappa:
            print('Removing %s' % worker)
            workers_to_remove.add(worker)

    remaining_kappas = [k
                        for worker1, kappas in pairwise_kappa.items()
                        if worker1 not in workers_to_remove
                        for worker2, k in kappas.items()
                        if worker2 not in workers_to_remove]

    # Return the average (NaN when no pair of kept workers could be scored).
    kappa = np.mean(remaining_kappas) if remaining_kappas else float('nan')
    return kappa, workers_to_remove
# [web page residue] Comment list
# [web page residue] Table of contents