def stats_verify(filename=None):
    """Print per-worker timing and answer-quality statistics from a CSV.

    Rows whose ``AssignmentStatus`` is ``'Rejected'`` are skipped; the
    remaining rows are grouped by ``WorkerId``.  For every worker the
    function prints the median and mean of ``WorkTimeInSeconds``, the
    number of individual answers found in ``Answer.responses``, and the
    percentage of those answers equal to ``'yes'``.  Workers are listed in
    ascending order of that percentage, after a header line with the total
    worker count.

    Args:
        filename: Path of the CSV file to read.  When omitted (or ``None``)
            the module-level ``OPTS.filename`` is used, as before.
    """
    if filename is None:
        filename = OPTS.filename

    # Read data
    worker_to_is_good = collections.defaultdict(list)
    worker_to_times = collections.defaultdict(list)
    with open(filename) as csvfile:
        for row in csv.DictReader(csvfile):
            if row['AssignmentStatus'] == 'Rejected':
                continue
            worker_id = row['WorkerId']
            # The responses field is tab-separated groups of '|'-separated
            # answers; flatten it into one list of answer strings.
            ans_is_good = [x
                           for s in row['Answer.responses'].split('\t')
                           for x in s.split('|')]
            # Named work_time so the stdlib ``time`` module is not shadowed.
            work_time = float(row['WorkTimeInSeconds'])
            worker_to_is_good[worker_id].extend(ans_is_good)
            worker_to_times[worker_id].append(work_time)

    # Aggregate by worker
    print('%d total workers' % len(worker_to_times))
    worker_stats = {}
    for worker_id, raw_times in worker_to_times.items():
        times = sorted(raw_times)
        # Floor division keeps the index an int on Python 2 and Python 3.
        # For an even count this picks the upper of the two middle values.
        t_median = times[len(times) // 2]
        t_mean = sum(times) / float(len(times))
        is_good_list = worker_to_is_good[worker_id]
        # num_qs is never 0 here: str.split always yields at least one item.
        num_qs = len(is_good_list)
        frac_good = sum(1.0 for x in is_good_list if x == 'yes') / num_qs
        worker_stats[worker_id] = (t_median, t_mean, num_qs, frac_good)

    # Print, lowest fraction of good answers first
    sorted_ids = sorted(worker_stats, key=lambda x: worker_stats[x][3])
    for worker_id in sorted_ids:
        t_median, t_mean, num_qs, frac_good = worker_stats[worker_id]
        print('Worker %s: t_median %.1f, t_mean %.1f, %d questions, '
              '%.1f%% good' % (worker_id, t_median, t_mean, num_qs,
                               100.0 * frac_good))
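# Comment list  (stray web-page navigation text; not part of the program)
# Table of contents  (stray web-page navigation text; not part of the program)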