# print_batch.py -- source file (Python code snippet)

def stats_verify():
  """Print per-worker timing and answer-quality statistics for a results CSV.

  Reads the CSV file named by ``OPTS.filename`` and skips every row whose
  ``AssignmentStatus`` column equals ``'Rejected'``. For each remaining row
  it records, under the row's ``WorkerId``:

  * the ``WorkTimeInSeconds`` value, parsed as a float, and
  * every individual answer in ``Answer.responses``, which is split first
    on tabs and then on ``|``.

  It then prints the number of distinct workers, followed by one line per
  worker ordered by ascending fraction of answers equal to ``'yes'``. Each
  line shows the worker's median and mean work time, the number of answers
  collected, and the percentage of ``'yes'`` answers.

  The "median" is the element at index ``len(times) // 2`` of the sorted
  times, i.e. the upper of the two middle values when a worker has an even
  number of timings.

  Returns:
    None. All results are written to standard output.

  Raises:
    KeyError: if a row lacks one of the columns named above.
    ValueError: if ``WorkTimeInSeconds`` cannot be parsed as a float.
  """
  # Read data
  worker_to_is_good = collections.defaultdict(list)
  worker_to_times = collections.defaultdict(list)
  with open(OPTS.filename) as csvfile:
    for row in csv.DictReader(csvfile):
      if row['AssignmentStatus'] == 'Rejected':
        continue
      worker_id = row['WorkerId']
      ans_is_good = [x for s in row['Answer.responses'].split('\t')
                     for x in s.split('|')]
      seconds = float(row['WorkTimeInSeconds'])
      worker_to_is_good[worker_id].extend(ans_is_good)
      worker_to_times[worker_id].append(seconds)

  # Aggregate by worker
  # Single-argument print(...) behaves the same on Python 2 and Python 3.
  print('%d total workers' % len(worker_to_times))
  worker_stats = {}
  for worker_id in worker_to_times:
    times = sorted(worker_to_times[worker_id])
    # Floor division keeps the index an int on Python 3 as well.
    t_median = times[len(times) // 2]
    t_mean = sum(times) / float(len(times))
    is_good_list = worker_to_is_good[worker_id]
    # Every kept row contributes at least one answer ('' still splits into
    # one element), so num_qs is never zero here.
    num_qs = len(is_good_list)
    frac_good = is_good_list.count('yes') / float(num_qs)
    worker_stats[worker_id] = (t_median, t_mean, num_qs, frac_good)

  # Print, lowest fraction of 'yes' answers first
  sorted_ids = sorted(worker_stats, key=lambda w: worker_stats[w][3])
  for worker_id in sorted_ids:
    t_median, t_mean, num_qs, frac_good = worker_stats[worker_id]
    print('Worker %s: t_median %.1f, t_mean %.1f, %d questions, %.1f%% good' % (
        worker_id, t_median, t_mean, num_qs, 100.0 * frac_good))