def analyze(log_fname, nn=True, max_bin=6, n_first_removed=3):
"""
Analyze anonymized deepbeat.org usage logs.
Input:
log_fname -- Path to the log file.
nn -- Use scores with the NN feature or not.
max_bin -- Maximum score difference considered
(if too large, the last bins get very noisy).
n_first_removed -- From each user, remove this many first selections,
since in the beginning the user might be just
playing with the tool.
Output:
    bin_centers -- Centers of the score-difference bins.
    probs -- Probability, for each bin, that the user selected the line
             which the algorithm scored higher.
    stds -- Standard deviations of the probability estimates (one per bin).
"""
users, selections = read_log(log_fname)
print "%d selections, %d unique users" % (len(selections), len(set(users)))
us = {} # user -> selections
for u, s in zip(users, selections):
if u not in us:
us[u] = []
us[u].append(s)
    lens = [len(s) for s in us.values()]
lens = np.array(lens)
score_differences = extract_feedback(us, n_first_removed, nn)
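    # One-sample t-test: is the mean score difference significantly different
    # from zero?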
print "t-test:", stats.ttest_1samp(score_differences, 0)
sel_ranks = [s['selectedLine'] for s in selections]
print "Histogram of selected line indices:"
print np.histogram(sel_ranks, range(21))
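    # Bin the score differences (up to max_bin) and estimate, for each bin, the
    # probability that the user picked the line the algorithm scored higher.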
xs, probs, stds = compute_probs(score_differences, max_bin)
return xs, probs, stds
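

# A minimal usage sketch, not part of the original script: it assumes this
# module also defines read_log, extract_feedback and compute_probs, that numpy
# and scipy.stats are imported at the top as np and stats, and that matplotlib
# is available for plotting. The log path below is a placeholder.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    bin_centers, probs, stds = analyze("path/to/usage.log", nn=True, max_bin=6)

    # Probability of choosing the algorithm's preferred line as a function of
    # the score difference, with the returned standard deviations as error bars.
    plt.errorbar(bin_centers, probs, yerr=stds, fmt="o-", capsize=3)
    plt.xlabel("Score difference (bin center)")
    plt.ylabel("P(user selects the better-scored line)")
    plt.show()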