def analyze(log_fname, nn=True, max_bin=6, n_first_removed=3):
"""
Analyze anonymized deepbeat.org usage logs.
Input:
log_fname -- Path to the log file.
nn -- Use scores with the NN feature or not.
max_bin -- Maximum score difference considered
(if too large, the last bins get very noisy).
n_first_removed -- From each user, remove this many first selections,
since in the beginning the user might be just
playing with the tool.
Output:
    bin_centers -- Centers of the score-difference bins.
    probs -- Probability, for each bin, that the user selected the line
             which the algorithm scored higher.
    stds -- Standard deviations of the probability estimates (one per bin).
"""
users, selections = read_log(log_fname)
print "%d selections, %d unique users" % (len(selections), len(set(users)))
us = {} # user -> selections
for u, s in zip(users, selections):
if u not in us:
us[u] = []
us[u].append(s)
    lens = [len(s) for s in us.values()]
lens = np.array(lens)
score_differences = extract_feedback(us, n_first_removed, nn)
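    # One-sample t-test: is the mean score difference significantly different
    # from zero?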
print "t-test:", stats.ttest_1samp(score_differences, 0)
sel_ranks = [s['selectedLine'] for s in selections]
print "Histogram of selected line indices:"
print np.histogram(sel_ranks, range(21))
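    # Bin the score differences (up to max_bin) and estimate, for each bin, the
    # probability that the user picked the line the algorithm scored higher.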
xs, probs, stds = compute_probs(score_differences, max_bin)
return xs, probs, stds
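

# A minimal usage sketch, not part of the original script: it assumes this
# module also defines read_log, extract_feedback and compute_probs, that numpy
# and scipy.stats are imported at the top as np and stats, and that matplotlib
# is available for plotting. The log path below is a placeholder.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    bin_centers, probs, stds = analyze("path/to/usage.log", nn=True, max_bin=6)

    # Probability of choosing the algorithm's preferred line as a function of
    # the score difference, with the returned standard deviations as error bars.
    plt.errorbar(bin_centers, probs, yerr=stds, fmt="o-", capsize=3)
    plt.xlabel("Score difference (bin center)")
    plt.ylabel("P(user selects the better-scored line)")
    plt.show()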