# identify.py source file -- Python code snippet

def benchmark(rdb, sample, refs):
    """
    Basic benchmarking function, call with a reference db `rdb`, a `sample`
    and a list of `refs`.

    Each comparison function is run against every entry of `refs` and timed
    with `timeit`. The average time per full pass over `refs` (in the
    unit `timeit` reports, seconds by default) is printed on one line for
    the six comparison functions and also returned.

    :param rdb: reference db. Not used by this function; kept so that
        existing callers keep working.
    :param sample: the item handed as first argument to every comparison.
    :param refs: the references to compare against. It is iterated once per
        timed pass, so it must be re-iterable (e.g. a list).
    :return: tuple of six floats, the average time per pass in the order
        bench1 .. bench6 (same order as the printed line).
    """

    # The comparison functions are referenced inside small closures so that
    # their names are only looked up when a pass actually runs.
    def bench1():
        return [compare_strings_concat_levenshtein(sample, ref)
                for ref in refs]

    def bench2():
        return [compare_strings_set_union(sample, ref)
                for ref in refs]

    def bench3():
        return [compare_cc_list_levenshtein(sample, ref)
                for ref in refs]

    def bench4():
        return [compare_cc_list_set_union(sample, ref)
                for ref in refs]

    def bench5():
        return [compare_cc_spp(sample, ref)
                for ref in refs]

    def bench6():
        return [compare_bb_hash_bloomfilter(sample, ref)
                for ref in refs]

    # (callable, number of passes). Only run the slow ones a few times, and
    # bench5 (compare_cc_spp -- presumably the "cc3" of the original note)
    # only once because of caching.
    schedule = (
        (bench1, 5),
        (bench2, 100),
        (bench3, 100),
        (bench4, 100),
        (bench5, 1),
        (bench6, 100),
    )

    # The callables are passed to timeit directly (no lambda wrapper), so no
    # extra call frame is included in the measurement. The repeat count is
    # used both for `number` and for the average, so they cannot drift apart.
    # setup="gc.enable()" turns the garbage collector back on, which timeit
    # otherwise disables while timing.
    timings = tuple(
        timeit.timeit(bench, setup="gc.enable()", number=repeats) / repeats
        for bench, repeats in schedule
    )

    t1, t2, t3, t4, t5, t6 = timings
    print(t1, t2, t3, t4, t5, t6)
    return timings