def speed_fusion(self, shared_fn=shared, gpu=False, s=None,
                 shp=(1000, 1000), nb_repeat=50):
    """Print a timing comparison with and without elemwise fusion.

    Calls ``self.do`` twice on copies of the default compile mode: once
    with the ``local_elemwise_fusion`` optimization included and once with
    it excluded. Both sets of timings and their element-wise ratio
    (without / with) are printed; nothing is returned.

    :param shared_fn: forwarded unchanged to ``self.do``.
    :param gpu: forwarded to ``self.do`` as its ``gpu`` argument.
    :type s: a slice object
    :param s: a slice to apply to the cases to execute. If None, execute
        all cases.
    :param shp: shape forwarded to ``self.do``. Defaults to
        ``(1000, 1000)``, the value that was previously hard-coded
        (pass e.g. ``(3000, 3000)`` for a heavier run).
    :param nb_repeat: repeat count forwarded to ``self.do``. Defaults to
        50, the value that was previously hard-coded.
    """
    # linker=gof.CLinker
    # linker=gof.OpWiseCLinker
    mode1 = copy.copy(compile.get_default_mode())
    mode1._optimizer = mode1._optimizer.including('local_elemwise_fusion')
    # TODO: clinker is much faster... but uses too much memory.
    # Possible cause: there is no deletion of intermediate values when we
    # don't keep the fct.
    # More plausible cause: we keep a link to the output data?
    # Follow up: Clinker does the same... second cause?
    mode2 = copy.copy(compile.get_default_mode())
    mode2._optimizer = mode2._optimizer.excluding('local_elemwise_fusion')
    print("test with linker", str(mode1.linker))

    # Timings are presumably numpy arrays (they must support .min(),
    # .max(), .sum() and element-wise division) -- confirm against self.do.
    times1 = self.do(mode1, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat,
                     assert_len_topo=False, slice=s)
    times2 = self.do(mode2, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat,
                     assert_len_topo=False, slice=s)

    print("times1 with local_elemwise_fusion")
    print(times1, times1.min(), times1.max(), times1.sum())
    print("times2 without local_elemwise_fusion")
    print(times2, times2.min(), times2.max(), times2.sum())

    # Ratio > 1 means the fused version was faster for that case.
    d = times2 / times1
    print("times2/times1")
    print(d)
    print("min", d.min(), "argmin", d.argmin(), "max", d.max(),
          "mean", d.mean(), "std", d.std())
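# 评论列表 ("comment list") -- web-page navigation residue, not part of the code
# 文章目录 ("table of contents") -- web-page navigation residue, not part of the code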