def __init__(self, lib, dtype,
             N, C, K,
             D, H, W,
             T, R, S,
             M, P, Q,
             pad_d, pad_h, pad_w,
             str_d, str_h, str_w,
             dil_d, dil_h, dil_w):
    """Configure the kernel, its autotune bookkeeping and warmup count.

    The full 3D convolution signature is accepted, but only ``N, C, K,
    H, W, P, Q, pad_h`` and ``pad_w`` are forwarded to the parent
    constructor. ``D, T, R, S, M, pad_d`` and every ``str_*`` / ``dil_*``
    argument are ignored and replaced by fixed literals (see the
    ``super`` call below).

    NOTE(review): the per-slot comments on the ``super`` call assume the
    parent constructor takes its arguments in the same order as this
    signature -- confirm against the parent class.

    Asserts that ``N`` is 1, 2 or a multiple of 4.
    """
    # Support N = 1,2 and multiples of 4 for now
    assert N in (1, 2) or N % 4 == 0

    super(UpdateWinograd_3x3_2x2, self).__init__(
        lib, dtype,
        N, C, K,
        1, H, W,          # D slot forced to 1
        1, 3, 3,          # T, R, S slots forced to 1, 3, 3
        1, P, Q,          # M slot forced to 1
        0, pad_h, pad_w,  # pad_d slot forced to 0
        1, 1, 1,          # strides forced to 1
        1, 1, 1)          # dilations forced to 1

    sm_count = _get_sm_count()

    # Autotune results are keyed on the kernel name, SM count, element size
    # and the problem dimensions, all converted with native_str.
    key_fields = ("update_3x3_2x2", sm_count, 0, dtype.itemsize,
                  N, C, K, H, W, P, Q)
    self.autotune_key = [native_str(field) for field in key_fields]

    # insert Python version in filename to avoid Py2/Py3 incompatibilities
    # in shelve
    py_major = sys.version_info[0]
    db_name = "autotune%d.db" % py_major
    self.autotune_db_file = os.path.join(lib.cache_dir, db_name)

    # presumably init() creates image_trans / output_trans, which are read
    # on the next line -- verify against the parent class
    self.init()
    lib.set_scratch_size(self.image_trans.size, self.output_trans.size)

    # allow for .5 seconds worth of warmup when autotuning
    # assume 10 Tflops on 24 SMs
    flops_per_pass = P * Q * K * N * C * 9 * 2.0
    sm_scale = sm_count / 24.0
    warmup_iters = int(5e12 / flops_per_pass * sm_scale)
    # clamp the iteration count to the range [1, 1000]
    self.warmup = min(max(warmup_iters, 1), 1000)
评论列表
文章目录