def prepare_style(self, scale=1.0):
    """Rebuild the style patches for one phase of the optimization.

    The original style image and style map are rescaled by ``scale``, fed
    through the model, and the extracted per-layer outputs are stored in
    ``self.style_data`` as 16-bit arrays.
    """
    scaled_img = self.rescale_image(self.style_img_original, scale)
    self.style_img = self.model.prepare_image(scaled_img)

    # The map is stored as float32 with its last axis moved first and a new leading axis added.
    scaled_map = self.rescale_image(self.style_map_original, scale)
    self.style_map = scaled_map.transpose((2, 0, 1))[np.newaxis].astype(np.float32)

    # Compile a single function that extracts the patches of every style layer at once.
    layer_outputs = zip(self.style_layers, self.model.get_outputs('sem', self.style_layers))
    extractor = self.compile(
        [self.model.tensor_img, self.model.tensor_map],
        self.do_extract_patches(layer_outputs))
    result = extractor(self.style_img, self.style_map)

    # The extractor returns three consecutive outputs per layer; regroup them layer by layer.
    self.style_data = {}
    grouped = zip(self.style_layers, result[0::3], result[1::3], result[2::3])
    for layer, *data in grouped:
        patches = data[0]
        nn_layer = self.model.network['nn' + layer]
        nn_layer.num_filters = patches.shape[0] // args.slices

        # Trim every output to a whole number of slices and cast to 16-bit to save space.
        keep = nn_layer.num_filters * args.slices
        stored = [d[:keep].astype(np.float16) for d in data]
        stored.append(np.zeros((patches.shape[0],), dtype=np.float16))
        self.style_data[layer] = stored

        print(' - Style layer {}: {} patches in {:,}kb.'.format(layer, patches.shape, patches.size // 1000))
python类float16()的实例源码
def add_data_args(parser):
    """Register the 'Data' argument group on ``parser`` and return the group."""
    group = parser.add_argument_group('Data', 'the input images')
    # Disabled options kept for reference:
    #   --data-train (type=str, help='the training data')
    #   --data-val   (type=str, help='the validation data')
    options = (
        ('--rgb-mean', dict(type=str, default='123.68,116.779,103.939',
                            help='a tuple of size 3 for the mean rgb')),
        ('--pad-size', dict(type=int, default=0,
                            help='padding the input image')),
        ('--image-shape', dict(type=str,
                               help='the image shape feed into the network, e.g. (3,224,224)')),
        ('--num-classes', dict(type=int, help='the number of classes')),
        ('--num-examples', dict(type=int, help='the number of training examples')),
        ('--data-nthreads', dict(type=int, default=4,
                                 help='number of threads for data decoding')),
        ('--benchmark', dict(type=int, default=0,
                             help='if 1, then feed the network with synthetic data')),
        ('--dtype', dict(type=str, default='float32',
                         help='data type: float32 or float16')),
    )
    for flag, spec in options:
        group.add_argument(flag, **spec)
    return group
def test_tile_symmetry(self):
    '''
    Make sure that tiles are symmetric
    '''
    import base64

    # Read the fixture inside a context manager so the handle is always closed.
    with open('data/Dixon2012-J1-NcoI-R1-filtered.100kb.multires.cool', 'rb') as upload_file:
        tm.Tileset.objects.create(
            datafile=dcfu.SimpleUploadedFile(upload_file.name, upload_file.read()),
            filetype='cooler',
            datatype='matrix',
            owner=self.user1,
            uuid='aa')

    ret = self.client.get('/api/v1/tiles/?d=aa.0.0.0')
    contents = json.loads(ret.content.decode('utf-8'))

    # base64.decodestring was removed in Python 3.9; b64decode decodes the
    # same payload and is available on every Python version.
    r = base64.b64decode(contents['aa.0.0.0']['dense'].encode('utf-8'))
    q = np.frombuffer(r, dtype=np.float16)

    # The reshape fails if the payload is not exactly 256*256 half floats.
    q = q.reshape((256, 256))
    # NOTE(review): nothing here actually asserts that ``q`` equals ``q.T``,
    # despite the docstring -- confirm whether a symmetry check should be added.
def __init__(self, config, model_dir, ob_shape_list):
self.model_dir = model_dir
self.cnn_format = config.cnn_format
self.memory_size = config.memory_size
self.actions = np.empty(self.memory_size, dtype = np.uint8)
self.rewards = np.empty(self.memory_size, dtype = np.integer)
# print(self.memory_size, config.screen_height, config.screen_width)
# self.screens = np.empty((self.memory_size, config.screen_height, config.screen_width), dtype = np.float16)
self.screens = np.empty([self.memory_size] + ob_shape_list, dtype = np.float16)
self.terminals = np.empty(self.memory_size, dtype = np.bool)
self.history_length = config.history_length
# self.dims = (config.screen_height, config.screen_width)
self.dims = tuple(ob_shape_list)
self.batch_size = config.batch_size
self.count = 0
self.current = 0
# pre-allocate prestates and poststates for minibatch
self.prestates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.float16)
self.poststates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.float16)
# self.prestates = np.empty((self.batch_size, self.history_length, self.dims), dtype = np.float16)
# self.poststates = np.empty((self.batch_size, self.history_length, self.dims), dtype = np.float16)
def test_sum(self):
for dt in (np.int, np.float16, np.float32, np.float64, np.longdouble):
for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127,
128, 1024, 1235):
tgt = dt(v * (v + 1) / 2)
d = np.arange(1, v + 1, dtype=dt)
assert_almost_equal(np.sum(d), tgt)
assert_almost_equal(np.sum(d[::-1]), tgt)
d = np.ones(500, dtype=dt)
assert_almost_equal(np.sum(d[::2]), 250.)
assert_almost_equal(np.sum(d[1::2]), 250.)
assert_almost_equal(np.sum(d[::3]), 167.)
assert_almost_equal(np.sum(d[1::3]), 167.)
assert_almost_equal(np.sum(d[::-2]), 250.)
assert_almost_equal(np.sum(d[-1::-2]), 250.)
assert_almost_equal(np.sum(d[::-3]), 167.)
assert_almost_equal(np.sum(d[-1::-3]), 167.)
# sum with first reduction entry != 0
d = np.ones((1,), dtype=dt)
d += d
assert_almost_equal(d, 2.)
def setUp(self):
    """Build the float16 fixture arrays together with float32/float64 copies."""
    # Every 16-bit pattern, reinterpreted in place as float16.
    every_pattern = np.arange(0x10000, dtype=uint16)
    every_pattern.dtype = float16
    self.all_f16 = every_pattern
    self.all_f32 = self.all_f16.astype(float32)
    self.all_f64 = self.all_f16.astype(float64)

    # All non-NaN values in sorted order: the negative patterns counted
    # downwards, followed by the non-negative patterns counted upwards.
    negatives = np.arange(0xfc00, 0x7fff, -1, dtype=uint16)
    non_negatives = np.arange(0x0000, 0x7c01, 1, dtype=uint16)
    ordered = np.concatenate((negatives, non_negatives))
    ordered.dtype = float16
    self.nonan_f16 = ordered
    self.nonan_f32 = self.nonan_f16.astype(float32)
    self.nonan_f64 = self.nonan_f16.astype(float64)

    # Finite values only: drop the first and last entry (the two infinities).
    inner = slice(1, -1)
    self.finite_f16 = self.nonan_f16[inner]
    self.finite_f32 = self.nonan_f32[inner]
    self.finite_f64 = self.nonan_f64[inner]
def test_half_values(self):
"""Confirms a small number of known half values"""
a = np.array([1.0, -1.0,
2.0, -2.0,
0.0999755859375, 0.333251953125, # 1/10, 1/3
65504, -65504, # Maximum magnitude
2.0**(-14), -2.0**(-14), # Minimum normal
2.0**(-24), -2.0**(-24), # Minimum subnormal
0, -1/1e1000, # Signed zeros
np.inf, -np.inf])
b = np.array([0x3c00, 0xbc00,
0x4000, 0xc000,
0x2e66, 0x3555,
0x7bff, 0xfbff,
0x0400, 0x8400,
0x0001, 0x8001,
0x0000, 0x8000,
0x7c00, 0xfc00], dtype=uint16)
b.dtype = float16
assert_equal(a, b)
def test_half_ordering(self):
"""Make sure comparisons are working right"""
# All non-NaN float16 values in reverse order
a = self.nonan_f16[::-1].copy()
# 32-bit float copy
b = np.array(a, dtype=float32)
# Should sort the same
a.sort()
b.sort()
assert_equal(a, b)
# Comparisons should work
assert_((a[:-1] <= a[1:]).all())
assert_(not (a[:-1] > a[1:]).any())
assert_((a[1:] >= a[:-1]).all())
assert_(not (a[1:] < a[:-1]).any())
# All != except for +/-0
assert_equal(np.nonzero(a[:-1] < a[1:])[0].size, a.size-2)
assert_equal(np.nonzero(a[1:] > a[:-1])[0].size, a.size-2)
def test_half_coercion(self):
"""Test that half gets coerced properly with the other types"""
a16 = np.array((1,), dtype=float16)
a32 = np.array((1,), dtype=float32)
b16 = float16(1)
b32 = float32(1)
assert_equal(np.power(a16, 2).dtype, float16)
assert_equal(np.power(a16, 2.0).dtype, float16)
assert_equal(np.power(a16, b16).dtype, float16)
assert_equal(np.power(a16, b32).dtype, float16)
assert_equal(np.power(a16, a16).dtype, float16)
assert_equal(np.power(a16, a32).dtype, float32)
assert_equal(np.power(b16, 2).dtype, float64)
assert_equal(np.power(b16, 2.0).dtype, float64)
assert_equal(np.power(b16, b16).dtype, float16)
assert_equal(np.power(b16, b32).dtype, float32)
assert_equal(np.power(b16, a16).dtype, float16)
assert_equal(np.power(b16, a32).dtype, float32)
assert_equal(np.power(a32, a16).dtype, float32)
assert_equal(np.power(a32, b16).dtype, float32)
assert_equal(np.power(b32, a16).dtype, float16)
assert_equal(np.power(b32, b16).dtype, float32)
def for_float_dtypes(name='dtype', no_float16=False):
    """Decorator that checks the fixture with all float dtypes.

    Args:
         name(str): Argument name to which specified dtypes are passed.
         no_float16(bool): If ``True``, ``numpy.float16`` is
             omitted from candidate dtypes.

    dtypes to be tested are ``numpy.float16`` (optional), ``numpy.float32``,
    and ``numpy.float64``.

    .. seealso:: :func:`cupy.testing.for_dtypes`,
        :func:`cupy.testing.for_all_dtypes`

    """
    # Choose the candidate list first, then build the decorator once.
    candidates = _regular_float_dtypes if no_float16 else _float_dtypes
    return for_dtypes(candidates, name=name)
def for_all_dtypes_combination(names=('dtyes',),
                               no_float16=False, no_bool=False, full=None,
                               no_complex=False):
    """Decorator that checks the fixture with a product set of all dtypes.

    Args:
         names(list of str): Argument names to which dtypes are passed.
         no_float16(bool): If ``True``, ``numpy.float16`` is
             omitted from candidate dtypes.
         no_bool(bool): If ``True``, ``numpy.bool_`` is
             omitted from candidate dtypes.
         full(bool): If ``True``, then all combinations of dtypes
             will be tested.
             Otherwise, the subset of combinations will be tested
             (see description in :func:`cupy.testing.for_dtypes_combination`).
         no_complex(bool): If ``True``, ``numpy.complex64`` and
             ``numpy.complex128`` are omitted from candidate dtypes.

    .. seealso:: :func:`cupy.testing.for_dtypes_combination`

    """
    return for_dtypes_combination(
        _make_all_dtypes(no_float16, no_bool, no_complex), names, full)
def convert_atoms(self, row):
    """Turn one row into a flat list of feature arrays.

    The list starts with ``numbers``, ``positions``, ``cell`` and ``pbc``
    (positions and cell cast to ``self.floatX``), followed by one entry per
    key of ``self.kvp`` and then one entry per key of ``self.data``.
    """
    float_types = [np.float16, np.float32, np.float64]

    def as_feature(value):
        # Scalars are wrapped in a one-element array; float arrays are cast to floatX.
        if np.isscalar(value):
            value = np.array([value])
        if value.dtype in float_types:
            value = value.astype(self.floatX)
        return value

    features = [
        row.get('numbers'),
        row.get('positions').astype(self.floatX),
    ]
    pbc = row.get('pbc')
    features.append(row.get('cell').astype(self.floatX))
    features.append(pbc)

    # Values listed in self.kvp are looked up on the row itself ...
    features.extend(as_feature(row[key]) for key in list(self.kvp.keys()))
    # ... while values listed in self.data come from row.data and are converted to arrays first.
    features.extend(as_feature(np.array(row.data[key])) for key in list(self.data.keys()))
    return features
def numpy2bifrost(dtype):
    """Return the ``_bf.BF_DTYPE_*`` constant matching ``dtype``.

    Args:
        dtype: Anything that compares equal to the NumPy types (or the
            module's custom complex-integer/half types) listed below.

    Raises:
        ValueError: If ``dtype`` matches none of the supported types.
    """
    # np.float128 / np.complex256 only exist on platforms with extended
    # precision. Looking them up unconditionally raised AttributeError for
    # every dtype checked after them, so they are resolved defensively.
    candidates = (
        (np.int8, 'BF_DTYPE_I8'),
        (np.int16, 'BF_DTYPE_I16'),
        (np.int32, 'BF_DTYPE_I32'),
        (np.uint8, 'BF_DTYPE_U8'),
        (np.uint16, 'BF_DTYPE_U16'),
        (np.uint32, 'BF_DTYPE_U32'),
        (np.float16, 'BF_DTYPE_F16'),
        (np.float32, 'BF_DTYPE_F32'),
        (np.float64, 'BF_DTYPE_F64'),
        (getattr(np, 'float128', None), 'BF_DTYPE_F128'),
        (ci8, 'BF_DTYPE_CI8'),
        (ci16, 'BF_DTYPE_CI16'),
        (ci32, 'BF_DTYPE_CI32'),
        (cf16, 'BF_DTYPE_CF16'),
        (np.complex64, 'BF_DTYPE_CF32'),
        (np.complex128, 'BF_DTYPE_CF64'),
        (getattr(np, 'complex256', None), 'BF_DTYPE_CF128'),
    )
    for candidate, bf_name in candidates:
        if candidate is not None and dtype == candidate:
            # The constant is looked up only for the matching entry.
            return getattr(_bf, bf_name)
    raise ValueError("Unsupported dtype: " + str(dtype))
def numpy2string(dtype):
    """Return the short type code (e.g. ``'f32'``, ``'cf64'``) for ``dtype``.

    Args:
        dtype: Anything that compares equal to one of the NumPy types below.

    Raises:
        TypeError: If ``dtype`` matches none of the supported types.
    """
    # np.float128 / np.complex256 are missing on platforms without extended
    # precision. Resolve them defensively so the later entries and the
    # TypeError stay reachable instead of failing with AttributeError.
    codes = (
        (np.int8, 'i8'),
        (np.int16, 'i16'),
        (np.int32, 'i32'),
        (np.int64, 'i64'),
        (np.uint8, 'u8'),
        (np.uint16, 'u16'),
        (np.uint32, 'u32'),
        (np.uint64, 'u64'),
        (np.float16, 'f16'),
        (np.float32, 'f32'),
        (np.float64, 'f64'),
        (getattr(np, 'float128', None), 'f128'),
        (np.complex64, 'cf32'),
        (np.complex128, 'cf64'),
        (getattr(np, 'complex256', None), 'cf128'),
    )
    for candidate, code in codes:
        if candidate is not None and dtype == candidate:
            return code
    raise TypeError("Unsupported dtype: " + str(dtype))
def render_rgb(self, camera_idx):
    """Render from one of two fixed cameras and return the RGB channels.

    ``camera_idx`` (0 or 1) selects the camera position. The result is a
    float16 array of shape (render_height, render_width, 3) whose values
    have been divided by 255.
    """
    camera_positions = [(0.0, 0.75, 0.75), (0.75, 0.0, 0.75)]
    camera_pos = camera_positions[camera_idx]
    target_pos = (0, 0, 0.3)
    camera_up = (0, 0, 1)
    near_val, far_val = 1, 20
    fov = 60

    _w, _h, rgba, _depth, _objects = p.renderImage(
        self.render_width, self.render_height,
        camera_pos, target_pos, camera_up,
        near_val, far_val, fov)

    # The flat pixel buffer becomes an (H, W, 4) float16 image.
    # TODO: for storage concerns could just store this as uint8 (which it is)
    #       and normalise 0->1 + whiten later.
    pixels = np.asarray(rgba, dtype=np.float16)
    rgba_img = np.reshape(pixels, (self.render_height, self.render_width, 4))

    # Drop the alpha channel (always 1.0 per the original note) and scale in place.
    rgb_img = rgba_img[:, :, :3]
    rgb_img /= 255
    return rgb_img
def _typename(t):
if t == np.float16:
return 'float16'
elif t == np.float32:
return 'float32'
elif t == np.float64:
return 'float64'
elif t == np.uint8:
return 'uint8'
elif t == np.uint16:
return 'uint16'
elif t == np.int16:
return 'int16'
elif t == np.int32:
return 'int32'
elif t == np.int64:
return 'int64'
else:
raise TypeError('unknown type')
def default(self, obj):
    """Convert dates and NumPy scalars into JSON-serializable values.

    Args:
        obj: The object the JSON encoder could not serialize on its own.

    Returns:
        A string for ``datetime``/``date`` objects, or a builtin ``int``,
        ``bool`` or ``float`` for NumPy scalars.

    Raises:
        TypeError: Raised by the base class for any other type.
    """
    # datetime must be tested before date because it is a subclass of date.
    if isinstance(obj, datetime):
        return obj.strftime('%Y-%m-%dT%H:%M:%SZ')
    if isinstance(obj, date):
        return obj.strftime('%Y-%m-%d')

    # The abstract NumPy scalar types cover every concrete width and avoid
    # the np.float_ / np.complex_ aliases that were removed in NumPy 2.0.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.complexfloating):
        # Same result as the previous float(obj): only the real part is kept.
        return float(obj.real)

    # Let the base class default method raise the TypeError
    return json.JSONEncoder.default(self, obj)
def test_sum(self):
for dt in (np.int, np.float16, np.float32, np.float64, np.longdouble):
for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127,
128, 1024, 1235):
tgt = dt(v * (v + 1) / 2)
d = np.arange(1, v + 1, dtype=dt)
assert_almost_equal(np.sum(d), tgt)
assert_almost_equal(np.sum(d[::-1]), tgt)
d = np.ones(500, dtype=dt)
assert_almost_equal(np.sum(d[::2]), 250.)
assert_almost_equal(np.sum(d[1::2]), 250.)
assert_almost_equal(np.sum(d[::3]), 167.)
assert_almost_equal(np.sum(d[1::3]), 167.)
assert_almost_equal(np.sum(d[::-2]), 250.)
assert_almost_equal(np.sum(d[-1::-2]), 250.)
assert_almost_equal(np.sum(d[::-3]), 167.)
assert_almost_equal(np.sum(d[-1::-3]), 167.)
# sum with first reduction entry != 0
d = np.ones((1,), dtype=dt)
d += d
assert_almost_equal(d, 2.)
def setUp(self):
    """Build the float16 fixture arrays together with float32/float64 copies."""
    # Every 16-bit pattern, reinterpreted in place as float16.
    every_pattern = np.arange(0x10000, dtype=uint16)
    every_pattern.dtype = float16
    self.all_f16 = every_pattern
    self.all_f32 = self.all_f16.astype(float32)
    self.all_f64 = self.all_f16.astype(float64)

    # All non-NaN values in sorted order: the negative patterns counted
    # downwards, followed by the non-negative patterns counted upwards.
    negatives = np.arange(0xfc00, 0x7fff, -1, dtype=uint16)
    non_negatives = np.arange(0x0000, 0x7c01, 1, dtype=uint16)
    ordered = np.concatenate((negatives, non_negatives))
    ordered.dtype = float16
    self.nonan_f16 = ordered
    self.nonan_f32 = self.nonan_f16.astype(float32)
    self.nonan_f64 = self.nonan_f16.astype(float64)

    # Finite values only: drop the first and last entry (the two infinities).
    inner = slice(1, -1)
    self.finite_f16 = self.nonan_f16[inner]
    self.finite_f32 = self.nonan_f32[inner]
    self.finite_f64 = self.nonan_f64[inner]
def test_half_values(self):
"""Confirms a small number of known half values"""
a = np.array([1.0, -1.0,
2.0, -2.0,
0.0999755859375, 0.333251953125, # 1/10, 1/3
65504, -65504, # Maximum magnitude
2.0**(-14), -2.0**(-14), # Minimum normal
2.0**(-24), -2.0**(-24), # Minimum subnormal
0, -1/1e1000, # Signed zeros
np.inf, -np.inf])
b = np.array([0x3c00, 0xbc00,
0x4000, 0xc000,
0x2e66, 0x3555,
0x7bff, 0xfbff,
0x0400, 0x8400,
0x0001, 0x8001,
0x0000, 0x8000,
0x7c00, 0xfc00], dtype=uint16)
b.dtype = float16
assert_equal(a, b)
def test_half_ordering(self):
"""Make sure comparisons are working right"""
# All non-NaN float16 values in reverse order
a = self.nonan_f16[::-1].copy()
# 32-bit float copy
b = np.array(a, dtype=float32)
# Should sort the same
a.sort()
b.sort()
assert_equal(a, b)
# Comparisons should work
assert_((a[:-1] <= a[1:]).all())
assert_(not (a[:-1] > a[1:]).any())
assert_((a[1:] >= a[:-1]).all())
assert_(not (a[1:] < a[:-1]).any())
# All != except for +/-0
assert_equal(np.nonzero(a[:-1] < a[1:])[0].size, a.size-2)
assert_equal(np.nonzero(a[1:] > a[:-1])[0].size, a.size-2)
def test_half_coercion(self):
"""Test that half gets coerced properly with the other types"""
a16 = np.array((1,), dtype=float16)
a32 = np.array((1,), dtype=float32)
b16 = float16(1)
b32 = float32(1)
assert_equal(np.power(a16, 2).dtype, float16)
assert_equal(np.power(a16, 2.0).dtype, float16)
assert_equal(np.power(a16, b16).dtype, float16)
assert_equal(np.power(a16, b32).dtype, float16)
assert_equal(np.power(a16, a16).dtype, float16)
assert_equal(np.power(a16, a32).dtype, float32)
assert_equal(np.power(b16, 2).dtype, float64)
assert_equal(np.power(b16, 2.0).dtype, float64)
assert_equal(np.power(b16, b16).dtype, float16)
assert_equal(np.power(b16, b32).dtype, float32)
assert_equal(np.power(b16, a16).dtype, float16)
assert_equal(np.power(b16, a32).dtype, float32)
assert_equal(np.power(a32, a16).dtype, float32)
assert_equal(np.power(a32, b16).dtype, float32)
assert_equal(np.power(b32, a16).dtype, float16)
assert_equal(np.power(b32, b16).dtype, float32)
def test_values_single(self):
    """100 single fetches with a 0.005 s delay should take about half a second.

    The check runs twice: once with a fixed ``time_delay_sec`` and once with
    the delay supplied through ``time_delay_generator``.
    """
    time_delay_sec = 0.005
    shortest = datetime.timedelta(seconds=.47)
    longest = datetime.timedelta(seconds=.53)

    def time_fetches(delayed):
        # Wall-clock duration of 100 consecutive get_single() calls.
        started = datetime.datetime.now()
        for _ in range(100):
            delayed.get_single()
        return datetime.datetime.now() - started

    tdg = TimeDelayedGenerator(generator=ConstantValueGenerator(21, dtype=np.uint16),
                               time_delay_sec=time_delay_sec)
    assert shortest <= time_fetches(tdg) <= longest

    tdg = TimeDelayedGenerator(generator=ConstantValueGenerator(21, dtype=np.uint16),
                               time_delay_generator=ConstantValueGenerator(time_delay_sec, dtype=np.float16))
    assert shortest <= time_fetches(tdg) <= longest
def test_values_batch(self):
    """10 batches of 100 values with a 0.0005 s delay should take about half a second.

    The check runs twice: once with a fixed ``time_delay_sec`` and once with
    the delay supplied through ``time_delay_generator``.
    """
    time_delay_sec = 0.0005
    shortest = datetime.timedelta(seconds=.47)
    longest = datetime.timedelta(seconds=.53)

    def time_batches(delayed):
        # Wall-clock duration of 10 consecutive get_batch(100) calls.
        started = datetime.datetime.now()
        for _ in range(10):
            delayed.get_batch(100)
        return datetime.datetime.now() - started

    tdg = TimeDelayedGenerator(generator=ConstantValueGenerator(21, dtype=np.uint16),
                               time_delay_sec=time_delay_sec)
    assert shortest <= time_batches(tdg) <= longest

    tdg = TimeDelayedGenerator(generator=ConstantValueGenerator(21, dtype=np.uint16),
                               time_delay_generator=ConstantValueGenerator(time_delay_sec, dtype=np.float16))
    assert shortest <= time_batches(tdg) <= longest
def _build_indicator(self, span, **kwds):
    """Build the direction series derived from a moving average.

    Args:
        span: Span passed to ``MovingAverageIndicator``.
        **kwds: Accepted but not used by this implementation.

    Returns:
        A float16 array holding, for each pair of shifted/unshifted moving
        average values, one of the ``const.INDI_DIRECTION_*`` codes, or NaN
        when either value of the pair is NaN.
    """
    def get_direction(val1, val2):
        # The original tested val1 twice, so a NaN in val2 fell through to
        # the "equal" branch instead of yielding NaN.
        if np.isnan(val1) or np.isnan(val2):
            return np.nan
        if val1 < val2:
            return const.INDI_DIRECTION_UP
        if val1 > val2:
            return const.INDI_DIRECTION_DOWN
        # Neither smaller nor larger: the two values are equal.
        return const.INDI_DIRECTION_HR

    ma = MovingAverageIndicator(stock=self.stock, span=span)
    arr1 = ma.shifted(-1)  # NOTE(review): presumably the neighbouring period's value -- confirm shift direction
    arr2 = ma.data         # unshifted moving-average values
    return np.array([get_direction(a, b) for a, b
                     in zip(arr1, arr2)], dtype=np.float16)
def __init__(self, config, model_dir):
self.model_dir = model_dir
self.cnn_format = config.cnn_format
self.memory_size = config.memory_size
self.actions = np.empty(self.memory_size, dtype = np.uint8)
self.rewards = np.empty(self.memory_size, dtype = np.integer)
self.screens = np.empty((self.memory_size, config.screen_height, config.screen_width), dtype = np.float16)
self.terminals = np.empty(self.memory_size, dtype = np.bool)
self.history_length = config.history_length
self.dims = (config.screen_height, config.screen_width)
self.batch_size = config.batch_size
self.count = 0
self.current = 0
# pre-allocate prestates and poststates for minibatch
self.prestates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.float16)
self.poststates = np.empty((self.batch_size, self.history_length) + self.dims, dtype = np.float16)
def for_float_dtypes(name='dtype', no_float16=False):
    """Decorator that checks the fixture with all float dtypes.

    Args:
         name(str): Argument name to which specified dtypes are passed.
         no_float16(bool): If ``True``, ``numpy.float16`` is
             omitted from candidate dtypes.

    dtypes to be tested are ``numpy.float16`` (optional), ``numpy.float32``,
    and ``numpy.float64``.

    .. seealso:: :func:`cupy.testing.for_dtypes`,
        :func:`cupy.testing.for_all_dtypes`

    """
    # Choose the candidate list first, then build the decorator once.
    candidates = _regular_float_dtypes if no_float16 else _float_dtypes
    return for_dtypes(candidates, name=name)
def for_all_dtypes_combination(names=None,
                               no_float16=False, no_bool=False, full=None):
    """Decorator that checks the fixture with a product set of all dtypes.

    Args:
         names(list of str): Argument names to which dtypes are passed.
             When omitted, a fresh ``['dtyes']`` list is used.
         no_float16(bool): If ``True``, ``numpy.float16`` is
             omitted from candidate dtypes.
         no_bool(bool): If ``True``, ``numpy.bool_`` is
             omitted from candidate dtypes.
         full(bool): If ``True``, then all combinations of dtypes
             will be tested.
             Otherwise, the subset of combinations will be tested
             (see description in :func:`cupy.testing.for_dtypes_combination`).

    .. seealso:: :func:`cupy.testing.for_dtypes_combination`

    """
    # A list literal in the signature would be one object shared by every
    # call; build the historical default per call instead.
    if names is None:
        names = ['dtyes']
    types = _make_all_dtypes(no_float16, no_bool)
    return for_dtypes_combination(types, names, full)
def forward(self, x):
    """Compute the softmax of ``x[0]`` along axis 1 and return it as a 1-tuple.

    The result is also kept on ``self.y``. cuDNN is used when the input is
    not a NumPy array, cuDNN is enabled both globally and on this function,
    and either the cuDNN version is at least 3000 or the input is not
    float16; otherwise the softmax is computed with array operations.
    """
    xp = cuda.get_array_module(*x)
    inp = x[0]
    take_cudnn_path = (
        xp != numpy and cuda.cudnn_enabled and self.use_cudnn and
        (_cudnn_version >= 3000 or inp.dtype != numpy.float16))

    if not take_cudnn_path:
        # Subtract the per-row maximum, exponentiate in place, then normalise.
        self.y = inp - inp.max(axis=1, keepdims=True)
        xp.exp(self.y, out=self.y)
        self.y /= self.y.sum(axis=1, keepdims=True)
        return self.y,

    # The scaling constants are double for double input and float otherwise.
    oz_dtype = 'd' if inp.dtype == 'd' else 'f'
    one = numpy.array(1, dtype=oz_dtype).ctypes
    zero = numpy.array(0, dtype=oz_dtype).ctypes
    handle = cudnn.get_handle()

    # Collapse everything after the first two axes into a single axis.
    x_cube = inp.reshape(inp.shape[:2] + (-1, 1))
    desc = cudnn.create_tensor_descriptor(x_cube)
    self.y = xp.empty_like(inp)
    libcudnn.softmaxForward(
        handle, _algorithm, _mode, one.data, desc.value,
        x_cube.data.ptr, zero.data, desc.value,
        self.y.data.ptr)
    return self.y,
def check_forward(self, op, x_data, gpu, positive):
    """Compare ``op`` applied to a Variable with ``op`` applied to ``self.x``.

    ``self.value`` is the second operand (its absolute value when
    ``positive`` is set); it is moved to the GPU for the Variable call when
    ``gpu`` is set. The tolerance is 1e-3 for float16 and 1e-6 otherwise.
    """
    value = numpy.abs(self.value) if positive else self.value
    operand = cuda.to_gpu(value) if gpu else value

    y = op(chainer.Variable(x_data), operand)

    # Half precision needs a looser tolerance.
    tol = 1e-3 if self.dtype == numpy.float16 else 1e-6
    gradient_check.assert_allclose(
        op(self.x, value), y.data, atol=tol, rtol=tol)