import numpy as np


def train(net_shapes, net_params, optimizer, utility, pool):
    # pass a seed to each worker instead of the whole noise matrix; this
    # saves inter-process transfer time, since the worker can regenerate
    # the identical noise from the seed
    noise_seed = np.random.randint(0, 2 ** 32 - 1, size=N_KID, dtype=np.uint32).repeat(2)  # mirrored sampling
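    # mirrored (antithetic) sampling: kids 2i and 2i+1 share a seed but apply
    # the noise with opposite signs (see sign() in the sketch below), which
    # reduces the variance of the gradient estimate at no extra cost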
    # distribute training in parallel
    jobs = [pool.apply_async(get_reward, (net_shapes, net_params, env, CONFIG['ep_max_step'], CONFIG['continuous_a'],
                                          [noise_seed[k_id], k_id])) for k_id in range(N_KID * 2)]
    rewards = np.array([j.get() for j in jobs])
    kids_rank = np.argsort(rewards)[::-1]  # kid ids ranked from best to worst reward
    cumulative_update = np.zeros_like(net_params)  # accumulator for the utility-weighted noise
    for ui, k_id in enumerate(kids_rank):
        np.random.seed(noise_seed[k_id])  # reconstruct this kid's noise from its seed
        cumulative_update += utility[ui] * sign(k_id) * np.random.randn(net_params.size)
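    # cumulative_update now approximates sum_i utility[i] * sign(i) * eps_i;
    # dividing by (2 * N_KID * SIGMA) below rescales it into the usual ES
    # gradient estimate g ~ (1 / (N * sigma)) * sum_i F_i * eps_i, with the
    # rank utilities standing in for the raw rewards F_i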
    gradients = optimizer.get_gradients(cumulative_update / (2 * N_KID * SIGMA))
    return net_params + gradients, rewards
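
train() leans on several globals and helpers that sit outside this excerpt: N_KID, SIGMA, env, CONFIG, get_reward (assumed to reseed NumPy from the passed seed, perturb net_params by sign(k_id) * SIGMA * noise, run one episode, and return its total reward), a sign() helper, a rank-based utility vector, and an optimizer exposing get_gradients(). Below is a minimal sketch of plausible definitions, following the standard OpenAI-ES recipe (mirrored sampling plus rank-based fitness shaping); every name and value here is an assumption, not confirmed contents of the file.

import numpy as np

N_KID = 10    # assumed: half the population size (each kid is mirrored)
SIGMA = 0.05  # assumed: standard deviation of the parameter noise

def sign(k_id):
    # mirrored sampling: even-indexed kids add the noise, their odd-indexed
    # twins subtract it, so each seed is evaluated as both +eps and -eps
    return -1. if k_id % 2 == 0 else 1.

# rank-based fitness shaping: a fixed utility vector that depends only on a
# kid's rank, making the update invariant to the scale of the raw rewards
base = N_KID * 2
rank = np.arange(1, base + 1)
util_ = np.maximum(0, np.log(base / 2 + 1) - np.log(rank))
utility = util_ / util_.sum() - 1 / base

class SGD:
    # momentum SGD; get_gradients() maps the raw ES estimate to the step
    # that train() adds to net_params (gradient ascent on reward)
    def __init__(self, params, learning_rate, momentum=0.9):
        self.v = np.zeros_like(params).astype(np.float32)
        self.lr, self.momentum = learning_rate, momentum

    def get_gradients(self, gradients):
        self.v = self.momentum * self.v + (1. - self.momentum) * gradients
        return self.lr * self.v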
Original source file: Evolution Strategy with Neural Nets.py
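
Finally, a hypothetical wiring of the pieces; the environment, the build_net() helper, and all hyper-parameters are illustrative only, not taken from the file:

import multiprocessing as mp
import gym

if __name__ == '__main__':
    env = gym.make('CartPole-v0').unwrapped  # assumed environment
    CONFIG = dict(ep_max_step=700, continuous_a=[False])

    # build_net() is a hypothetical helper returning the layer shapes and
    # a single flat parameter vector for a small MLP
    net_shapes, net_params = build_net()

    optimizer = SGD(net_params, learning_rate=0.05)
    pool = mp.Pool(processes=4)

    for g in range(100):
        net_params, kid_rewards = train(net_shapes, net_params, optimizer, utility, pool)
        print('Gen %d | kid avg reward: %.2f' % (g, kid_rewards.mean()))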