def __act_manual(self, state_meas):
if len(self.__measure_for_manual):
# [AMMO2, AMMO3, AMMO4, AMMO5, AMMO6, AMMO7, WEAPON2,
# WEAPON3 WEAPON4 WEAPON5 WEAPON6 WEAPON7 SELECTED_WEAPON]
assert len(self.__measure_for_manual) == 13
# [SELECT_WEAPON2 SELECT_WEAPON3 SELECT_WEAPON4 SELECT_WEAPON5 SELECT_WEAPON6 SELECT_WEAPON7]
curr_action = np.zeros((state_meas.shape[0], self.__num_manual_controls), dtype=np.int)
for ns in range(state_meas.shape[0]):
curr_ammo = state_meas[ns, self.__measure_for_manual[:6]]
curr_weapons = state_meas[ns, self.__measure_for_manual[6:12]]
if self.verbose:
print 'current ammo:', curr_ammo
print 'current weapons:', curr_weapons
available_weapons = np.logical_and(curr_ammo >= np.array([1, 2, 1, 1, 1, 40]), curr_weapons)
if any(available_weapons):
best_weapon = np.nonzero(available_weapons)[0][-1]
if not state_meas[ns, self.__measure_for_manual[12]] == best_weapon + 2:
curr_action[ns, best_weapon] = 1
return curr_action
else:
return []
评论列表
文章目录