def train_agent(rounds=10000, use_score=False, name='result_dir', create_agent=create_ddqn_agent):
    """Build a gym environment, train an agent on it with periodic evaluation, and return the agent.

    Args:
        rounds: Total number of training steps, forwarded as ``steps``.
        use_score: When truthy the ``'malware-score-v0'`` environment is
            created; otherwise ``'malware-v0'`` is used.
        name: Output directory handed to the trainer as ``outdir``.
        create_agent: Callable invoked with the environment; its return
            value is the agent that gets trained.

    Returns:
        The agent object produced by ``create_agent`` after the training
        call has completed.
    """
    # Pick the environment id based on the requested variant.
    if use_score:
        env_id = 'malware-score-v0'
    else:
        env_id = 'malware-v0'
    env = gym.make(env_id)

    # Seed NumPy's global RNG and the environment with the same constant
    # (presumably so runs are repeatable -- confirm with the author).
    np.random.seed(123)
    env.seed(123)

    agent = create_agent(env)

    # Collected after the agent is built so ``env.maxturns`` is read at the
    # same point in the sequence as before.
    training_options = dict(
        steps=rounds,                  # total number of training steps
        max_episode_len=env.maxturns,  # cap on the length of one episode
        eval_interval=1000,            # evaluate every 1000 steps
        eval_n_runs=100,               # episodes sampled per evaluation
        outdir=name,                   # directory passed to the trainer for its output
    )
    chainerrl.experiments.train_agent_with_evaluation(agent, env, **training_options)
    return agent
评论列表
文章目录