Untitled
unknown
python
3 years ago
1.2 kB
9
Indexable
def evaluate_on_environment(
    env: gym.Env, n_trials: int = 10, epsilon: float = 0.0, render: bool = False
) -> Callable[..., float]:
    """Build a scorer that averages episode returns over parallel rollouts.

    Args:
        env: Template environment. Every trial runs on its own deep copy.
        n_trials: Number of episodes to run per scorer call.
        epsilon: Probability of taking a random action at each step
            (forwarded to ``evaluate``).
        render: Accepted for interface compatibility; the visible code never
            reads it.

    Returns:
        A ``scorer(algo, *args)`` callable returning the mean episode reward
        as a ``float``. Extra positional arguments are ignored.
    """

    def scorer(algo: AlgoProtocol, *args: Any) -> float:
        """Run ``n_trials`` episodes of ``algo`` in a process pool.

        An exception raised inside a worker is re-raised here by
        ``AsyncResult.get()``.
        """
        # cpu_count() - 1 is 0 on a single-core host and Pool rejects that,
        # so keep at least one worker; more workers than trials would idle.
        n_workers = max(1, min(n_trials, mp.cpu_count() - 1))
        with mp.Pool(n_workers) as pool:
            # epsilon/algo go by keyword: evaluate() is declared as
            # (env, epsilon, algo), and passing them positionally in the
            # wrong order silently swaps the two.
            # NOTE(review): with the "fork" start method the workers inherit
            # the parent's NumPy global RNG state, so epsilon-greedy draws may
            # be correlated across workers -- confirm whether that matters.
            pending = [
                pool.apply_async(
                    func=evaluate,
                    args=(copy.deepcopy(env),),
                    kwds={"epsilon": epsilon, "algo": algo},
                )
                for _ in range(n_trials)
            ]
            pool.close()
            pool.join()
            episode_rewards = [item.get() for item in pending]
        print(episode_rewards)
        return float(np.mean(episode_rewards))

    return scorer
def evaluate(env: gym.Env, epsilon: float, algo: AlgoProtocol) -> float:
    """Run one episode with an epsilon-greedy policy and return its total reward.

    Uses the classic Gym calling convention: ``env.reset()`` returns the first
    observation and ``env.step(action)`` returns a 4-tuple
    ``(observation, reward, done, info)``.

    Args:
        env: Environment to roll out in; it is reset before the episode starts.
        epsilon: Per-step probability of sampling a random action from
            ``env.action_space`` instead of asking ``algo``.
        algo: Policy exposing ``predict(batch)``; it is called with a
            single-observation batch and the first result is used.

    Returns:
        Sum of the rewards collected until the environment reports ``done``.
    """
    observation = env.reset()
    episode_reward = 0.0
    while True:
        # take action
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            # predict() works on batches, so wrap the observation and unwrap
            # the single resulting action.
            action = algo.predict([observation])[0]
        observation, reward, done, _ = env.step(action)
        episode_reward += reward
        if done:
            break
    return episode_reward