Untitled
unknown
python
2 years ago
1.2 kB
0
Indexable
Never
def evaluate_on_environment(
    env: gym.Env,
    n_trials: int = 10,
    epsilon: float = 0.0,
    render: bool = False,
) -> Callable[..., float]:
    """Build a scorer that evaluates an algorithm over parallel episodes.

    Args:
        env: Environment to evaluate on; it is deep-copied once per trial.
        n_trials: Number of episodes to run.
        epsilon: Probability of taking a random action at each step.
        render: Accepted for interface compatibility; currently unused.

    Returns:
        A function ``scorer(algo, *args)`` that runs ``n_trials`` episodes in
        a process pool and returns the mean episode reward as a float.
    """

    def scorer(algo: AlgoProtocol, *args: Any) -> float:
        # Pool() rejects a worker count below 1, which `cpu_count() - 1`
        # yields on a single-core machine; also avoid starting more workers
        # than there are trials to run.
        n_workers = max(1, min(n_trials, mp.cpu_count() - 1))

        futures = []
        with mp.Pool(n_workers) as pool:
            for _ in range(n_trials):
                # Each trial gets its own copy so trials never share state.
                local_env = copy.deepcopy(env)
                # Argument order must match evaluate(env, epsilon, algo).
                futures.append(
                    pool.apply_async(
                        func=evaluate,
                        args=(local_env, epsilon, algo),
                    )
                )
            pool.close()
            pool.join()

        # .get() re-raises any exception that occurred inside a worker.
        episode_rewards = [item.get() for item in futures]
        print(episode_rewards)
        return float(np.mean(episode_rewards))

    return scorer


def evaluate(env: gym.Env, epsilon: float, algo: AlgoProtocol) -> float:
    """Run one episode with an epsilon-greedy policy and return its reward.

    Args:
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns a 4-tuple
            ``(observation, reward, done, info)``.
        epsilon: Probability of sampling a random action from
            ``env.action_space`` instead of asking ``algo``.
        algo: Object whose ``predict`` takes a batch of observations and
            returns a batch of actions.

    Returns:
        The sum of rewards collected until the environment reports ``done``.
    """
    # NOTE(review): when run in fork-started workers, each process may inherit
    # the same NumPy global RNG state, so random actions could be correlated
    # across trials -- confirm and reseed per worker if that matters.
    observation = env.reset()
    episode_reward = 0.0

    while True:
        # Epsilon-greedy action selection.
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            # predict() works on batches: wrap the observation, unwrap the
            # single resulting action.
            action = algo.predict([observation])[0]

        observation, reward, done, _ = env.step(action)
        episode_reward += reward

        if done:
            break

    return episode_reward