# Untitled
#
# mail@pastecode.io avatar
# unknown
# python
# 2 years ago
# 1.2 kB
# 0
# Indexable
# Never
def evaluate_on_environment(
    env: gym.Env, n_trials: int = 10, epsilon: float = 0.0, render: bool = False
) -> Callable[..., float]:
    """Build a scorer that evaluates an algorithm over parallel episodes.

    Args:
        env: Environment to evaluate on. It is deep-copied once per trial so
            every episode starts from its own instance.
        n_trials: Number of episodes to run per scorer call.
        epsilon: Probability of taking a random action, forwarded to
            ``evaluate``.
        render: Accepted for interface compatibility; not used here.

    Returns:
        A callable ``scorer(algo, *args)`` that returns the mean episode
        reward over ``n_trials`` episodes as a ``float``.
    """

    def scorer(algo: AlgoProtocol, *args: Any) -> float:
        """Run ``n_trials`` episodes in worker processes and average them.

        Extra positional arguments are accepted and ignored.
        """
        # cpu_count() - 1 is 0 on a single-core host and Pool(0) raises
        # ValueError, so always keep one worker; never spawn more workers
        # than there are episodes to run.
        n_workers = max(1, min(n_trials, mp.cpu_count() - 1))

        # NOTE(review): with a fork-based start method the workers presumably
        # inherit the parent's global numpy RNG state, which could correlate
        # the epsilon-greedy draws across trials -- confirm if epsilon > 0.
        with mp.Pool(n_workers) as pool:
            # Bind by keyword so the call cannot drift out of sync with the
            # parameter order of ``evaluate`` (env, epsilon, algo).
            pending = [
                pool.apply_async(
                    func=evaluate,
                    kwds={
                        "env": copy.deepcopy(env),
                        "epsilon": epsilon,
                        "algo": algo,
                    },
                )
                for _ in range(n_trials)
            ]
            pool.close()
            pool.join()

        # All tasks have finished after join(); get() returns the episode
        # reward or re-raises the exception raised inside the worker.
        episode_rewards = [item.get() for item in pending]
        print(episode_rewards)

        return float(np.mean(episode_rewards))

    return scorer

def evaluate(env: gym.Env, epsilon: float, algo: AlgoProtocol):
    """Play a single episode and return the summed reward.

    At every step a uniform random number is drawn; if it is below
    ``epsilon`` the action is sampled from ``env.action_space``, otherwise it
    is the first element of ``algo.predict([observation])``.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and
            ``action_space.sample()``. ``step`` is unpacked as a 4-tuple
            ``(observation, reward, done, info)``.
        epsilon: Probability of taking a random action at each step.
        algo: Object whose ``predict`` accepts a list of observations.

    Returns:
        The sum of the rewards collected until ``done`` is truthy.
    """
    obs = env.reset()
    total = 0.0
    finished = False

    while not finished:
        # The random draw happens on every step, even when epsilon is 0.
        explore = np.random.random() < epsilon
        action = env.action_space.sample() if explore else algo.predict([obs])[0]

        obs, step_reward, finished, _ = env.step(action)
        total += step_reward

    return total