# Untitled
#
# mail@pastecode.io avatar
# unknown
# python
# 2 years ago
# 715 B
# 1
# Indexable
# Never
def _evaluate(env: gym.Env, algo: AlgoProtocol, epsilon: float) -> float:
    """Score one evaluation trial (placeholder implementation).

    This is defined at module level, not nested inside
    ``evaluate_on_environment``, because ``multiprocessing`` pickles the task
    function by its qualified name and a function defined inside another
    function cannot be pickled.

    The body is still a stub: it ignores ``env``, ``algo`` and ``epsilon`` and
    returns a constant.
    """
    return 10.0


def evaluate_on_environment(
    env: gym.Env, n_trials: int = 10, epsilon: float = 0.0, timeout: int = 30
) -> Callable[..., float]:
    """Build a scorer that averages the reward of ``n_trials`` parallel trials.

    Args:
        env: Environment handed to every trial. It is sent to worker
            processes, so it has to be picklable.
        n_trials: Number of trials submitted per scorer call. Must be >= 1.
        epsilon: Forwarded unchanged to each trial.
        timeout: Longest time, in seconds, to wait for each trial's result.
            NOTE(review): the parameter was previously accepted but unused;
            reading it as a per-result wait bound is an assumption -- confirm.

    Returns:
        ``scorer(algo, *args) -> float``, the mean reward over the trials.
        Extra positional arguments passed to the scorer are ignored.

    Raises:
        ValueError: If ``n_trials`` is smaller than 1 (the mean of zero
            trials would otherwise be NaN).
    """
    if n_trials < 1:
        raise ValueError(f"n_trials must be at least 1, got {n_trials}")

    def scorer(algo: AlgoProtocol, *args: Any) -> float:
        # There is no point in starting more workers than there are trials.
        n_workers = min(n_trials, mp.cpu_count())

        with mp.Pool(processes=n_workers) as pool:
            pending = [
                pool.apply_async(_evaluate, args=(env, algo, epsilon))
                for _ in range(n_trials)
            ]
            pool.close()
            # Collect while the pool is still alive: if a result times out or
            # a trial raises, leaving the ``with`` block terminates the
            # workers instead of waiting on them indefinitely.
            episode_rewards = [job.get(timeout=timeout) for job in pending]

        return float(np.mean(episode_rewards))

    return scorer