Untitled
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt


def run(episodes: int, render: bool = False) -> None:
    """Train a tabular Q-learning agent on FrozenLake 8x8 and plot its progress.

    The agent follows an epsilon-greedy policy on the non-slippery
    ``FrozenLake-v1`` 8x8 map. Epsilon starts at 1 and is reduced by a fixed
    amount after every episode; once it reaches 0 the learning rate is
    lowered so the learned Q-values stay stable. After training, a rolling
    sum of successful episodes is plotted and written to
    ``frozen_lake8x8.png`` in the current working directory.

    Args:
        episodes: Number of training episodes to run.
        render: When true, the environment is created with
            ``render_mode="human"``; otherwise no render mode is requested.
    """
    initial_learning_rate = 0.9
    final_learning_rate = 0.0001  # used once exploration has stopped
    discount_factor_g = 0.9
    epsilon_decay_rate = 0.0001  # epsilon goes from 1 to 0 in ~10,000 episodes
    # A slice of [t - window, t] inclusive, i.e. up to window + 1 episodes.
    rolling_window = 100

    env = gym.make(
        "FrozenLake-v1",
        map_name="8x8",
        is_slippery=False,
        render_mode="human" if render else None,
    )

    # One row per state, one column per action; all values start at zero.
    q = np.zeros(shape=(env.observation_space.n, env.action_space.n))

    learning_rate = initial_learning_rate
    epsilon = 1
    rng = np.random.default_rng()
    rewards_per_episode = np.zeros(episodes)

    # Close the environment even if an episode raises part-way through.
    try:
        for i in range(episodes):
            state = env.reset()[0]
            terminated = False
            truncated = False

            while not (terminated or truncated):
                if rng.random() < epsilon:
                    # Explore: pick a random action.
                    action = env.action_space.sample()
                else:
                    # Exploit: pick the best known action for this state.
                    # Cast so env.step receives a plain Python int.
                    action = int(np.argmax(q[state, :]))

                newstate, reward, terminated, truncated, _ = env.step(action)

                # Q-learning update: move the estimate towards the
                # bootstrapped target by a fraction of learning_rate.
                td_target = reward + discount_factor_g * np.max(q[newstate, :])
                q[state, action] += learning_rate * (td_target - q[state, action])

                state = newstate

            # Linear decay, clamped so epsilon lands exactly on 0.
            epsilon = max(epsilon - epsilon_decay_rate, 0)
            if epsilon == 0:
                learning_rate = final_learning_rate

            # The episode counts as a success when its last step paid 1.
            if reward == 1:
                rewards_per_episode[i] = 1
    finally:
        env.close()

    # Rolling count of successful episodes ending at each episode index.
    sum_rewards = np.zeros(episodes)
    for t in range(episodes):
        window_start = max(0, t - rolling_window)
        sum_rewards[t] = np.sum(rewards_per_episode[window_start:(t + 1)])

    plt.plot(sum_rewards)
    plt.savefig('frozen_lake8x8.png')


if __name__ == "__main__":
    run(15000)
Leave a Comment