# Paste-site page metadata (not part of the program):
# Untitled
# unknown
# python
# a year ago
# 1.7 kB
# 12
# Indexable
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
def run(episodes, render=False):
    """Train a tabular Q-learning agent on the 8x8 non-slippery FrozenLake map.

    Actions are chosen epsilon-greedily. Epsilon starts at 1 and is lowered by
    a fixed step after every episode; once it reaches 0 the learning rate is
    reduced to a small value. After training, a curve of the rolling count of
    successful episodes is saved to ``frozen_lake8x8.png``.

    Args:
        episodes: Number of training episodes to run.
        render: When true, the environment is created with
            ``render_mode="human"``; otherwise no render mode is requested.
    """
    env = gym.make(
        "FrozenLake-v1",
        map_name="8x8",
        is_slippery=False,
        render_mode="human" if render else None,
    )

    rewards_per_episode = np.zeros(episodes)

    # The environment is always released, even if training raises part-way.
    try:
        # One row per state, one column per action.
        q = np.zeros(shape=(env.observation_space.n, env.action_space.n))

        learning_rate = 0.9
        discount_factor_g = 0.9
        epsilon = 1
        epsilon_decay_rate = 0.0001
        rng = np.random.default_rng()

        for i in range(episodes):
            state = env.reset()[0]
            truncated = False
            terminated = False

            while not (truncated or terminated):
                # Explore with probability epsilon, otherwise act greedily.
                if rng.random() < epsilon:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(q[state, :])

                newstate, reward, terminated, truncated, _ = env.step(action)

                # Move the estimate toward the bootstrapped target
                # reward + discount * max_a' Q(newstate, a').
                q[state, action] = q[state, action] + learning_rate * (
                    reward
                    + discount_factor_g * np.max(q[newstate, :])
                    - q[state, action]
                )
                state = newstate

            # NOTE(review): decay and success recording are applied once per
            # episode; the nesting was reconstructed from variable use
            # (rewards_per_episode[i]) -- confirm against the intended design.
            epsilon = max(epsilon - epsilon_decay_rate, 0)
            if epsilon == 0:
                # Exploration is over: keep further updates small.
                learning_rate = 0.0001

            # Only the reward of the final step of the episode is inspected.
            if reward == 1:
                rewards_per_episode[i] = 1
    finally:
        env.close()

    # Rolling success count: the current episode plus up to 100 before it.
    sum_rewards = np.zeros(episodes)
    for t in range(episodes):
        sum_rewards[t] = np.sum(rewards_per_episode[max(0, t - 100):(t + 1)])

    # Draw on a dedicated figure and close it, so repeated calls neither
    # overlay earlier curves nor leave figures open.
    fig, ax = plt.subplots()
    try:
        ax.plot(sum_rewards)
        fig.savefig('frozen_lake8x8.png')
    finally:
        plt.close(fig)
if __name__ == "__main__":
    # Script entry point: train for 15000 episodes with the default
    # (non-rendering) setting.
    run(episodes=15000)
# Paste-site page residue (not part of the program):
# Editor is loading...
# Leave a Comment