Untitled

mail@pastecode.io avatar
unknown
python
5 months ago
1.7 kB
6
Indexable
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt

def run(episodes, render=False):
    """Train a tabular Q-learning agent on the 8x8 non-slippery FrozenLake map.

    Actions are chosen epsilon-greedily. Epsilon starts at 1 and is reduced
    by a fixed amount after every episode until it reaches 0; from then on
    the learning rate is lowered to 0.0001. After training, the number of
    successful episodes in a trailing window is plotted and written to
    ``frozen_lake8x8.png`` in the current working directory.

    Args:
        episodes: Number of training episodes to run.
        render: If true, the environment is created with
            ``render_mode="human"``; otherwise no render mode is requested.
    """
    env = gym.make(
        "FrozenLake-v1",
        map_name="8x8",
        is_slippery=False,
        render_mode="human" if render else None,
    )
    # try/finally so the environment is released even if training raises.
    try:
        # One row per state, one column per action, initialised to zero.
        q = np.zeros(shape=(env.observation_space.n, env.action_space.n))
        learning_rate = 0.9
        discount_factor_g = 0.9
        epsilon = 1
        epsilon_decay_rate = 0.0001

        rng = np.random.default_rng()

        # 1 for every episode whose final step returned a reward of 1.
        rewards_per_episode = np.zeros(episodes)

        for i in range(episodes):
            # reset() returns (observation, info); only the observation is used.
            state = env.reset()[0]

            truncated = False
            terminated = False

            while not (truncated or terminated):
                if rng.random() < epsilon:
                    # Explore: uniformly random action.
                    action = env.action_space.sample()
                else:
                    # Exploit: best known action for this state.
                    action = np.argmax(q[state, :])
                newstate, reward, terminated, truncated, _ = env.step(action)

                # Q-learning update: move the estimate towards the
                # bootstrapped target by a fraction `learning_rate`.
                td_target = reward + discount_factor_g * np.max(q[newstate, :])
                q[state, action] += learning_rate * (td_target - q[state, action])

                state = newstate

            # Linear decay, clamped so epsilon never goes negative.
            epsilon = max(epsilon - epsilon_decay_rate, 0)

            # Once exploration has stopped, switch to a very small step size.
            if epsilon == 0:
                learning_rate = 0.0001

            # `reward` still holds the reward of the episode's last step.
            if reward == 1:
                rewards_per_episode[i] = 1
    finally:
        env.close()

    # Trailing sum over the current episode and up to 100 preceding ones
    # (i.e. a window of at most 101 episodes).
    sum_rewards = np.zeros(episodes)
    for t in range(episodes):
        sum_rewards[t] = np.sum(rewards_per_episode[max(0, t - 100):(t + 1)])

    # Draw on a dedicated figure and close it afterwards so that repeated
    # calls neither overlay earlier curves nor accumulate open figures.
    fig = plt.figure()
    try:
        plt.plot(sum_rewards)
        plt.savefig('frozen_lake8x8.png')
    finally:
        plt.close(fig)


if __name__ == "__main__":
    # Script entry point: train for 15,000 episodes with rendering disabled
    # (the default).
    run(episodes=15_000)


Leave a Comment