Untitled
unknown
plain_text
2 years ago
2.2 kB
7
Indexable
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 15 13:40:03 2024
@author: Admin
"""
import gymnasium as gym1
# Random-policy rollouts on FrozenLake.
#
# Runs `eps` episodes of at most `t_s` steps each, choosing a uniformly random
# action at every step.  For every episode the summed reward and the sequence
# of actions taken are recorded; afterwards a per-episode reward table and the
# action sequence of the best-scoring episode are printed.
env1 = gym1.make("FrozenLake-v1", render_mode='human')
env1.reset()
env1.render()
print(env1.observation_space)
print(env1.action_space)
# The transition table `P` belongs to the underlying FrozenLake environment,
# not to the wrappers that `make` stacks around it, so read it via `unwrapped`
# (newer Gymnasium releases do not forward unknown attributes through wrappers).
print(env1.unwrapped.P[14][2])

t_s = 30  # maximum number of time steps per episode
eps = 30  # number of episodes to run
total_rewards = []  # total reward of each episode, in episode order
episode_policies = []  # actions taken in each episode, in episode order

# Q1: Generate multiple episodes and find the total reward in each episode
for j in range(eps):
    env1.reset()
    print("\nEpisode: ", j + 1)
    total_reward = 0  # running reward sum for the current episode
    episode_policy = []  # actions taken in the current episode
    for i in range(t_s):
        print("Time step: ", i + 1)
        random_action = env1.action_space.sample()
        # Gymnasium's step() returns
        # (observation, reward, terminated, truncated, info).
        n_s, reward, terminated, truncated, info = env1.step(random_action)
        env1.render()
        print(n_s, reward, terminated, truncated, info)
        # Q2: Append the action to the episode_policy
        episode_policy.append(random_action)
        total_reward += reward
        # The episode is over on either flag; stepping further without a
        # reset is not valid.
        if terminated or truncated:
            break
    # Record this episode's results once its step loop has finished.
    total_rewards.append(total_reward)
    episode_policies.append(episode_policy)
    print("Total Reward for Episode {}: {}".format(j + 1, total_reward))

# Q1: Print the total reward for each episode in the specified format
print("\n(Episode - Total reward)")
for episode, episode_total in enumerate(total_rewards, 1):
    print("[{:8} - {:8}]".format(episode, episode_total))

# Q2: Find the episode with the highest reward and print the policy
# (on ties, index() picks the earliest episode with the maximum reward).
best_episode_index = total_rewards.index(max(total_rewards))
best_episode_policy = episode_policies[best_episode_index]
print("\nPolicy for Episode with Highest Reward (Episode {}):".format(best_episode_index + 1))
for step, action in enumerate(best_episode_policy):
    print("Step {}: {}".format(step + 1, action))

# Release the render window and any other resources held by the environment.
env1.close()
Editor is loading...
Leave a Comment