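# The snippet below refers to env1 without creating it. A minimal setup sketch,
# assuming a Gymnasium FrozenLake-v1 environment (the environment id and
# render_mode are assumptions, not stated in the original paste):
import gymnasium as gym

env1 = gym.make("FrozenLake-v1", render_mode="ansi")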
# Inspect the environment: the spaces and the transition model for state 14, action 2.
# P[state][action] is a list of (probability, next_state, reward, terminated) tuples;
# .unwrapped bypasses the TimeLimit wrapper that gym.make adds.
print(env1.observation_space)
print(env1.action_space)
print(env1.unwrapped.P[14][2])

env1.reset()
t_s = 10   # maximum time steps per episode
eps = 10   # number of episodes

total_rewards = []      # List to store the total reward of each episode
episode_policies = []   # List to store the action sequence taken in each episode

# Q1: Generate multiple episodes and find the total reward in each episode
for j in range(eps):
    env1.reset()
    print("\nEpisode: ", j + 1)
    total_reward = 0     # Total reward accumulated in the current episode
    episode_policy = []  # Actions taken in the current episode
    for i in range(t_s):
        print("Time step: ", i + 1)
        random_action = env1.action_space.sample()
        # Gymnasium's step() returns (observation, reward, terminated, truncated, info);
        # for FrozenLake, info holds the transition probability under the key "prob".
        n_s, reward, terminated, truncated, info = env1.step(random_action)
        done = terminated or truncated
        print(env1.render())   # with render_mode="ansi", render() returns a printable string
        print(n_s, reward, terminated, truncated, info)
        # Q2: Append the action to the episode_policy
        episode_policy.append(random_action)
        total_reward += reward  # Add the reward to the total for this episode
        if done:
            break
    # Append the total_reward and episode_policy to their respective lists
    total_rewards.append(total_reward)
    episode_policies.append(episode_policy)
    print("Total Reward for Episode {}: {}".format(j + 1, total_reward))

# Q1: Print the total reward for each episode in the specified format
print("\n[Episode - Total reward]")
for episode, reward in enumerate(total_rewards, 1):
    print("[{:8} - {:8}]".format(episode, reward))

# Q2: Find the episode with the highest reward and print its policy
best_episode_index = total_rewards.index(max(total_rewards))
best_episode_policy = episode_policies[best_episode_index]

# Print the action sequence for the episode with the highest reward
print("\nPolicy for Episode with Highest Reward (Episode {}):".format(best_episode_index + 1))
for step, action in enumerate(best_episode_policy):
    print("Step {}: {}".format(step + 1, action))