Untitled
File "...\Anaconda\section02_24.py", line 39, in <module> action = np.argmax(q_table[state]) # Find the action that gives max value correspond to the state. IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indicesunknown
python
3 years ago
1.8 kB
12
Indexable
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 7 00:24:52 2022
@author: ahmet
"""
import gym
import numpy as np
import random
import matplotlib.pyplot as plt
# Tabular Q-learning on the Gym "Taxi-v3" environment.
#
# Trains a Q-table with an epsilon-greedy policy and records, per episode,
# the total reward and the number of -10 penalties received.

# NOTE(review): `.env` drops the outermost wrapper returned by gym.make
# (presumably the TimeLimit wrapper, so episodes only end when the
# environment itself terminates) -- confirm against the installed gym version.
env = gym.make("Taxi-v3").env

# Q-table: one row per state, one column per action.
q_table = np.zeros([env.observation_space.n, env.action_space.n])

# Hyperparameters
alpha = 0.1    # learning rate
gamma = 0.9    # discount factor
epsilon = 0.1  # 10% explore, 90% exploit

# Plotting metrics (one entry appended per episode)
reward_list = []
dropout_list = []

episode_number = 10000

# `+ 1` so that exactly `episode_number` episodes run while the episode
# counter printed below still starts at 1.
for i in range(1, episode_number + 1):
    # With the 5-value `step()` API used below, `reset()` returns
    # `(observation, info)`; indexing q_table with the whole tuple is what
    # raised "IndexError: only integers, slices ... are valid indices".
    state, _ = env.reset()

    reward_count = 0
    dropout_count = 0

    while True:
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit.
        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()  # random action
        else:
            action = np.argmax(q_table[state])  # best known action for state

        # Apply the action and observe the outcome.
        next_state, reward, terminated, truncated, _ = env.step(action)

        # Q-learning update:
        #   Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
        old_value = q_table[state, action]
        next_max = np.max(q_table[next_state])
        q_table[state, action] = (
            (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
        )

        state = next_state

        # A reward of -10 is counted as a wrong drop-off.
        if reward == -10:
            dropout_count += 1

        reward_count += reward

        # Stop on either end-of-episode signal so the loop cannot spin
        # forever if the environment reports truncation.
        if terminated or truncated:
            break

    dropout_list.append(dropout_count)
    reward_list.append(reward_count)

    if i % 10 == 0:
        # `dropout_count` is an int; the original called it like a function,
        # which raised TypeError on the first progress print.
        print("Episode: {}, Reward: {}, Wrong Drop Outs: {}".format(i, reward_count, dropout_count))
Editor is loading...