Untitled

File "...\Anaconda\section02_24.py", line 39, in <module> action = np.argmax(q_table[state]) # Find the action that gives max value correspond to the state. IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
mail@pastecode.io avatar
unknown
python
2 years ago
1.8 kB
6
Indexable
Never
# -*- coding: utf-8 -*-
"""
Created on Wed Dec  7 00:24:52 2022

@author: ahmet
"""

import gym
import numpy as np
import random
import matplotlib.pyplot as plt

# Build Taxi-v3 and keep the environment held in the `.env` attribute of
# the object that gym.make() returns.
env = gym.make("Taxi-v3").env

# Q-table: one row per discrete state, one column per discrete action
# (500 rows x 6 columns for Taxi-v3), every entry starting at zero.
q_table = np.zeros(
    shape=(env.observation_space.n, env.action_space.n),
)

# Hyperparameters
alpha = 0.1    # weight given to the new estimate in each Q-value update
gamma = 0.9    # discount applied to the best next-state value
epsilon = 0.1  # probability of exploring: 10% explore, 90% exploit

# Per-episode metrics collected for plotting
reward_list = []   # total reward of each episode
dropout_list = []  # number of -10 penalties (wrong drop-offs) per episode

episode_number = 10000
# Episodes are numbered from 1, so the upper bound is episode_number + 1;
# range(1, episode_number) would stop one episode short.
for i in range(1, episode_number + 1):
    # reset() returns (observation, info) in the same gym API whose step()
    # returns five values. Only the integer observation may index the
    # Q-table; indexing with the whole tuple raises IndexError.
    state, _ = env.reset()
    reward_count = 0   # total reward collected in this episode
    dropout_count = 0  # number of -10 penalties seen in this episode

    while True:
        # Epsilon-greedy: explore with probability epsilon, otherwise take
        # the action with the highest Q-value for the current state.
        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state])

        # Apply the action and observe the outcome.
        next_state, reward, terminated, truncated, _ = env.step(action)

        # Q-learning update: blend the old estimate with the reward plus the
        # discounted best value reachable from the next state.
        old_value = q_table[state, action]
        next_max = np.max(q_table[next_state])
        q_table[state, action] = (
            (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
        )

        state = next_state

        # A reward of -10 is counted as a wrong drop-off.
        if reward == -10:
            dropout_count += 1

        reward_count += reward

        # Stop on either end-of-episode signal rather than only the first.
        if terminated or truncated:
            break

    dropout_list.append(dropout_count)
    reward_list.append(reward_count)

    if i % 10 == 0:
        # dropout_count is an int counter; calling it would raise TypeError.
        print(
            f"Episode: {i}, Reward: {reward_count}, "
            f"Wrong Drop Outs: {dropout_count}"
        )