Untitled
File "...\Anaconda\section02_24.py", line 39, in <module>
    action = np.argmax(q_table[state]) # Find the action that gives max value correspond to the state.
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
unknown
python
3 years ago
1.8 kB
9
Indexable
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 7 00:24:52 2022

@author: ahmet

Tabular Q-learning agent for the Gym "Taxi-v3" environment.

Written against the Gym >= 0.26 API, where ``env.reset()`` returns
``(observation, info)`` and ``env.step()`` returns
``(observation, reward, terminated, truncated, info)``.
"""

import random

import numpy as np

# Hyper Parameters
alpha = 0.1  # learning rate: weight given to the newly observed target
gamma = 0.9  # discount factor applied to the best next-state value
epsilon = 0.1  # 10% to explore, 90% exploit

episode_number = 10000


def train(env, episodes=episode_number, alpha=alpha, gamma=gamma,
          epsilon=epsilon, log_every=10):
    """Train a Q-table on *env* with epsilon-greedy tabular Q-learning.

    Args:
        env: Environment exposing ``observation_space.n``,
            ``action_space.n``, ``action_space.sample()``, ``reset()``
            returning ``(state, info)`` and ``step(action)`` returning
            ``(next_state, reward, terminated, truncated, info)``.
        episodes: Number of episodes to run.
        alpha: Learning rate of the Q-value update.
        gamma: Discount factor of the Q-value update.
        epsilon: Probability of taking a random action instead of the
            greedy one.
        log_every: Print a progress line every this many episodes; a
            falsy value disables printing.

    Returns:
        A tuple ``(q_table, reward_list, dropout_list)``: the learned
        table of shape ``(n_states, n_actions)``, the total reward of
        each episode, and the number of -10 penalties of each episode.
    """
    # Q-Table: one row per state, one column per action
    # (500 rows x 6 columns for Taxi-v3 according to the original note).
    q_table = np.zeros([env.observation_space.n, env.action_space.n])

    # Plotting metrics
    reward_list = []
    dropout_list = []

    # Episodes are numbered from 1; "+ 1" so that exactly `episodes` run
    # (the original `range(1, episode_number)` stopped one short).
    for i in range(1, episodes + 1):
        # reset() returns (state, info); indexing the Q-table with the
        # whole tuple is what raised the IndexError.
        state, _ = env.reset()
        reward_count = 0
        dropout_count = 0

        done = False
        while not done:
            # exploit vs explore
            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()  # pick a random action
            else:
                # action with the highest value for the current state
                action = np.argmax(q_table[state])

            # Process the action & observe the results
            next_state, reward, terminated, truncated, _ = env.step(action)
            # Stop on either signal so a time-limited env cannot loop forever.
            done = terminated or truncated

            # Update the Q-Table
            old_value = q_table[state, action]
            next_max = np.max(q_table[next_state])
            q_table[state, action] = (
                (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            )

            # Update the state
            state = next_state

            # Count the wrong drop-offs (steps penalised with -10)
            if reward == -10:
                dropout_count += 1
            reward_count += reward

        dropout_list.append(dropout_count)
        reward_list.append(reward_count)

        if log_every and i % log_every == 0:
            # dropout_count is an int; the original called it like a function.
            print("Episode: {}, Reward: {}, Wrong Drop Outs: {}".format(
                i, reward_count, dropout_count))

    return q_table, reward_list, dropout_list


if __name__ == "__main__":
    # Third-party runtime dependencies are imported here so that `train`
    # can be imported and exercised without gym / matplotlib installed.
    import gym
    import matplotlib.pyplot as plt  # noqa: F401 - kept from the original script

    # `.env` takes the environment underneath the outermost wrapper
    # returned by gym.make().
    env = gym.make("Taxi-v3").env

    q_table, reward_list, dropout_list = train(env)
Editor is loading...