# Taxi Sim Q-Learning
import numpy as np
import gym  # classic gym API (pre-0.26): reset() returns obs, step() returns a 4-tuple
import time
NEPOCHS = 10000      # number of training episodes
MAX_ITER = 1000      # max steps per episode
EPSILON = 0.1        # exploration rate for epsilon-greedy action selection
DISCOUNT = 0.6       # discount factor (gamma)
LEARNING_RATE = 0.1  # learning rate (alpha)
def train(env, nepochs=NEPOCHS, max_iter=MAX_ITER, epsilon=EPSILON, learning_rate=LEARNING_RATE, discount=DISCOUNT):
    qtable = np.zeros((env.observation_space.n, env.action_space.n))
    for i in range(nepochs):
        obs = env.reset()
        for j in range(max_iter):
            # epsilon-greedy action selection
            if np.random.uniform(0, 1) < epsilon:
                action = np.random.choice(env.action_space.n)
            else:
                action = np.argmax(qtable[obs])
            next_obs, reward, done, _ = env.step(action)
            # Q-learning update; do not bootstrap past a terminal state
            target = reward + (0 if done else discount * np.max(qtable[next_obs]))
            qtable[obs, action] += learning_rate * (target - qtable[obs, action])
            obs = next_obs
            if done:
                break
    return qtable
def execute(env, qtable):
    # Roll out the greedy policy once, rendering each step, and return the step count.
    count = 0
    obs = env.reset()
    env.render()
    done = False
    while not done:
        action = np.argmax(qtable[obs])
        obs, reward, done, _ = env.step(action)
        env.render()
        count += 1
    return count
def executeTwo(env, qtable):
    # Same greedy rollout, but prints the ANSI rendering after a screen-clear escape
    # code and pauses between steps so the episode plays back as an animation.
    count = 0
    obs = env.reset()
    env.render()
    done = False
    while not done:
        action = np.argmax(qtable[obs])
        obs, reward, done, _ = env.step(action)
        print('\x1b[1J' + env.render(mode='ansi'))
        time.sleep(0.5)
        count += 1
    return count
env = gym.make('Taxi-v3')
qtable = train(env)
result = execute(env, qtable)
# result = executeTwo(env, qtable)
print(result)
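
# Optional extra (not part of the original script): a minimal evaluation sketch, assuming
# the same classic gym API used above. `evaluate` is a hypothetical helper that runs the
# greedy policy for several episodes without rendering and returns the average episode
# length, which is a quick sanity check on the learned Q-table.
def evaluate(env, qtable, nepisodes=100, max_iter=MAX_ITER):
    steps = []
    for _ in range(nepisodes):
        obs = env.reset()
        count = 0
        done = False
        while not done and count < max_iter:
            action = np.argmax(qtable[obs])
            obs, reward, done, _ = env.step(action)
            count += 1
        steps.append(count)
    return sum(steps) / len(steps)

# print(evaluate(env, qtable))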