CartPole
unknown
python
a year ago
1.5 kB
5
Indexable
Never
# Tabular Q-learning on Gym's "CartPole-v0": observations are discretized into
# a small grid of buckets and a Q-table is trained with epsilon-greedy
# exploration.
import math
import random

import gym
import numpy as np

env = gym.make("CartPole-v0")

# Number of buckets per observation component, in observation order. A size of
# 1 maps every value of that component to bucket 0, so it is effectively
# ignored by the Q-table.
buckets = (1, 1, 6, 12)
# One Q-value per (bucketed state, action); the trailing axis of size 2 is
# indexed by the action.
Q_Table = np.zeros(buckets + (2,))

min_epsilon = 0.02   # floor for the exploration rate
epsilon = 1          # current exploration rate, decayed once per episode
lr = 0.01            # learning rate of the Q-update
gamma = 0.99         # discount factor
episodes = 100000
max_iterations = 250  # upper limit on steps per episode
rewards = []          # total reward of each finished episode, in order


def discretize_state(state):
    """Map a continuous observation to a tuple of bucket indices.

    Each component is rescaled linearly from ``[lower, upper]`` to
    ``[0, buckets[k] - 1]``, rounded to the nearest integer and clamped into
    that range, so out-of-bounds values land in the first or last bucket.

    Args:
        state: Sequence of observation components (four for this setup).

    Returns:
        Tuple of ints usable directly as an index into ``Q_Table``.
    """
    # Components 0 and 2 use the environment's own limits. Components 1 and 3
    # use hand-picked limits (+/-0.5 and +/-radians(50)) instead.
    # NOTE(review): presumably because the environment reports effectively
    # unbounded limits for those components - confirm against the Gym docs.
    upper_bounds = [
        env.observation_space.high[0],
        0.5,
        env.observation_space.high[2],
        math.radians(50),
    ]
    lower_bounds = [
        env.observation_space.low[0],
        -0.5,
        env.observation_space.low[2],
        -math.radians(50),
    ]

    indices = []
    for value, low, high, n_buckets in zip(state, lower_bounds, upper_bounds, buckets):
        # Subtracting the lower bound is the general form of the offset; it is
        # identical to adding abs(low) for the non-positive bounds used here.
        ratio = (value - low) / (high - low)
        index = int(round((n_buckets - 1) * ratio))
        indices.append(min(n_buckets - 1, max(0, index)))
    return tuple(indices)


try:
    for i in range(episodes):
        state = discretize_state(env.reset())
        total_reward = 0
        # Only draw every 2000th episode; rendering slows training down.
        render = i % 2000 == 0

        for _step in range(max_iterations):
            if render:
                env.render()

            # Epsilon-greedy: explore with probability epsilon, else exploit.
            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()
            else:
                action = int(np.argmax(Q_Table[state]))

            new_state, reward, done, _info = env.step(action)
            new_state = discretize_state(new_state)

            # Q-learning update towards reward + discounted best next value.
            # NOTE(review): the target also bootstraps from new_state when
            # `done` is set; left unchanged on purpose - confirm whether
            # terminal transitions should use the bare reward instead.
            Q_Table[state][action] += lr * (
                reward + gamma * np.max(Q_Table[new_state]) - Q_Table[state][action]
            )
            total_reward += reward

            if done:
                break
            state = new_state

        # Record the episode return so training progress can be inspected.
        rewards.append(total_reward)

        # Logarithmic decay: stays at 1 for the first 100 episodes (the min()
        # caps it), then falls with log10 of the episode count until it
        # reaches min_epsilon.
        epsilon = max(min_epsilon, min(1, 1 - np.log10((i + 1) / 100)))
finally:
    # Release the environment (and any render window) even if training is
    # interrupted or fails.
    env.close()