Q-learning example
import numpy as np
import matplotlib.pyplot as plt
# Environment definition
n_states = 16 # 4x4 Grid World
n_actions = 4 # UP, DOWN, LEFT, RIGHT
goal_state = 15 # Terminal state
Q_table = np.zeros((n_states, n_actions)) # Initialize Q-table
# Parameters
learning_rate = 0.8
discount_factor = 0.95
epsilon = 1.0 # Exploration probability
epsilon_decay = 0.99
epsilon_min = 0.01
n_episodes = 1000  # Number of training episodes
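# Note: a learning rate as high as 0.8 is workable here because the grid's
# transitions and rewards are deterministic; a stochastic environment would
# typically call for a smaller value.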
# Function to get next state based on action
def get_next_state(state, action):
    row, col = divmod(state, 4)  # Convert state index to 4x4 grid coordinates
    if action == 0: row = max(0, row - 1)    # UP
    elif action == 1: row = min(3, row + 1)  # DOWN
    elif action == 2: col = max(0, col - 1)  # LEFT
    elif action == 3: col = min(3, col + 1)  # RIGHT
    return row * 4 + col  # Return new state index
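# Quick sanity check (illustrative, not in the original script): moving RIGHT
# from the top-left corner reaches state 1; moving UP off the grid edge leaves
# the state unchanged.
assert get_next_state(0, 3) == 1
assert get_next_state(0, 0) == 0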
# Q-Learning algorithm
rewards_per_episode = []
for episode in range(n_episodes):
    state = 0  # Start each episode at state 0 (top-left corner)
    total_reward = 0
    while state != goal_state:
        # Epsilon-greedy action selection
        if np.random.rand() < epsilon:
            action = np.random.randint(0, n_actions)  # Explore
        else:
            action = np.argmax(Q_table[state])  # Exploit
        next_state = get_next_state(state, action)
        reward = 1.0 if next_state == goal_state else -0.1  # Goal bonus, step penalty
        total_reward += reward
        # Update Q-table
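        # Standard Q-learning backup:
        #   Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))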
        Q_table[state, action] += learning_rate * (
            reward + discount_factor * np.max(Q_table[next_state]) - Q_table[state, action]
        )
        state = next_state  # Move to next state
    epsilon = max(epsilon_min, epsilon * epsilon_decay)  # Decay epsilon once per episode
    rewards_per_episode.append(total_reward)
# Visualization of rewards over episodes
plt.plot(rewards_per_episode)
plt.title("Q-Learning Performance Over Episodes")
plt.xlabel("Episodes")
plt.ylabel("Total Reward")
plt.grid(True)
plt.show()
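# Optional follow-up (a minimal sketch, not part of the original paste): read
# the greedy policy out of the learned Q-table and print it as a 4x4 grid of
# arrows. The arrow glyphs and variable names below are illustrative choices.
action_symbols = ["^", "v", "<", ">"]  # UP, DOWN, LEFT, RIGHT
policy = np.argmax(Q_table, axis=1)  # Greedy action for each state
for row in range(4):
    cells = []
    for col in range(4):
        s = row * 4 + col
        cells.append("G" if s == goal_state else action_symbols[policy[s]])
    print(" ".join(cells))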