Q-Learning Example
import numpy as np
import matplotlib.pyplot as plt

# Environment definition
n_states = 16    # 4x4 Grid World
n_actions = 4    # UP, DOWN, LEFT, RIGHT
goal_state = 15  # Terminal state

Q_table = np.zeros((n_states, n_actions))  # Initialize Q-table

# Parameters
learning_rate = 0.8
discount_factor = 0.95
epsilon = 1.0        # Exploration probability
epsilon_decay = 0.99
epsilon_min = 0.01
epochs = 1000

# Function to get next state based on action
def get_next_state(state, action):
    row, col = divmod(state, 4)  # 4x4 grid coordinates
    if action == 0:
        row = max(0, row - 1)    # UP
    elif action == 1:
        row = min(3, row + 1)    # DOWN
    elif action == 2:
        col = max(0, col - 1)    # LEFT
    elif action == 3:
        col = min(3, col + 1)    # RIGHT
    return row * 4 + col         # Return new state index

# Q-Learning algorithm
rewards_per_episode = []
for epoch in range(epochs):
    state = 0  # Start at state 0
    total_reward = 0
    while state != goal_state:
        # Epsilon-greedy action selection
        if np.random.rand() < epsilon:
            action = np.random.randint(0, n_actions)  # Explore
        else:
            action = np.argmax(Q_table[state])        # Exploit

        next_state = get_next_state(state, action)
        reward = 1.0 if next_state == goal_state else -0.1
        total_reward += reward

        # Update Q-table
        Q_table[state, action] += learning_rate * (
            reward + discount_factor * np.max(Q_table[next_state]) - Q_table[state, action]
        )
        state = next_state  # Move to next state

    epsilon = max(epsilon_min, epsilon * epsilon_decay)  # Decay epsilon
    rewards_per_episode.append(total_reward)

# Visualization of rewards over episodes
plt.plot(rewards_per_episode)
plt.title("Q-Learning Performance Over Episodes")
plt.xlabel("Episodes")
plt.ylabel("Total Reward")
plt.grid()
plt.show()
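
The learned Q_table implicitly defines a greedy policy: for each state, pick the action with the highest Q-value. As a minimal sketch (assuming the script above has already run, so Q_table, n_states, and goal_state are in scope), the snippet below extracts that policy and prints it as arrows on the 4x4 grid:

# Minimal sketch: extract the greedy policy from the learned Q_table.
# Assumes the training script above has run, so Q_table and goal_state
# are already defined in the current scope.
action_symbols = ["^", "v", "<", ">"]  # UP, DOWN, LEFT, RIGHT

policy = np.argmax(Q_table, axis=1)  # Best action index per state

for row in range(4):
    cells = []
    for col in range(4):
        s = row * 4 + col
        # Mark the terminal state with "G"; otherwise show the greedy action
        cells.append("G" if s == goal_state else action_symbols[policy[s]])
    print(" ".join(cells))

Note that np.argmax breaks ties by taking the lowest index, so any state the agent never updated (an all-zero row in Q_table) will show UP by default; a longer training run or a higher epsilon_min reduces how often that happens.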