Untitled
unknown
python
a year ago
4.2 kB
4
Indexable
import numpy as np
import matplotlib.pyplot as plt
def simulate_payoff_model(n_rounds, k):
"""
Simulate the payoff model over n_rounds rounds with k actions.
Returns the payoff matrix of shape (n_rounds, k).
"""
payoffs = np.zeros((n_rounds, k))
# Set initial payoffs for Actions 1 and 2
payoffs[0, 0] = 0.5 # Action 1 gets payoff 1/2 in Round 1
# Action 2 gets payoff 0 in Round 1 (already zero)
# Generate payoffs for subsequent rounds
for i in range(1, n_rounds):
if i % 2 == 1:
# Odd rounds (Round 2, 4, ...)
payoffs[i, 1] = 1 # Action 2 gets payoff 1
else:
# Even rounds (Round 3, 5, ...)
payoffs[i, 0] = 1 # Action 1 gets payoff 1
# Actions 3 to k remain at zero payoffs
return payoffs
def follow_the_leader(payoffs):
"""
Simulate the Follow the Leader algorithm.
Returns the cumulative regret over rounds.
"""
n_rounds, k = payoffs.shape
cumulative_payoffs = np.zeros(k)
cumulative_regret = []
best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1))
for i in range(n_rounds):
if i == 0:
# First round: pick any action (choose Action 0)
action = 0
else:
# Choose the action with the highest cumulative payoff so far
action = np.argmax(cumulative_payoffs)
# Receive payoff
payoff = payoffs[i, action]
cumulative_payoffs[action] += payoff
# Calculate cumulative regret
total_payoff = np.sum(cumulative_payoffs)
regret = best_fixed_action_payoff[i] - total_payoff
cumulative_regret.append(regret)
return cumulative_regret
def random_guessing(payoffs):
"""
Simulate Uniform Random Guessing.
Returns the cumulative regret over rounds.
"""
n_rounds, k = payoffs.shape
cumulative_payoff = 0
cumulative_regret = []
best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1))
for i in range(n_rounds):
# Randomly select an action
action = np.random.randint(k)
# Receive payoff
payoff = payoffs[i, action]
cumulative_payoff += payoff
# Calculate cumulative regret
regret = best_fixed_action_payoff[i] - cumulative_payoff
cumulative_regret.append(regret)
return cumulative_regret
def exponential_weights(payoffs, epsilon):
"""
Simulate the Exponential Weights algorithm.
Returns the cumulative regret over rounds.
"""
n_rounds, k = payoffs.shape
weights = np.ones(k)
cumulative_payoff = 0
cumulative_regret = []
best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1))
for i in range(n_rounds):
# Compute probabilities
probabilities = weights / np.sum(weights)
# Sample an action according to the probabilities
action = np.random.choice(k, p=probabilities)
# Receive payoff
payoff = payoffs[i, action]
cumulative_payoff += payoff
# Update weights
weights *= np.exp(epsilon * payoffs[i])
# Calculate cumulative regret
regret = best_fixed_action_payoff[i] - cumulative_payoff
cumulative_regret.append(regret)
return cumulative_regret
def main():
n_rounds = 1000 # Number of rounds to simulate
k = 5 # Number of actions (k > 2)
payoffs = simulate_payoff_model(n_rounds, k)
# Theoretical optimal learning rate for EW
epsilon = np.sqrt(np.log(k) / n_rounds)
# Run simulations
regret_ftl = follow_the_leader(payoffs)
regret_random = random_guessing(payoffs)
regret_ew = exponential_weights(payoffs, epsilon)
# Plot cumulative regret
rounds = np.arange(1, n_rounds + 1)
plt.figure(figsize=(12, 6))
plt.plot(rounds, regret_ftl, label='Follow the Leader')
plt.plot(rounds, regret_random, label='Uniform Random Guessing')
plt.plot(rounds, regret_ew, label='Exponential Weights')
plt.xlabel('Round')
plt.ylabel('Cumulative Regret')
plt.title('Cumulative Regret vs. Number of Rounds')
plt.legend()
plt.grid(True)
plt.show()
if __name__ == '__main__':
main()
Editor is loading...
Leave a Comment