Untitled
unknown
python
6 months ago
4.2 kB
3
Indexable
import numpy as np import matplotlib.pyplot as plt def simulate_payoff_model(n_rounds, k): """ Simulate the payoff model over n_rounds rounds with k actions. Returns the payoff matrix of shape (n_rounds, k). """ payoffs = np.zeros((n_rounds, k)) # Set initial payoffs for Actions 1 and 2 payoffs[0, 0] = 0.5 # Action 1 gets payoff 1/2 in Round 1 # Action 2 gets payoff 0 in Round 1 (already zero) # Generate payoffs for subsequent rounds for i in range(1, n_rounds): if i % 2 == 1: # Odd rounds (Round 2, 4, ...) payoffs[i, 1] = 1 # Action 2 gets payoff 1 else: # Even rounds (Round 3, 5, ...) payoffs[i, 0] = 1 # Action 1 gets payoff 1 # Actions 3 to k remain at zero payoffs return payoffs def follow_the_leader(payoffs): """ Simulate the Follow the Leader algorithm. Returns the cumulative regret over rounds. """ n_rounds, k = payoffs.shape cumulative_payoffs = np.zeros(k) cumulative_regret = [] best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1)) for i in range(n_rounds): if i == 0: # First round: pick any action (choose Action 0) action = 0 else: # Choose the action with the highest cumulative payoff so far action = np.argmax(cumulative_payoffs) # Receive payoff payoff = payoffs[i, action] cumulative_payoffs[action] += payoff # Calculate cumulative regret total_payoff = np.sum(cumulative_payoffs) regret = best_fixed_action_payoff[i] - total_payoff cumulative_regret.append(regret) return cumulative_regret def random_guessing(payoffs): """ Simulate Uniform Random Guessing. Returns the cumulative regret over rounds. """ n_rounds, k = payoffs.shape cumulative_payoff = 0 cumulative_regret = [] best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1)) for i in range(n_rounds): # Randomly select an action action = np.random.randint(k) # Receive payoff payoff = payoffs[i, action] cumulative_payoff += payoff # Calculate cumulative regret regret = best_fixed_action_payoff[i] - cumulative_payoff cumulative_regret.append(regret) return cumulative_regret def exponential_weights(payoffs, epsilon): """ Simulate the Exponential Weights algorithm. Returns the cumulative regret over rounds. """ n_rounds, k = payoffs.shape weights = np.ones(k) cumulative_payoff = 0 cumulative_regret = [] best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1)) for i in range(n_rounds): # Compute probabilities probabilities = weights / np.sum(weights) # Sample an action according to the probabilities action = np.random.choice(k, p=probabilities) # Receive payoff payoff = payoffs[i, action] cumulative_payoff += payoff # Update weights weights *= np.exp(epsilon * payoffs[i]) # Calculate cumulative regret regret = best_fixed_action_payoff[i] - cumulative_payoff cumulative_regret.append(regret) return cumulative_regret def main(): n_rounds = 1000 # Number of rounds to simulate k = 5 # Number of actions (k > 2) payoffs = simulate_payoff_model(n_rounds, k) # Theoretical optimal learning rate for EW epsilon = np.sqrt(np.log(k) / n_rounds) # Run simulations regret_ftl = follow_the_leader(payoffs) regret_random = random_guessing(payoffs) regret_ew = exponential_weights(payoffs, epsilon) # Plot cumulative regret rounds = np.arange(1, n_rounds + 1) plt.figure(figsize=(12, 6)) plt.plot(rounds, regret_ftl, label='Follow the Leader') plt.plot(rounds, regret_random, label='Uniform Random Guessing') plt.plot(rounds, regret_ew, label='Exponential Weights') plt.xlabel('Round') plt.ylabel('Cumulative Regret') plt.title('Cumulative Regret vs. Number of Rounds') plt.legend() plt.grid(True) plt.show() if __name__ == '__main__': main()
Editor is loading...
Leave a Comment