Untitled

 avatar
unknown
python
6 months ago
4.2 kB
3
Indexable
import numpy as np
import matplotlib.pyplot as plt

def simulate_payoff_model(n_rounds, k):
    """
    Simulate the payoff model over n_rounds rounds with k actions.
    Returns the payoff matrix of shape (n_rounds, k).
    """
    payoffs = np.zeros((n_rounds, k))

    # Set initial payoffs for Actions 1 and 2
    payoffs[0, 0] = 0.5  # Action 1 gets payoff 1/2 in Round 1
    # Action 2 gets payoff 0 in Round 1 (already zero)

    # Generate payoffs for subsequent rounds
    for i in range(1, n_rounds):
        if i % 2 == 1:
            # Odd rounds (Round 2, 4, ...)
            payoffs[i, 1] = 1  # Action 2 gets payoff 1
        else:
            # Even rounds (Round 3, 5, ...)
            payoffs[i, 0] = 1  # Action 1 gets payoff 1
        # Actions 3 to k remain at zero payoffs

    return payoffs

def follow_the_leader(payoffs):
    """
    Simulate the Follow the Leader algorithm.
    Returns the cumulative regret over rounds.
    """
    n_rounds, k = payoffs.shape
    cumulative_payoffs = np.zeros(k)
    cumulative_regret = []
    best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1))

    for i in range(n_rounds):
        if i == 0:
            # First round: pick any action (choose Action 0)
            action = 0
        else:
            # Choose the action with the highest cumulative payoff so far
            action = np.argmax(cumulative_payoffs)
        # Receive payoff
        payoff = payoffs[i, action]
        cumulative_payoffs[action] += payoff
        # Calculate cumulative regret
        total_payoff = np.sum(cumulative_payoffs)
        regret = best_fixed_action_payoff[i] - total_payoff
        cumulative_regret.append(regret)

    return cumulative_regret

def random_guessing(payoffs):
    """
    Simulate Uniform Random Guessing.
    Returns the cumulative regret over rounds.
    """
    n_rounds, k = payoffs.shape
    cumulative_payoff = 0
    cumulative_regret = []
    best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1))

    for i in range(n_rounds):
        # Randomly select an action
        action = np.random.randint(k)
        # Receive payoff
        payoff = payoffs[i, action]
        cumulative_payoff += payoff
        # Calculate cumulative regret
        regret = best_fixed_action_payoff[i] - cumulative_payoff
        cumulative_regret.append(regret)

    return cumulative_regret

def exponential_weights(payoffs, epsilon):
    """
    Simulate the Exponential Weights algorithm.
    Returns the cumulative regret over rounds.
    """
    n_rounds, k = payoffs.shape
    weights = np.ones(k)
    cumulative_payoff = 0
    cumulative_regret = []
    best_fixed_action_payoff = np.cumsum(np.max(payoffs, axis=1))

    for i in range(n_rounds):
        # Compute probabilities
        probabilities = weights / np.sum(weights)
        # Sample an action according to the probabilities
        action = np.random.choice(k, p=probabilities)
        # Receive payoff
        payoff = payoffs[i, action]
        cumulative_payoff += payoff
        # Update weights
        weights *= np.exp(epsilon * payoffs[i])
        # Calculate cumulative regret
        regret = best_fixed_action_payoff[i] - cumulative_payoff
        cumulative_regret.append(regret)

    return cumulative_regret

def main():
    n_rounds = 1000  # Number of rounds to simulate
    k = 5            # Number of actions (k > 2)
    payoffs = simulate_payoff_model(n_rounds, k)

    # Theoretical optimal learning rate for EW
    epsilon = np.sqrt(np.log(k) / n_rounds)

    # Run simulations
    regret_ftl = follow_the_leader(payoffs)
    regret_random = random_guessing(payoffs)
    regret_ew = exponential_weights(payoffs, epsilon)

    # Plot cumulative regret
    rounds = np.arange(1, n_rounds + 1)
    plt.figure(figsize=(12, 6))
    plt.plot(rounds, regret_ftl, label='Follow the Leader')
    plt.plot(rounds, regret_random, label='Uniform Random Guessing')
    plt.plot(rounds, regret_ew, label='Exponential Weights')
    plt.xlabel('Round')
    plt.ylabel('Cumulative Regret')
    plt.title('Cumulative Regret vs. Number of Rounds')
    plt.legend()
    plt.grid(True)
    plt.show()

if __name__ == '__main__':
    main()
Editor is loading...
Leave a Comment