sotchastic knapsack implementation

import numpy as np
from random import randrange

def knapsack_policy_finder(items, capacity):
    n = len(items)
    # payoff matrix
    V = np.zeros((n+1, capacity+1))
    
    # policy alpha[iteration, capacity, item]
    alpha = np.zeros((n+1, capacity+1, n+1))
    
    # find all subsets
    subset_sum_array = ...
    
    # iteration
    for i in range(n, -1, -1):
        # capacity left
        for c in range(capacity + 1):
            subset_sum = capacity - c
            max_subset_len = i
            left_capacity = c
            sub_sets = subset_sum_array[subset_sum, max_subset_len]
            sum_of_sub_sets_items = sum([len(sub_s) for sub_s in sub_sets])
            # avg value of all paths that could bring to this state
            avg_value = sum_of_sub_sets_items/len(sub_sets)
            possible_decisions_to_fill_left_capacity = ...
            expected_reward = 0
            for item in items:
                # can be taken
                if item < c:
                    # if avg_value of a state + taking an item total reward is better than
                    # expected value of a next iteration with unchanged capacity then optimal policy
                    # is to take an item
                    if V[i+1, c-item] + 1 > V[i+1, c]:
                        alpha[i, c, item] = 1
                    
                    # to calculate expected reward based on the random item
                    expected_reward += 1
                    
            expected_reward /= len(items)
            
            V[i, c] = avg_value + expected_reward
            
    return alpha, V

def simulation(items=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], capacity=10, rounds=1000):
    policy, payoff_matrix = knapsack_policy_finder(items, capacity)
    
    for i in range(rounds):
        # 10 rounds
        episode_return = 0
        episode_cap = capacity
        taken_items = []
        for r in range(len(items)):
            item = randrange(1, len(items) + 1)
            if item <= episode_cap:
                if alpha[r, episode_cap, item]:
                    episode_cap -= item
                    episode_return += 1
                    taken_items.append(item)
                else:
                    taken_items.append(0)
            else:
                taken_items.append(0)
                
            print(f"final return {episode_return}, taken items: {taken_items}")
            
            
simulation(rounds=10)
Editor is loading...