sotchastic knapsack implementation
unknown
python
a year ago
2.5 kB
6
Indexable
import numpy as np from random import randrange def knapsack_policy_finder(items, capacity): n = len(items) # payoff matrix V = np.zeros((n+1, capacity+1)) # policy alpha[iteration, capacity, item] alpha = np.zeros((n+1, capacity+1, n+1)) # find all subsets subset_sum_array = ... # iteration for i in range(n, -1, -1): # capacity left for c in range(capacity + 1): subset_sum = capacity - c max_subset_len = i left_capacity = c sub_sets = subset_sum_array[subset_sum, max_subset_len] sum_of_sub_sets_items = sum([len(sub_s) for sub_s in sub_sets]) # avg value of all paths that could bring to this state avg_value = sum_of_sub_sets_items/len(sub_sets) possible_decisions_to_fill_left_capacity = ... expected_reward = 0 for item in items: # can be taken if item < c: # if avg_value of a state + taking an item total reward is better than # expected value of a next iteration with unchanged capacity then optimal policy # is to take an item if V[i+1, c-item] + 1 > V[i+1, c]: alpha[i, c, item] = 1 # to calculate expected reward based on the random item expected_reward += 1 expected_reward /= len(items) V[i, c] = avg_value + expected_reward return alpha, V def simulation(items=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], capacity=10, rounds=1000): policy, payoff_matrix = knapsack_policy_finder(items, capacity) for i in range(rounds): # 10 rounds episode_return = 0 episode_cap = capacity taken_items = [] for r in range(len(items)): item = randrange(1, len(items) + 1) if item <= episode_cap: if alpha[r, episode_cap, item]: episode_cap -= item episode_return += 1 taken_items.append(item) else: taken_items.append(0) else: taken_items.append(0) print(f"final return {episode_return}, taken items: {taken_items}") simulation(rounds=10)
Editor is loading...