# Stochastic knapsack - version 1

# Paste-site metadata residue (not Python code — commented out so the file parses):
# unknown | python | 25 days ago | 3.9 kB | 6 | Indexable | Never
import numpy as np
from random import randrange

def subset_generator(s_sum, s_length, items):
    """Enumerate the distinct "path strings" of length ``s_length`` that sum to ``s_sum``.

    Each position holds either an item drawn from ``items`` (when it still
    fits in the remaining sum) or ``'0'`` (when it does not); the final
    position is the leftover sum itself, which need not be a member of
    ``items``.  Results are concatenated digit strings, so items are
    assumed single-digit — a two-digit item such as 10 produces ambiguous
    strings (TODO confirm intended usage).

    Fix over v1: the naive recursion was exponential (the file notes it as
    the main reason the solver crawled for n > 6).  A per-call memo keyed
    on (remaining, length) collapses repeated subproblems; ``items`` is
    constant across the recursion, so it is not part of the key.

    Returns a list of unique strings (order unspecified, as before).
    """
    memo = {}

    def _gen(remaining, length):
        key = (remaining, length)
        if key in memo:
            return memo[key]
        if length == 1:
            # Base case: the last slot absorbs whatever sum is left.
            result = [f"{remaining}"]
        else:
            found = set()
            for item in items:
                if item <= remaining:
                    head = f"{item}"
                    tails = _gen(remaining - item, length - 1)
                else:
                    # Item doesn't fit: record a skip ('0') in this slot.
                    head = "0"
                    tails = _gen(remaining, length - 1)
                found.update(head + t for t in tails)
            result = list(found)
        memo[key] = result
        return result

    return _gen(s_sum, s_length)

def knapsack_policy_finder(items, capacity):
    """Build a take/skip policy and payoff matrix by backward iteration.

    Args:
        items: list of item sizes (assumed single-digit and positive — the
            subset-string counting below would miscount multi-digit items;
            TODO confirm).
        capacity: total knapsack capacity.

    Returns:
        (alpha, V) where ``alpha[i, c, item] == 1`` means "take ``item`` at
        iteration ``i`` with remaining capacity ``c``", and ``V[i, c]`` is
        the estimated payoff for that state.
    """
    n = len(items)
    # V[i, c]: payoff estimate at iteration i with remaining capacity c.
    V = np.zeros((n + 1, capacity + 1))
    # Policy tensor indexed as alpha[iteration, capacity, item].
    alpha = np.zeros((n + 1, capacity + 1, n + 1))

    # Backward over iterations so V[i+1, ...] is already filled in.
    for i in range(n, -1, -1):
        # Remaining capacity.
        for c in range(capacity + 1):
            # All item sequences that could have consumed capacity - c so
            # far.  NOTE(review): still exponential per call; caching
            # results per (capacity - c) across the i-loop would cut the
            # repeated work further, since the argument doesn't depend on i.
            sub_sets = subset_generator(capacity - c, n, [0] + items)

            # Count taken items along every path: every non-'0' character
            # is one taken item (single-digit assumption again).
            taken_count = 0
            for subset in sub_sets:
                for ch in subset:
                    if ch != '0':
                        taken_count += 1

            # Average value of all paths that could lead to this state.
            # len(sub_sets) >= 1 since the base case always yields a string.
            avg_value = taken_count / len(sub_sets)

            expected_reward = 0
            for item in items:
                # Item can still be taken (strictly below remaining capacity,
                # as in the original — boundary item == c is treated as
                # not takeable; TODO confirm intended).
                if item < c:
                    # Take the item if doing so beats skipping it at the
                    # next iteration with capacity unchanged.  The i+1 <= n
                    # guard keeps V's first index in bounds.
                    if i + 1 <= n and V[i + 1, c - item] + 1 > V[i + 1, c]:
                        alpha[i, c, item] = 1
                    # Contributes to the expected reward of a random offer.
                    expected_reward += 1
            expected_reward /= len(items)

            V[i, c] = avg_value + expected_reward

    return alpha, V

def simulation(items=None, capacity=10, rounds=1000, p=True):
    """Monte-Carlo rollout of the policy from ``knapsack_policy_finder``.

    Each round offers ``len(items)`` random items; the policy decides
    whether to take each offer.  Prints the average return over all rounds.

    Args:
        items: item sizes; defaults to [1..10].  (v1 used a mutable list
            default — replaced with the None-sentinel idiom.)
        capacity: knapsack capacity per episode.
        rounds: number of simulated episodes (must be > 0).
        p: when True, print each episode's return and taken items.
    """
    if items is None:
        items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    policy, payoff_matrix = knapsack_policy_finder(items, capacity)

    total_return = 0
    for _ in range(rounds):
        episode_return = 0
        episode_cap = capacity
        taken_items = []
        for r in range(len(items)):
            # Random offer: draws an item VALUE in 1..len(items), not an
            # index — assumes items are exactly 1..n (TODO confirm).
            item = randrange(1, len(items) + 1)
            # Take only when it fits and the policy says take; otherwise
            # record a 0 for "skipped/didn't fit".
            if item <= episode_cap and policy[r, episode_cap, item]:
                episode_cap -= item
                episode_return += 1
                taken_items.append(item)
            else:
                taken_items.append(0)

        total_return += episode_return
        if p:
            print(f"final return {episode_return}, taken items: {taken_items}")

    avg_return = total_return / rounds
    print(f"avg return for {rounds} simulations: {avg_return}")

# Guarded entry point: v1 ran the solver at import time, which is a
# module-level side effect; importing this file no longer triggers it.
if __name__ == "__main__":
    # simulation([1,2,3,4,5,6,7,8,9,10], capacity = 10, rounds=1, p=True)
    policy, payoff_matrix = knapsack_policy_finder([1, 2, 3, 4, 5, 6], 6)
    print(payoff_matrix)