  # sotchastic knapsack implementation unknown
python
a month ago
2.5 kB
0
Indexable
Never
```import numpy as np
from random import randrange

def knapsack_policy_finder(items, capacity):
n = len(items)
# payoff matrix
V = np.zeros((n+1, capacity+1))

# policy alpha[iteration, capacity, item]
alpha = np.zeros((n+1, capacity+1, n+1))

# find all subsets
subset_sum_array = ...

# iteration
for i in range(n, -1, -1):
# capacity left
for c in range(capacity + 1):
subset_sum = capacity - c
max_subset_len = i
left_capacity = c
sub_sets = subset_sum_array[subset_sum, max_subset_len]
sum_of_sub_sets_items = sum([len(sub_s) for sub_s in sub_sets])
# avg value of all paths that could bring to this state
avg_value = sum_of_sub_sets_items/len(sub_sets)
possible_decisions_to_fill_left_capacity = ...
expected_reward = 0
for item in items:
# can be taken
if item < c:
# if avg_value of a state + taking an item total reward is better than
# expected value of a next iteration with unchanged capacity then optimal policy
# is to take an item
if V[i+1, c-item] + 1 > V[i+1, c]:
alpha[i, c, item] = 1

# to calculate expected reward based on the random item
expected_reward += 1

expected_reward /= len(items)

V[i, c] = avg_value + expected_reward

return alpha, V

def simulation(items=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], capacity=10, rounds=1000):
policy, payoff_matrix = knapsack_policy_finder(items, capacity)

for i in range(rounds):
# 10 rounds
episode_return = 0
episode_cap = capacity
taken_items = []
for r in range(len(items)):
item = randrange(1, len(items) + 1)
if item <= episode_cap:
if alpha[r, episode_cap, item]:
episode_cap -= item
episode_return += 1
taken_items.append(item)
else:
taken_items.append(0)
else:
taken_items.append(0)

print(f"final return {episode_return}, taken items: {taken_items}")

simulation(rounds=10)

```  