Stochastic knapsack - version 1
unknown
python
25 days ago
3.9 kB
6
Indexable
Never
import numpy as np from random import randrange def subset_generator(s_sum, s_length, items): if s_length == 1: return [f"{s_sum}"] all_subsets = [] for item in items: item_to_include = 0 if item <= s_sum: subsets_to_merge_with_the_item = subset_generator(s_sum - item, s_length - 1, items) item_to_include = item else: subsets_to_merge_with_the_item = subset_generator(s_sum, s_length - 1, items) subsets = [f"{item_to_include}" + ss for ss in subsets_to_merge_with_the_item] for s in subsets: all_subsets.append(s) return list(set(all_subsets)) def knapsack_policy_finder(items, capacity): n = len(items) # payoff matrix V = np.zeros((n+1, capacity+1)) # policy alpha[iteration, capacity, item] alpha = np.zeros((n+1, capacity+1, n+1)) # find all subsets # iteration for i in range(n, -1, -1): # capacity left for c in range(capacity + 1): subset_sum = capacity - c max_subset_len = i left_capacity = c # super slow # should be only run for n = 10 and all subsets stored in the memo through memoization # MAIN REASON WHY IT IS SO SLOW FOR N > 6 sub_sets = subset_generator(capacity - c, n, [0] + items) sum_of_sub_sets_items = 0 for subset in sub_sets: for l in subset: if l != '0': sum_of_sub_sets_items += 1 # avg value of all paths that could bring to this state avg_value = sum_of_sub_sets_items/len(sub_sets) expected_reward = 0 #print(f"Capactiy: {c} and iteration {i}") #print(sub_sets) #print(sum_of_sub_sets_items) #print(avg_value) #print(f"number fo subsets: {len(sub_sets)}") for item in items: # can be taken if item < c: # if avg_value of a state + taking an item total reward is better than # expected value of a next iteration with unchanged capacity then optimal policy # is to take an item if i+1 <= n and V[i+1, c-item] + 1 > V[i+1, c]: alpha[i, c, item] = 1 # to calculate expected reward based on the random item expected_reward += 1 expected_reward /= len(items) V[i, c] = avg_value + expected_reward return alpha, V def simulation(items=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], capacity=10, rounds=1000, p=True): policy, payoff_matrix = knapsack_policy_finder(items, capacity) avg_return = 0 for i in range(rounds): # 10 rounds episode_return = 0 episode_cap = capacity taken_items = [] for r in range(len(items)): item = randrange(1, len(items) + 1) if item <= episode_cap: if policy[r, episode_cap, item]: episode_cap -= item episode_return += 1 taken_items.append(item) else: taken_items.append(0) else: taken_items.append(0) avg_return += episode_return if p: print(f"final return {episode_return}, taken items: {taken_items}") avg_return /= rounds print(f"avg return for {rounds} simulations: {avg_return}") # simulation([1,2,3,4,5,6,7,8,9,10], capacity = 10, rounds=1, p=True) policy, payoff_matrix = knapsack_policy_finder([1, 2, 3, 4, 5, 6], 6) print(payoff_matrix)