Untitled
unknown
python
2 years ago
827 B
12
Indexable
import numpy as np
r = 91
epsilon = 0.001


def solve_replacement_policy(num_states, tolerance):
    """Solve the machine-replacement problem by relative value iteration.

    State ``i`` is the wear level of the machine.  In every state there are
    two actions:

    * replace (policy value 0): pay 0.6 and continue from state 0;
    * keep    (policy value 1): with probability ``0.1 + 0.01 * i`` the
      machine fails (pay 1, continue from state 0), otherwise it moves on
      to state ``i + 1``.

    In the last state there is no successor, so keeping the machine always
    ends in a failure (cost 1, back to state 0).

    Args:
        num_states: Number of wear states (must be >= 1).
        tolerance: Iteration stops once the span of two successive value
            vectors, ``max(diff) - min(diff)``, drops below this value.

    Returns:
        A tuple ``(policy, V)`` of ``(num_states, 1)`` float arrays: the
        greedy policy of the final sweep and the normalized relative value
        vector that sweep was computed from.

    Raises:
        ValueError: If ``num_states`` is smaller than 1.
    """
    if num_states < 1:
        raise ValueError("num_states must be at least 1")

    wear = np.arange(num_states).reshape(-1, 1)
    # Cap at 1 so the expression stays a probability for large state
    # spaces; for the default 91 states the cap never changes a value.
    failure_prob = np.minimum(0.1 + wear * 0.01, 1.0)

    V = np.zeros(shape=(num_states, 1))
    while True:
        replacing = 0.6 + V[0]

        # successor[i] == V[i + 1]; the wrapped-around last row is unused
        # because the last state is overwritten just below.
        successor = np.roll(V, -1, axis=0)
        not_replacing = failure_prob * (1 + V[0]) + (1 - failure_prob) * successor
        # Terminal state: keeping the machine means a certain failure.
        # (The original script stored this in a misspelled variable, so the
        # stale value of the previous state was used instead.)
        not_replacing[-1] = 1 + V[0]

        # "<=" mirrors np.argmin, which picks index 0 (replace) on ties.
        replace_is_best = replacing <= not_replacing
        policy = np.where(replace_is_best, 0.0, 1.0)
        new_V = np.where(replace_is_best, replacing, not_replacing)

        diff = new_V - V
        span = np.max(diff) - np.min(diff)
        if span < tolerance:
            return policy, V

        # Normalize so the largest entry is 0; this keeps the relative
        # values bounded while the average cost accumulates.
        V = new_V - np.max(new_V)


policy, V = solve_replacement_policy(r, epsilon)
print(policy.T)
Editor is loading...
Leave a Comment