Untitled
unknown
python
2 years ago
827 B
5
Indexable
import numpy as np

# Machine-replacement problem solved by relative value iteration.
#
# State i is the age of the machine. Each state offers two actions:
#   0 = replace: pay the replacement cost and continue from state 0.
#   1 = keep:    with probability 0.1 + 0.01 * i the machine fails (pay the
#                failure cost and continue from state 0); otherwise it moves
#                on to state i + 1.
# The last state has no successor, so keeping the machine there always incurs
# the failure cost and continues from state 0.

r = 91           # number of states (ages 0 .. r - 1)
epsilon = 0.001  # stop once the span of (new_V - V) falls below this


def bellman_backup(V, replace_cost=0.6, failure_cost=1.0):
    """Apply one Bellman backup to the value vector ``V``.

    Args:
        V: Current values, one entry per state; shape ``(n,)`` or ``(n, 1)``.
        replace_cost: Immediate cost of the "replace" action.
        failure_cost: Immediate cost paid when a kept machine fails.

    Returns:
        ``(new_V, policy)``, both of shape ``(n, 1)``. ``policy[i]`` is
        ``0.0`` when replacing is chosen in state ``i`` and ``1.0`` when
        keeping is chosen. Ties go to "replace", which matches ``np.argmin``
        returning the first minimum.

    Raises:
        ValueError: If ``V`` is empty.
    """
    values = np.asarray(V, dtype=float).reshape(-1)
    n = values.size
    if n == 0:
        raise ValueError("V must contain at least one state")

    v0 = values[0]
    # Clipping only matters from state 90 onwards, where the linear formula
    # would otherwise reach or exceed probability 1.
    failure_prob = np.minimum(0.1 + np.arange(n) * 0.01, 1.0)

    replacing = replace_cost + v0

    # Value of the successor state; the last state has none, so a placeholder
    # is appended and the entry is overwritten just below.
    next_values = np.append(values[1:], 0.0)
    not_replacing = (
        failure_prob * (failure_cost + v0) + (1 - failure_prob) * next_values
    )
    # Last state: keeping the machine means a certain failure. The original
    # script stored this under a misspelled name, so the stale value from the
    # previous state was used instead.
    not_replacing[-1] = failure_cost + v0

    keep = not_replacing < replacing
    new_V = np.where(keep, not_replacing, replacing).reshape(-1, 1)
    policy = keep.astype(float).reshape(-1, 1)
    return new_V, policy


def solve_replacement_policy(num_states=91, epsilon=0.001,
                             replace_cost=0.6, failure_cost=1.0):
    """Run relative value iteration until the span criterion is met.

    Starting from an all-zero value vector, Bellman backups are repeated
    until ``max(new_V - V) - min(new_V - V) < epsilon``. Between sweeps the
    values are shifted so that their maximum is zero, which keeps them
    bounded without changing the differences between states.

    Args:
        num_states: Number of states (machine ages); must be at least 1.
        epsilon: Span threshold used as the stopping rule.
        replace_cost: Immediate cost of the "replace" action.
        failure_cost: Immediate cost paid when a kept machine fails.

    Returns:
        ``(policy, values)``, both of shape ``(num_states, 1)``: the greedy
        policy from the final sweep (0 = replace, 1 = keep) and the values
        produced by that sweep.

    Raises:
        ValueError: If ``num_states`` is smaller than 1.
    """
    if num_states < 1:
        raise ValueError("num_states must be at least 1")

    values = np.zeros(shape=(num_states, 1))
    while True:
        new_values, policy = bellman_backup(
            values, replace_cost=replace_cost, failure_cost=failure_cost
        )
        delta = new_values - values
        span = np.max(delta) - np.min(delta)
        if span < epsilon:
            return policy, new_values
        # Normalise so the largest value is zero before the next sweep.
        values = new_values - np.max(new_values)


if __name__ == "__main__":
    policy, _ = solve_replacement_policy(num_states=r, epsilon=epsilon)
    print(policy.T)
Editor is loading...
Leave a Comment