# Untitled

import numpy as np

def relative_value_iteration(
    n_states,
    epsilon=0.001,
    replace_cost=0.6,
    failure_cost=1.0,
    base_failure_prob=0.1,
    failure_prob_step=0.01,
    max_iterations=10000,
):
    """Run relative value iteration for a replace-or-keep cost model.

    States are indexed ``0 .. n_states - 1`` (presumably the age or wear
    level of a machine -- confirm against the problem statement).  In each
    state one of two actions is chosen so as to minimise cost:

    * action ``0`` (replace): pay ``replace_cost`` and move to state 0;
    * action ``1`` (keep): with probability ``p_i`` pay ``failure_cost``
      and move to state 0, otherwise move to state ``i + 1`` at no cost,
      where ``p_i = min(1, base_failure_prob + i * failure_prob_step)``.

    In the last state keeping always fails (``p = 1``), so there is never
    a transition past the end of the state space.

    Every sweep reads only the previous iterate (a synchronous update),
    and the iterate is shifted so that its maximum is 0 to keep the values
    bounded.  Iteration stops once the span of the change between two
    successive iterates drops below ``epsilon``.

    Args:
        n_states: Number of states; must be at least 1.
        epsilon: Span threshold used as the stopping criterion.
        replace_cost: Immediate cost of the replace action.
        failure_cost: Immediate cost incurred when a kept unit fails.
        base_failure_prob: Failure probability in state 0.
        failure_prob_step: Increase in failure probability per state.
        max_iterations: Upper bound on the number of sweeps.

    Returns:
        A ``(values, policy)`` tuple of float arrays, both of shape
        ``(n_states, 1)``.  ``values`` is the final iterate shifted so its
        maximum is 0; ``policy[i]`` is ``0.0`` for replace and ``1.0`` for
        keep.  Ties are resolved in favour of replace.

    Raises:
        ValueError: If ``n_states`` is smaller than 1.
        RuntimeError: If the span criterion is not met within
            ``max_iterations`` sweeps.
    """
    if n_states < 1:
        raise ValueError("n_states must be at least 1")

    states = np.arange(n_states)
    fail_prob = np.minimum(1.0, base_failure_prob + states * failure_prob_step)
    # Keeping in the last state is treated as a certain failure; this is
    # the terminal rule the original per-state loop intended.
    fail_prob[-1] = 1.0

    values = np.zeros(n_states)
    for _ in range(max_iterations):
        restart = values[0]
        replacing = replace_cost + restart
        # Value of the next state up; the padding entry for the last state
        # is multiplied by (1 - 1.0) == 0 and therefore never contributes.
        successor = np.append(values[1:], 0.0)
        keeping = fail_prob * (failure_cost + restart) + (1 - fail_prob) * successor

        # Strict comparison so that ties pick action 0 (replace), matching
        # argmin over [replacing, keeping].
        keep_is_cheaper = keeping < replacing
        updated = np.minimum(replacing, keeping)

        delta = updated - values
        span = delta.max() - delta.min()
        # Shift so the largest entry is 0; only differences between states
        # matter for the policy.
        values = updated - updated.max()
        if span < epsilon:
            return values.reshape(-1, 1), keep_is_cheaper.astype(float).reshape(-1, 1)

    raise RuntimeError(
        "relative value iteration did not converge within "
        "%d iterations" % max_iterations
    )


r = 91
epsilon = 0.001

# Solve the 91-state instance and print the policy as a single row
# (0 = replace, 1 = keep).
V, policy = relative_value_iteration(r, epsilon)
print(policy.T)
# Editor is loading...