# Value iteration for a small MDP: a walking robot that can be
# standing, moving, or fallen, and may choose to move fast or slow.

# Define the states and actions
states = ['standing', 'moving', 'fallen']
actions = ['move fast', 'move slow']

# Transition probabilities: actions_prob[action][state][next_state]
actions_prob = {
    'move fast': {
        'standing': {'standing': 0, 'moving': 0.6, 'fallen': 0.4},
        'moving':   {'standing': 0, 'moving': 0.8, 'fallen': 0.2},
        'fallen':   {'standing': 0, 'moving': 0,   'fallen': 0}
    },
    'move slow': {
        'standing': {'standing': 0,   'moving': 1, 'fallen': 0},
        'moving':   {'standing': 0,   'moving': 1, 'fallen': 0},
        'fallen':   {'standing': 0.4, 'moving': 0, 'fallen': 0.6}
    }
}

# Rewards: actions_reward[action][state][next_state]
actions_reward = {
    'move fast': {
        'standing': {'standing': 0, 'moving': 2, 'fallen': -1},
        'moving':   {'standing': 0, 'moving': 2, 'fallen': -1},
        'fallen':   {'standing': 0, 'moving': 0, 'fallen': 0}
    },
    'move slow': {
        'standing': {'standing': 0, 'moving': 1, 'fallen': 0},
        'moving':   {'standing': 0, 'moving': 1, 'fallen': 0},
        'fallen':   {'standing': 1, 'moving': 0, 'fallen': -1}
    }
}

# Initialize the state-value function
V = {
    'standing': 0,
    'moving': 0,
    'fallen': 0
}

# Iterate the Bellman optimality equation:
#   V(s) = max_a sum_{s'} P(s'|s,a) * (R(s,a,s') + gamma * V(s'))
gamma = 1
num_iterations = 5

for k in range(num_iterations):
    new_V = {}
    for state in states:
        values = []
        for action in actions:
            action_prob = actions_prob[action][state]
            action_reward = actions_reward[action][state]
            value = 0
            for next_state in states:
                value += action_prob[next_state] * (action_reward[next_state]
                                                    + gamma * V[next_state])
            values.append(value)
        # Print the greedy action for this state, then keep its value
        print(actions[values.index(max(values))])
        new_V[state] = max(values)
    V = new_V
    print('k={}---------'.format(k + 1))

# Print the final state values
for state in states:
    print(f"State: {state}, Value: {V[state]}")