# Untitled
# unknown
# plain_text
# a year ago
# 1.6 kB
# 16
# Indexable
import numpy as np
# Grid-world configuration shared by the transition and reward functions.
grid_size = (4, 3)  # extents: valid cells satisfy 0 <= x < 4 and 0 <= y < 3
actions = ['N', 'S', 'E', 'W']
# Position of each action name within `actions`.
action_indices = {name: idx for idx, name in enumerate(actions)}
transition_prob = 0.8  # probability of moving in the intended direction
perpendicular_prob = 0.2  # total slip probability, split between both sideways moves
discount_factor = 1.0
# Rewards for specific cells; every other cell uses reward_function's default.
rewards = {
    (3, 2): 1,
    (3, 1): -1,
}
blocked = {(1, 1)}  # cells that can never be entered
# Define transition model
def transition_probabilities(state, action):
    """Return the distribution over next states for taking *action* in *state*.

    The agent moves in the intended direction with probability
    ``transition_prob`` and slips to each of the two perpendicular directions
    with probability ``perpendicular_prob / 2``.  A move that would leave the
    grid or enter a blocked cell does not happen: the agent stays where it is,
    and that move's probability is credited to the current cell.  No
    probability mass is ever dropped.

    Args:
        state: Current cell as an ``(x, y)`` pair.
        action: One of ``'N'``, ``'S'``, ``'E'``, ``'W'``.  ``'N'`` decreases
            ``y``, ``'S'`` increases it; ``'E'`` increases ``x``, ``'W'``
            decreases it.

    Returns:
        A dict mapping each possible next cell ``(x, y)`` to its probability.
        The values sum to ``transition_prob + perpendicular_prob``.

    Raises:
        ValueError: If *action* is not one of the four known actions.
    """
    x, y = state

    # (dx, dy) offsets per action: intended direction first, then the two
    # perpendicular slip directions.
    offsets = {
        'N': ((0, -1), (-1, 0), (1, 0)),
        'S': ((0, 1), (-1, 0), (1, 0)),
        'E': ((1, 0), (0, -1), (0, 1)),
        'W': ((-1, 0), (0, -1), (0, 1)),
    }
    try:
        intended_step, *slip_steps = offsets[action]
    except KeyError:
        raise ValueError(f"unknown action: {action!r}") from None

    # Check if the move is inside the grid and not blocked
    def is_valid_state(s):
        return (
            0 <= s[0] < grid_size[0]
            and 0 <= s[1] < grid_size[1]
            and s not in blocked
        )

    current = (x, y)
    weighted_steps = [(intended_step, transition_prob)]
    weighted_steps += [(step, perpendicular_prob / 2) for step in slip_steps]

    transitions = {}
    for (dx, dy), prob in weighted_steps:
        target = (x + dx, y + dy)
        if not is_valid_state(target):
            # Bumping into a wall or obstacle: stay put, but keep the mass.
            target = current
        transitions[target] = transitions.get(target, 0.0) + prob
    return transitions
# Define the reward function
def reward_function(state, *, step_reward=-0.04):
    """Return the reward associated with *state*.

    Args:
        state: Cell as an ``(x, y)`` tuple, used as a key into ``rewards``.
        step_reward: Reward returned for any cell that has no entry in
            ``rewards``.  Defaults to ``-0.04``.

    Returns:
        The value stored in ``rewards`` for *state*, or *step_reward* when
        the cell has no explicit entry.
    """
    return rewards.get(state, step_reward)
# Editor is loading...
# Leave a Comment