Untitled
unknown
plain_text
a month ago
1.6 kB
2
Indexable
Never
import numpy as np

# Define the grid and parameters.
# States are (x, y) tuples with 0 <= x < grid_size[0] and 0 <= y < grid_size[1].
grid_size = (4, 3)
actions = ['N', 'S', 'E', 'W']
action_indices = {'N': 0, 'S': 1, 'E': 2, 'W': 3}
transition_prob = 0.8      # probability of moving in the intended direction
perpendicular_prob = 0.2   # total probability, split evenly over both perpendicular directions
discount_factor = 1.0
rewards = {(3, 2): 1, (3, 1): -1}   # per-state rewards that override the default
blocked = {(1, 1)}                  # cells that can never be entered


# Define transition model
def transition_probabilities(state, action):
    """Return the distribution over next states for taking `action` in `state`.

    The agent moves in the intended direction with probability
    `transition_prob` and slips to each of the two perpendicular directions
    with probability `perpendicular_prob / 2`. Any move that would leave the
    grid or enter a blocked cell leaves the agent where it is, so that move's
    probability is credited to `state`. The returned probabilities therefore
    always sum to `transition_prob + perpendicular_prob`.

    Args:
        state: Current position as an (x, y) tuple.
        action: One of 'N', 'S', 'E', 'W'. 'N' decreases y, 'S' increases y,
            'E' increases x and 'W' decreases x.

    Returns:
        A dict mapping next-state (x, y) tuples to their probability.

    Raises:
        ValueError: If `action` is not one of the four known directions.
    """
    x, y = state
    if action == 'N':
        intended = (x, y - 1)
        perpendicular = [(x - 1, y), (x + 1, y)]
    elif action == 'S':
        intended = (x, y + 1)
        perpendicular = [(x - 1, y), (x + 1, y)]
    elif action == 'E':
        intended = (x + 1, y)
        perpendicular = [(x, y - 1), (x, y + 1)]
    elif action == 'W':
        intended = (x - 1, y)
        perpendicular = [(x, y - 1), (x, y + 1)]
    else:
        raise ValueError(f"unknown action: {action!r}")

    # Check if the move is inside the grid and not blocked
    def is_valid_state(s):
        return (0 <= s[0] < grid_size[0]
                and 0 <= s[1] < grid_size[1]
                and s not in blocked)

    transitions = {}

    def add_move(target, prob):
        # An invalid move means no movement: its probability goes to the
        # current state instead of being dropped from the distribution.
        landing = target if is_valid_state(target) else state
        transitions[landing] = transitions.get(landing, 0.0) + prob

    add_move(intended, transition_prob)
    for p in perpendicular:
        add_move(p, perpendicular_prob / 2)

    return transitions


# Define the reward function
def reward_function(state):
    """Return the reward for `state`: its entry in `rewards`, else -0.04."""
    return rewards.get(state, -0.04)
Leave a Comment