# Untitled

import numpy as np

# Grid-world configuration. Cells are addressed as (x, y) with
# 0 <= x < grid_size[0] and 0 <= y < grid_size[1].
grid_size = 4, 3

# Action labels and the index of each label within `actions`.
actions = list('NSEW')
action_indices = {label: position for position, label in enumerate(actions)}

# Probability that the intended move is the one attempted.
transition_prob = 0.8
# Total probability of slipping sideways; split evenly between the two
# directions perpendicular to the intended move.
perpendicular_prob = 0.2
# Not read by the code visible here -- presumably consumed by a solver
# elsewhere (TODO confirm).
discount_factor = 1.0

# Cells with an explicit reward; all other cells use the default reward.
rewards = {
    (3, 2): 1,
    (3, 1): -1,
}
# Cells the agent can never enter.
blocked = {(1, 1)}

# Define transition model
def transition_probabilities(state, action):
    """Return the distribution over next states for taking `action` in `state`.

    The agent attempts the intended move with probability
    ``transition_prob`` and slips to each of the two perpendicular
    directions with probability ``perpendicular_prob / 2``. An attempted
    move that would leave the grid or enter a blocked cell leaves the agent
    where it is, so that move's probability is credited to `state`. The
    returned probabilities therefore always sum to
    ``transition_prob + perpendicular_prob``.

    Args:
        state: Current cell as an ``(x, y)`` tuple.
        action: One of ``'N'``, ``'S'``, ``'E'``, ``'W'``. ``'N'`` decreases
            ``y`` and ``'E'`` increases ``x``.

    Returns:
        Dict mapping each possible next state to its probability.

    Raises:
        ValueError: If `action` is not one of the four directions.
    """
    x, y = state
    north, south = (x, y - 1), (x, y + 1)
    east, west = (x + 1, y), (x - 1, y)

    # action -> (intended target, the two perpendicular slip targets)
    moves = {
        'N': (north, (west, east)),
        'S': (south, (west, east)),
        'E': (east, (north, south)),
        'W': (west, (north, south)),
    }
    if action not in moves:
        raise ValueError("Unknown action: {!r}".format(action))
    intended, perpendicular = moves[action]

    # Check if the move is inside the grid and not blocked
    def is_valid_state(s):
        return (0 <= s[0] < grid_size[0] and 0 <= s[1] < grid_size[1] and s not in blocked)

    attempts = [(intended, transition_prob)]
    attempts.extend((side, perpendicular_prob / 2) for side in perpendicular)

    transitions = {}
    for target, prob in attempts:
        # A blocked or out-of-grid attempt keeps the agent in place, so its
        # probability mass goes to the current state instead of being lost.
        landing = target if is_valid_state(target) else state
        transitions[landing] = transitions.get(landing, 0.0) + prob

    return transitions

# Define the reward function
def reward_function(state):
    """Return the reward for `state`.

    States listed in the module-level ``rewards`` mapping yield their
    configured value; every other state yields the default of -0.04.
    """
    if state in rewards:
        return rewards[state]
    return -0.04
# Editor is loading...