Untitled

def step(self, vehicle, action):
    """
    Execute a single step in the environment for the selected vehicle and action.

    The move is validated before any state is changed: a rejected move
    (revisit or insufficient battery) leaves the environment exactly as it
    was and only returns a penalty.

    Args:
        vehicle (int): Index of the selected vehicle.
        action (int): Index of the node to visit.

    Returns:
        state: Result of ``self._get_state()`` after the step.
        reward: ``-10`` for a rejected revisit, ``-50`` when the vehicle
            lacks the energy for the trip, otherwise the value returned by
            ``self.compute_reward(...)``.
        done (bool): True when every entry of ``self.demands`` equals 0.
            Always False for rejected moves.
    """
    # Reject revisits of already-served nodes. Only node 0 is exempt here
    # (presumably the depot -- confirm against the environment setup).
    if self.visited[action] and action != 0:
        reward = -10  # Mild penalty for revisits
        return self._get_state(), reward, False

    # Get current position and distance to the action node
    current_position = self.current_positions[vehicle]
    distance = self.distance_matrix[current_position][action]

    # Check energy BEFORE touching capacity/demand: if the vehicle cannot
    # make the trip, no delivery takes place and no state may be mutated.
    energy_used = self.H * distance
    if self.remaining_batteries[vehicle] < energy_used:
        reward = -50  # Penalty for insufficient energy
        return self._get_state(), reward, False

    # Deliver as much of the demand as the remaining capacity allows
    # (full delivery if possible, otherwise partial; nothing if no demand).
    demand = self.demands[action]
    if demand > 0:
        delivered = min(demand, self.remaining_capacities[vehicle])
    else:
        delivered = 0

    # Update capacity and demand
    self.remaining_capacities[vehicle] -= delivered
    self.demands[action] -= delivered

    # Update vehicle and environment state
    speed = 1.2  # Assumed travel speed in distance units per time unit
    self.route_matrix[current_position][action] = 1
    self.vehicle_routes[vehicle].append(action)
    self.current_positions[vehicle] = action
    self.current_times[vehicle] += distance / speed
    self.remaining_batteries[vehicle] -= energy_used
    # A node only counts as visited once something was actually delivered,
    # so zero-demand nodes can be entered repeatedly.
    if delivered > 0:
        self.visited[action] = True

    # Compute reward
    reward = self.compute_reward(
        travel_distance=distance,
        next_node=action,
        is_visited=False,
        energy_used=energy_used,
        remaining_capacity=self.remaining_capacities[vehicle],
        delivered=delivered,
    )

    # Check termination condition (all demands fulfilled)
    done = all(d == 0 for d in self.demands)

    return self._get_state(), reward, done
Editor is loading...