Untitled
unknown
plain_text
a year ago
2.5 kB
19
Indexable
def step(self, vehicle, action):
    """
    Execute a single step in the environment for the selected vehicle and action.

    The move is validated before any state is touched, so a rejected move
    (revisit or insufficient battery) leaves the environment unchanged.

    Args:
        vehicle (int): Index of the selected vehicle.
        action (int): Index of the node to visit.

    Returns:
        state: Result of ``self._get_state()`` after the step.
        reward (float): ``-10`` for a rejected revisit, ``-50`` when the
            vehicle lacks the energy for the trip, otherwise the value
            returned by ``self.compute_reward``.
        done (bool): True once every entry of ``self.demands`` is zero;
            always False for a rejected move.
    """
    # Reject revisits of already-served nodes. Node 0 is always allowed.
    # NOTE(review): the original comment mentions "depots or recharge
    # stations", but only index 0 is exempted here - confirm whether other
    # station indices need the same exemption.
    if self.visited[action] and action != 0:
        return self._get_state(), -10, False  # Mild penalty for revisits

    current_position = self.current_positions[vehicle]
    distance = self.distance_matrix[current_position][action]

    # Check energy feasibility BEFORE mutating capacity/demand; otherwise a
    # rejected move would still deliver goods from a vehicle that never moved.
    energy_used = self.H * distance
    if self.remaining_batteries[vehicle] < energy_used:
        return self._get_state(), -50, False  # Penalty for insufficient energy

    # Deliver as much of the demand as the remaining capacity allows
    # (full delivery when capacity suffices, partial otherwise).
    demand = self.demands[action]
    if demand > 0:
        delivered = min(demand, self.remaining_capacities[vehicle])
    else:
        delivered = 0  # Nothing to deliver at this node

    # Update capacity and demand
    self.remaining_capacities[vehicle] -= delivered
    self.demands[action] -= delivered

    # Update vehicle and environment state
    self.route_matrix[current_position][action] = 1
    self.vehicle_routes[vehicle].append(action)
    self.current_positions[vehicle] = action
    self.current_times[vehicle] += distance / 1.2  # Assuming speed = 1.2 units
    self.remaining_batteries[vehicle] -= energy_used

    # Mark the node as visited only once its demand is fully served. Marking
    # it after a partial delivery would block every later visit through the
    # revisit guard above, leaving the residual demand unservable and the
    # episode unable to terminate.
    if delivered > 0 and self.demands[action] == 0:
        self.visited[action] = True

    # Compute reward
    reward = self.compute_reward(
        travel_distance=distance,
        next_node=action,
        is_visited=False,
        energy_used=energy_used,
        remaining_capacity=self.remaining_capacities[vehicle],
        delivered=delivered,
    )

    # Check termination condition (all demands fulfilled)
    done = all(d == 0 for d in self.demands)
    return self._get_state(), reward, done
Editor is loading...
Leave a Comment