Untitled
unknown
plain_text
5 months ago
2.5 kB
3
Indexable
def step(self, vehicle, action):
    """Execute a single environment step for the selected vehicle and action.

    A move is rejected (penalty returned, environment left untouched) when
    the target node is already marked visited (node 0 is exempt) or when the
    vehicle's remaining battery cannot cover the trip. Otherwise the vehicle
    travels to the node, delivers as much of the node's demand as its
    remaining capacity allows, and the reward is obtained from
    ``self.compute_reward``.

    Args:
        vehicle (int): Index of the selected vehicle.
        action (int): Index of the node to visit.

    Returns:
        tuple: ``(state, reward, done)`` where ``state`` is the value of
        ``self._get_state()``, ``reward`` is the step reward (``-10`` for a
        rejected revisit, ``-50`` for insufficient energy), and ``done`` is
        True once every entry of ``self.demands`` equals 0.
    """
    # Revisiting an already-served node is penalised; node 0 may be
    # re-entered freely.
    if self.visited[action] and action != 0:
        return self._get_state(), -10, False

    current_position = self.current_positions[vehicle]
    distance = self.distance_matrix[current_position][action]

    # Validate energy BEFORE touching any state: a rejected move must not
    # consume capacity or satisfy demand, since the vehicle never travels.
    energy_used = self.H * distance
    if self.remaining_batteries[vehicle] < energy_used:
        return self._get_state(), -50, False

    # Deliver as much as the remaining capacity allows (full or partial);
    # nodes without positive demand receive nothing.
    demand = self.demands[action]
    if demand > 0:
        delivered = min(demand, self.remaining_capacities[vehicle])
    else:
        delivered = 0

    self.remaining_capacities[vehicle] -= delivered
    self.demands[action] -= delivered

    # Record the move and advance the vehicle.
    self.route_matrix[current_position][action] = 1
    self.vehicle_routes[vehicle].append(action)
    self.current_positions[vehicle] = action
    self.current_times[vehicle] += distance / 1.2  # Assuming speed = 1.2 units
    self.remaining_batteries[vehicle] -= energy_used

    # NOTE(review): a partial delivery also marks the node visited, so any
    # leftover demand at that node can no longer be served and `done` may
    # never become True -- confirm this is intended.
    if delivered > 0:
        self.visited[action] = True

    reward = self.compute_reward(
        travel_distance=distance,
        next_node=action,
        is_visited=False,
        energy_used=energy_used,
        remaining_capacity=self.remaining_capacities[vehicle],
        delivered=delivered,
    )

    # The episode ends once every demand has been fulfilled.
    done = all(d == 0 for d in self.demands)
    return self._get_state(), reward, done
Editor is loading...
Leave a Comment