Untitled
unknown
plain_text
a month ago
1.9 kB
2
Indexable
Never
# Scatterplot of station locations over an NYC map background, followed by
# removal of trips whose arrival station lies outside the NYC boroughs.
#
# NOTE(review): `data`, `data_departures` and `data_arrivals` are not defined
# in this snippet; they are presumably pandas DataFrames built earlier (e.g.
# in a previous notebook cell) with the station id/longitude/latitude columns
# referenced below -- confirm against the surrounding code.
import geopandas as gpd
import geodatasets
import contextily as ctx
import matplotlib.pyplot as plt

# Load the NYC borough boundaries ("nybb") and reproject them to EPSG:4326
# so they share a coordinate reference system with the longitude/latitude
# columns plotted below.
df = gpd.read_file(geodatasets.get_path("nybb"))
df_wm = df.to_crs(epsg=4326)

# Create a subplot
fig, ax = plt.subplots(figsize=(10, 10))

# Plot the borough polygons with a basemap underneath.
ax = df_wm.plot(ax=ax, alpha=0.5, edgecolor="k")
# The axes are in EPSG:4326, while add_basemap assumes Web Mercator
# (EPSG:3857) unless told otherwise; pass the CRS explicitly so the tiles
# line up with the plotted data.
ctx.add_basemap(ax, crs=df_wm.crs)

# Scatterplot of longitude (x) and latitude (y) for departures and arrivals
ax.scatter(
    data_departures['start_station_longitude'],
    data_departures['start_station_latitude'],
    s=10,
    alpha=0.5,
    color='blue',
    label='Departures',
)
ax.scatter(
    data_arrivals['end_station_longitude'],
    data_arrivals['end_station_latitude'],
    s=10,
    alpha=0.5,
    color='red',
    label='Arrivals',
)

# Set the title and labels
ax.set_title('Station Locations')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# Add a legend
ax.legend()

# Display the plot
plt.show()

# The points outside NYC seem to correspond only to arrivals, so only the
# arrival stations are checked against the borough boundaries below; trips
# ending at a station outside NYC are then removed.

# Build point geometries from the arrival-station coordinates (x = longitude,
# y = latitude), tagged with the same CRS as the reprojected boroughs.
gdf = gpd.GeoDataFrame(
    data_arrivals,
    geometry=gpd.points_from_xy(
        data_arrivals['end_station_longitude'],
        data_arrivals['end_station_latitude'],
    ),
    crs="EPSG:4326",
)

# Keep only the arrival stations that intersect the union of all boroughs.
# NOTE(review): `unary_union` is deprecated in geopandas >= 1.0 in favour of
# `union_all()`; switch once the installed version is confirmed.
filtered_gdf = gdf[gdf.intersects(df_wm.unary_union)]

# Convert the filtered GeoDataFrame back to a regular DataFrame
filtered_data = filtered_gdf.drop(columns="geometry")

# Keep only the trips whose arrival station survived the spatial filter.
data = data[data['end_station_id'].isin(filtered_data['end_station_id'])]

# Rebuild the per-station lookup tables from the filtered trips so that both
# reflect the reduced data set.
data_departures = data[
    ['start_station_longitude', 'start_station_latitude', 'start_station_id']
].drop_duplicates()
data_arrivals = data[
    ['end_station_longitude', 'end_station_latitude', 'end_station_id']
].drop_duplicates()