Untitled

mail@pastecode.io avatar
unknown
plain_text
a month ago
1.9 kB
2
Indexable
Never
#scatterplot with nyc map background
import geopandas as gpd
import geodatasets
import contextily as ctx
import matplotlib.pyplot as plt

# Load the New York City boundaries dataset ("nybb") as a GeoDataFrame
df = gpd.read_file(geodatasets.get_path("nybb"))
# Reproject to EPSG:4326 (longitude/latitude) so the polygons share the
# coordinate system of the station coordinates plotted below.
# NOTE: despite the `_wm` suffix, this frame is NOT in Web Mercator.
df_wm = df.to_crs(epsg=4326)

# Create a subplot
fig, ax = plt.subplots(figsize=(10, 10))

# Plot the GeoDataFrame with a basemap.
# contextily assumes the axes are in Web Mercator (EPSG:3857) unless a CRS is
# given, so pass the data's CRS to have the tiles warped to match the
# longitude/latitude axes.
ax = df_wm.plot(ax=ax, alpha=0.5, edgecolor="k")
ctx.add_basemap(ax, crs=df_wm.crs)

# Scatterplot of latitude and longitude for departures and arrivals
ax.scatter(
    data_departures['start_station_longitude'],
    data_departures['start_station_latitude'],
    s=10,
    alpha=0.5,
    color='blue',
    label='Departures',
)
ax.scatter(
    data_arrivals['end_station_longitude'],
    data_arrivals['end_station_latitude'],
    s=10,
    alpha=0.5,
    color='red',
    label='Arrivals',
)

# Set the title and labels
ax.set_title('Station Locations')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# Add a legend
ax.legend()

# Display the plot
plt.show()

# The points outside NYC seem to correspond only to arrivals,
# so remove the observations that end at those stations.

# Create a GeoDataFrame of the arrival stations from their end coordinates
# (longitude/latitude, hence EPSG:4326).
gdf = gpd.GeoDataFrame(
    data_arrivals,
    geometry=gpd.points_from_xy(
        data_arrivals['end_station_longitude'],
        data_arrivals['end_station_latitude'],
    ),
    crs="EPSG:4326",
)

# Merge the boundary polygons into a single geometry. GeoPandas 1.0 deprecated
# `unary_union` in favour of `union_all()`; fall back for older versions.
nyc_boundary = (
    df_wm.union_all() if hasattr(df_wm, "union_all") else df_wm.unary_union
)

# Perform a spatial intersection with the New York City boundaries
filtered_gdf = gdf[gdf.intersects(nyc_boundary)]

# Convert the filtered GeoDataFrame back to a regular DataFrame
filtered_data = filtered_gdf.drop(columns="geometry")

# Keep only the rows of the original data frame whose end station lies
# inside the boundaries
data = data[data['end_station_id'].isin(filtered_data['end_station_id'])]

# Rebuild the station tables (one row per distinct combination) from the
# filtered data
data_departures = data[
    ['start_station_longitude', 'start_station_latitude', 'start_station_id']
].drop_duplicates()
data_arrivals = data[
    ['end_station_longitude', 'end_station_latitude', 'end_station_id']
].drop_duplicates()