Untitled

mail@pastecode.io avatarunknown
plain_text
2 months ago
2.1 kB
0
Indexable
Never
pip install pandas openpyxl geopy requests

import pandas as pd
from geopy.geocoders import Nominatim
import requests
from datetime import datetime

# Function to get State using Geopy
def get_state(address):
    geolocator = Nominatim(user_agent="geoapiExercises")
    location = geolocator.geocode(address)
    if location:
        return location.raw['address'].get('state', '')
    return ''

# Function to get Latitude and Longitude using Positionstack API
def get_lat_long(address):
    api_key = 'YOUR_POSITIONSTACK_API_KEY'
    url = f'http://api.positionstack.com/v1/forward?access_key={api_key}&query={address}'
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if data['data']:
            latitude = data['data'][0]['latitude']
            longitude = data['data'][0]['longitude']
            return latitude, longitude
    return None, None

# Read the Excel file
df = pd.read_excel("award_winning_restaurants.xlsx")

# Initialize lists to store new data
state_list = []
latitude_list = []
longitude_list = []

# Iterate through the rows and fetch missing data
for index, row in df.iterrows():
    address = row['Address']
    zip_code = row['Zip Code']
    
    # Get State from Address or Zip Code
    state = get_state(address) or get_state(zip_code)
    state_list.append(state)
    
    # Get Latitude and Longitude
    latitude, longitude = get_lat_long(address)
    latitude_list.append(latitude)
    longitude_list.append(longitude)

# Add the new data to the DataFrame
df['State'] = state_list
df['Latitude'] = latitude_list
df['Longitude'] = longitude_list

# Add Award Name from the previous row to each row
df['Award Name'] = df['2 MICHELIN Stars'].fillna(method='ffill')

# Generate the filename with today's date
current_date = datetime.today().strftime('%Y%m%d')
output_filename = f"award_winning_restaurants_{current_date}.xlsx"

# Save the DataFrame to a new Excel file
df.to_excel(output_filename, index=False, engine='openpyxl')

print(f"Data has been populated and saved to {output_filename}.")