Untitled
unknown
plain_text
2 years ago
5.1 kB
13
Indexable
import json
import logging
import requests
import configparser
import pandas as pd
from datetime import datetime
import tkinter as tk
from tkinter import filedialog
logging.basicConfig(level=logging.INFO)
def get_api_key():
config = configparser.ConfigParser()
config.read('config.ini')
return config['API_KEYS'].get('positionstack', '')
def browse_input_file():
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename(title='Select Input File', filetypes=[('Excel Files', '*.xlsx')])
return file_path
def get_geocoding_data(address, api_key):
url = f'https://api.positionstack.com/v1/forward?access_key={api_key}&query={address}'
response = requests.get(url)
if response.status_code == 200:
data = json.loads(response.content)
latitude = data['data'][0]['latitude']
longitude = data['data'][0]['longitude']
return latitude, longitude
return None, None
def get_state_by_zip(zip_code, api_key):
url = f'https://api.positionstack.com/v1/forward?access_key={api_key}&query={zip_code}&limit=1'
try:
response = requests.get(url)
if response.status_code == 200:
data = json.loads(response.content)
if data['data']:
state = data['data'][0]['region']
return state
else:
print("No data found for the given zip code.")
else:
print(f"Error: {response.status_code}, {response.text}")
except Exception as e:
print(f"Error occurred: {e}")
return None
def preprocess_excel_data(input_file):
df = pd.read_excel(input_file, header=None)
preprocessed_data = []
current_award = None
for _, row in df.iterrows():
if pd.notna(row[0]) and pd.isna(row[1]) and pd.isna(row[2]):
current_award = row[0]
elif all(pd.notna(row[col]) for col in range(3)):
name, address, cuisine = row[0].strip(), row[1].strip(), row[2].strip()
if name.lower() != 'name' and address.lower() != 'address' and cuisine.lower() != 'cuisine':
preprocessed_data.append({'Name': name, 'Address': address, 'Cuisine': cuisine, 'Award': current_award.strip()})
preprocessed_df = pd.DataFrame(preprocessed_data)
preprocessed_df.dropna(inplace=True)
return preprocessed_df
def process_award_data(input_file, api_key):
try:
preprocessed_df = preprocess_excel_data(input_file)
rows = []
for _, row in preprocessed_df.iterrows():
name, address, cuisine, award_name = row['Name'], row['Address'], row['Cuisine'], row['Award']
# First, get the state from the zip code
address_parts = address.split(',')[:-1]
address_parts = ','.join(address_parts)
street = address.split(',')[0]
city = address.split(',')[1]
zip_code = address.split(',')[-1]
zip_code_1 = f'{street},{zip_code}'
state = get_state_by_zip(zip_code_1,api_key)
# If state is found, insert it into the address
if state:
address_with_state = f'{address_parts},{state},{zip_code}'
else:
address_with_state = address
# Now get the latitude, longitude, region, locality, and postal code using the full address
latitude, longitude = get_geocoding_data(address_with_state, api_key)
if latitude and longitude:
rows.append({
'Restaurant Name': name,
'Street Address': address.split(',')[0],
'City': city,
'State': state,
'Zip Code': zip_code,
'Latitude': latitude,
'Longitude': longitude,
'Cuisine Type': cuisine,
'Award Name': award_name
})
# Create a new DataFrame with the extracted data and save it to a new Excel file
award_date = datetime.now().strftime('%Y%m%d')
output_file = f'award_winning_restaurants_{award_date}.xlsx'
output_df = pd.DataFrame(rows)
output_df.to_excel(output_file, index=False)
logging.info(f'All awards data processed and saved to {output_file}')
# Print input and output data for debugging and verification
logging.debug(f'Preprocessed Data:\n{preprocessed_df}')
logging.debug(f'Output Data:\n{output_df}')
except Exception as e:
logging.error(f'Error occurred: {e}')
if __name__ == "__main__":
try:
input_file_path = browse_input_file()
if not input_file_path:
logging.warning('No input file selected. Exiting.')
else:
positionstack_api_key = get_api_key()
if not positionstack_api_key:
logging.error('Positionstack API key is missing. Please check the config.ini file.')
else:
process_award_data(input_file_path, positionstack_api_key)
except Exception as e:
logging.error(f'An unexpected error occurred: {e}')
Editor is loading...