Untitled
plain_text
2 months ago
5.1 kB
1
Indexable
Never
import json import logging import requests import configparser import pandas as pd from datetime import datetime import tkinter as tk from tkinter import filedialog logging.basicConfig(level=logging.INFO) def get_api_key(): config = configparser.ConfigParser() config.read('config.ini') return config['API_KEYS'].get('positionstack', '') def browse_input_file(): root = tk.Tk() root.withdraw() file_path = filedialog.askopenfilename(title='Select Input File', filetypes=[('Excel Files', '*.xlsx')]) return file_path def get_geocoding_data(address, api_key): url = f'https://api.positionstack.com/v1/forward?access_key={api_key}&query={address}' response = requests.get(url) if response.status_code == 200: data = json.loads(response.content) latitude = data['data'][0]['latitude'] longitude = data['data'][0]['longitude'] return latitude, longitude return None, None def get_state_by_zip(zip_code, api_key): url = f'https://api.positionstack.com/v1/forward?access_key={api_key}&query={zip_code}&limit=1' try: response = requests.get(url) if response.status_code == 200: data = json.loads(response.content) if data['data']: state = data['data'][0]['region'] return state else: print("No data found for the given zip code.") else: print(f"Error: {response.status_code}, {response.text}") except Exception as e: print(f"Error occurred: {e}") return None def preprocess_excel_data(input_file): df = pd.read_excel(input_file, header=None) preprocessed_data = [] current_award = None for _, row in df.iterrows(): if pd.notna(row[0]) and pd.isna(row[1]) and pd.isna(row[2]): current_award = row[0] elif all(pd.notna(row[col]) for col in range(3)): name, address, cuisine = row[0].strip(), row[1].strip(), row[2].strip() if name.lower() != 'name' and address.lower() != 'address' and cuisine.lower() != 'cuisine': preprocessed_data.append({'Name': name, 'Address': address, 'Cuisine': cuisine, 'Award': current_award.strip()}) preprocessed_df = pd.DataFrame(preprocessed_data) preprocessed_df.dropna(inplace=True) return preprocessed_df def process_award_data(input_file, api_key): try: preprocessed_df = preprocess_excel_data(input_file) rows = [] for _, row in preprocessed_df.iterrows(): name, address, cuisine, award_name = row['Name'], row['Address'], row['Cuisine'], row['Award'] # First, get the state from the zip code address_parts = address.split(',')[:-1] address_parts = ','.join(address_parts) street = address.split(',')[0] city = address.split(',')[1] zip_code = address.split(',')[-1] zip_code_1 = f'{street},{zip_code}' state = get_state_by_zip(zip_code_1,api_key) # If state is found, insert it into the address if state: address_with_state = f'{address_parts},{state},{zip_code}' else: address_with_state = address # Now get the latitude, longitude, region, locality, and postal code using the full address latitude, longitude = get_geocoding_data(address_with_state, api_key) if latitude and longitude: rows.append({ 'Restaurant Name': name, 'Street Address': address.split(',')[0], 'City': city, 'State': state, 'Zip Code': zip_code, 'Latitude': latitude, 'Longitude': longitude, 'Cuisine Type': cuisine, 'Award Name': award_name }) # Create a new DataFrame with the extracted data and save it to a new Excel file award_date = datetime.now().strftime('%Y%m%d') output_file = f'award_winning_restaurants_{award_date}.xlsx' output_df = pd.DataFrame(rows) output_df.to_excel(output_file, index=False) logging.info(f'All awards data processed and saved to {output_file}') # Print input and output data for debugging and verification logging.debug(f'Preprocessed Data:\n{preprocessed_df}') logging.debug(f'Output Data:\n{output_df}') except Exception as e: logging.error(f'Error occurred: {e}') if __name__ == "__main__": try: input_file_path = browse_input_file() if not input_file_path: logging.warning('No input file selected. Exiting.') else: positionstack_api_key = get_api_key() if not positionstack_api_key: logging.error('Positionstack API key is missing. Please check the config.ini file.') else: process_award_data(input_file_path, positionstack_api_key) except Exception as e: logging.error(f'An unexpected error occurred: {e}')