Untitled

mail@pastecode.io avatarunknown
plain_text
2 months ago
5.1 kB
1
Indexable
Never
import json
import logging
import requests
import configparser
import pandas as pd
from datetime import datetime
import tkinter as tk
from tkinter import filedialog

logging.basicConfig(level=logging.INFO)

def get_api_key():
    config = configparser.ConfigParser()
    config.read('config.ini')
    return config['API_KEYS'].get('positionstack', '')

def browse_input_file():
    root = tk.Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename(title='Select Input File', filetypes=[('Excel Files', '*.xlsx')])
    return file_path

def get_geocoding_data(address, api_key):
    url = f'https://api.positionstack.com/v1/forward?access_key={api_key}&query={address}'
    response = requests.get(url)
    if response.status_code == 200:
        data = json.loads(response.content)
        latitude = data['data'][0]['latitude']
        longitude = data['data'][0]['longitude']
        return latitude, longitude
    return None, None

def get_state_by_zip(zip_code, api_key):
    url = f'https://api.positionstack.com/v1/forward?access_key={api_key}&query={zip_code}&limit=1'
    try:
        response = requests.get(url)
        if response.status_code == 200:
            data = json.loads(response.content)
            if data['data']:
                state = data['data'][0]['region']
                return state
            else:
                print("No data found for the given zip code.")
        else:
            print(f"Error: {response.status_code}, {response.text}")
    except Exception as e:
        print(f"Error occurred: {e}")
    return None


def preprocess_excel_data(input_file):
    df = pd.read_excel(input_file, header=None)
    preprocessed_data = []
    current_award = None

    for _, row in df.iterrows():
        if pd.notna(row[0]) and pd.isna(row[1]) and pd.isna(row[2]):
            current_award = row[0]
        elif all(pd.notna(row[col]) for col in range(3)):
            name, address, cuisine = row[0].strip(), row[1].strip(), row[2].strip()
            if name.lower() != 'name' and address.lower() != 'address' and cuisine.lower() != 'cuisine':
                preprocessed_data.append({'Name': name, 'Address': address, 'Cuisine': cuisine, 'Award': current_award.strip()})

    preprocessed_df = pd.DataFrame(preprocessed_data)
    preprocessed_df.dropna(inplace=True)
    return preprocessed_df

def process_award_data(input_file, api_key):
    try:
        preprocessed_df = preprocess_excel_data(input_file)
        rows = []

        for _, row in preprocessed_df.iterrows():
            name, address, cuisine, award_name = row['Name'], row['Address'], row['Cuisine'], row['Award']

            # First, get the state from the zip code
            address_parts = address.split(',')[:-1]
            address_parts = ','.join(address_parts)
            street = address.split(',')[0]
            city = address.split(',')[1]
            zip_code = address.split(',')[-1]
            zip_code_1 = f'{street},{zip_code}'
            state = get_state_by_zip(zip_code_1,api_key)

            # If state is found, insert it into the address
            if state:
                address_with_state = f'{address_parts},{state},{zip_code}'
            else:
                address_with_state = address

            # Now get the latitude, longitude, region, locality, and postal code using the full address
            latitude, longitude = get_geocoding_data(address_with_state, api_key)

            if latitude and longitude:
                rows.append({
                    'Restaurant Name': name,
                    'Street Address': address.split(',')[0],
                    'City': city,
                    'State': state,
                    'Zip Code': zip_code,
                    'Latitude': latitude,
                    'Longitude': longitude,
                    'Cuisine Type': cuisine,
                    'Award Name': award_name
                })

        # Create a new DataFrame with the extracted data and save it to a new Excel file
        award_date = datetime.now().strftime('%Y%m%d')
        output_file = f'award_winning_restaurants_{award_date}.xlsx'
        output_df = pd.DataFrame(rows)
        output_df.to_excel(output_file, index=False)

        logging.info(f'All awards data processed and saved to {output_file}')

        # Print input and output data for debugging and verification
        logging.debug(f'Preprocessed Data:\n{preprocessed_df}')
        logging.debug(f'Output Data:\n{output_df}')

    except Exception as e:
        logging.error(f'Error occurred: {e}')

if __name__ == "__main__":
    try:
        input_file_path = browse_input_file()

        if not input_file_path:
            logging.warning('No input file selected. Exiting.')
        else:
            positionstack_api_key = get_api_key()
            if not positionstack_api_key:
                logging.error('Positionstack API key is missing. Please check the config.ini file.')
            else:
                process_award_data(input_file_path, positionstack_api_key)
    except Exception as e:
        logging.error(f'An unexpected error occurred: {e}')