Untitled

 avatar
unknown
plain_text
2 years ago
5.1 kB
13
Indexable
import json
import logging
import requests
import configparser
import pandas as pd
from datetime import datetime
import tkinter as tk
from tkinter import filedialog

logging.basicConfig(level=logging.INFO)

def get_api_key():
    config = configparser.ConfigParser()
    config.read('config.ini')
    return config['API_KEYS'].get('positionstack', '')

def browse_input_file():
    root = tk.Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename(title='Select Input File', filetypes=[('Excel Files', '*.xlsx')])
    return file_path

def get_geocoding_data(address, api_key):
    url = f'https://api.positionstack.com/v1/forward?access_key={api_key}&query={address}'
    response = requests.get(url)
    if response.status_code == 200:
        data = json.loads(response.content)
        latitude = data['data'][0]['latitude']
        longitude = data['data'][0]['longitude']
        return latitude, longitude
    return None, None

def get_state_by_zip(zip_code, api_key):
    url = f'https://api.positionstack.com/v1/forward?access_key={api_key}&query={zip_code}&limit=1'
    try:
        response = requests.get(url)
        if response.status_code == 200:
            data = json.loads(response.content)
            if data['data']:
                state = data['data'][0]['region']
                return state
            else:
                print("No data found for the given zip code.")
        else:
            print(f"Error: {response.status_code}, {response.text}")
    except Exception as e:
        print(f"Error occurred: {e}")
    return None


def preprocess_excel_data(input_file):
    df = pd.read_excel(input_file, header=None)
    preprocessed_data = []
    current_award = None

    for _, row in df.iterrows():
        if pd.notna(row[0]) and pd.isna(row[1]) and pd.isna(row[2]):
            current_award = row[0]
        elif all(pd.notna(row[col]) for col in range(3)):
            name, address, cuisine = row[0].strip(), row[1].strip(), row[2].strip()
            if name.lower() != 'name' and address.lower() != 'address' and cuisine.lower() != 'cuisine':
                preprocessed_data.append({'Name': name, 'Address': address, 'Cuisine': cuisine, 'Award': current_award.strip()})

    preprocessed_df = pd.DataFrame(preprocessed_data)
    preprocessed_df.dropna(inplace=True)
    return preprocessed_df

def process_award_data(input_file, api_key):
    try:
        preprocessed_df = preprocess_excel_data(input_file)
        rows = []

        for _, row in preprocessed_df.iterrows():
            name, address, cuisine, award_name = row['Name'], row['Address'], row['Cuisine'], row['Award']

            # First, get the state from the zip code
            address_parts = address.split(',')[:-1]
            address_parts = ','.join(address_parts)
            street = address.split(',')[0]
            city = address.split(',')[1]
            zip_code = address.split(',')[-1]
            zip_code_1 = f'{street},{zip_code}'
            state = get_state_by_zip(zip_code_1,api_key)

            # If state is found, insert it into the address
            if state:
                address_with_state = f'{address_parts},{state},{zip_code}'
            else:
                address_with_state = address

            # Now get the latitude, longitude, region, locality, and postal code using the full address
            latitude, longitude = get_geocoding_data(address_with_state, api_key)

            if latitude and longitude:
                rows.append({
                    'Restaurant Name': name,
                    'Street Address': address.split(',')[0],
                    'City': city,
                    'State': state,
                    'Zip Code': zip_code,
                    'Latitude': latitude,
                    'Longitude': longitude,
                    'Cuisine Type': cuisine,
                    'Award Name': award_name
                })

        # Create a new DataFrame with the extracted data and save it to a new Excel file
        award_date = datetime.now().strftime('%Y%m%d')
        output_file = f'award_winning_restaurants_{award_date}.xlsx'
        output_df = pd.DataFrame(rows)
        output_df.to_excel(output_file, index=False)

        logging.info(f'All awards data processed and saved to {output_file}')

        # Print input and output data for debugging and verification
        logging.debug(f'Preprocessed Data:\n{preprocessed_df}')
        logging.debug(f'Output Data:\n{output_df}')

    except Exception as e:
        logging.error(f'Error occurred: {e}')

if __name__ == "__main__":
    try:
        input_file_path = browse_input_file()

        if not input_file_path:
            logging.warning('No input file selected. Exiting.')
        else:
            positionstack_api_key = get_api_key()
            if not positionstack_api_key:
                logging.error('Positionstack API key is missing. Please check the config.ini file.')
            else:
                process_award_data(input_file_path, positionstack_api_key)
    except Exception as e:
        logging.error(f'An unexpected error occurred: {e}')
Editor is loading...