# Untitled paste (pastecode.io, plain_text, 2.9 kB) -- page header residue kept as a comment.
import time
from zipfile import ZipFile
import pandas as pd
from typing import Union
import ftplib


def extract_csv_from_zip(zip_file_path: str, csv_filename: str,
                         extract_dir: Union[str, None] = None) -> pd.DataFrame:
    """
    Extracts a CSV file from a zip archive and returns it as a pandas DataFrame.

    The member is written to disk (it is not read in memory), so the
    extracted file remains available after the call.

    Args:
    - zip_file_path (str): Path to the zip file
    - csv_filename (str): Name of the CSV member inside the archive
    - extract_dir (Union[str, None]): Directory to extract into; None (the
      default) extracts into the current working directory

    Returns:
    - pd.DataFrame: Extracted DataFrame

    Raises:
    - KeyError: If the archive has no member named csv_filename
    """
    with ZipFile(zip_file_path, 'r') as zip_object:
        # extract() sanitises the member name (drops leading slashes and
        # ".." components) and returns the path it actually wrote, which can
        # differ from csv_filename -- so read from the returned path.
        extracted_path = zip_object.extract(csv_filename, path=extract_dir)

    return pd.read_csv(extracted_path)


def convert_to_datetime(df: pd.DataFrame, columns: Union[str, list]) -> pd.DataFrame:
    """
    Parses the requested columns of a DataFrame as datetimes.

    The DataFrame is modified in place and the same object is returned.
    Requested names that are not columns of the DataFrame are skipped, and
    values that cannot be parsed become NaT (errors='coerce').

    Args:
    - df (pd.DataFrame): DataFrame whose columns are converted
    - columns (Union[str, list]): A single column name or a list of names

    Returns:
    - pd.DataFrame: The same DataFrame, with the present columns converted
    """
    # A bare string is treated as a one-element list of names.
    requested = [columns] if isinstance(columns, str) else columns

    for name in requested:
        if name not in df.columns:
            continue
        df[name] = pd.to_datetime(df[name], errors='coerce')

    return df


def convert_to_numeric(df: pd.DataFrame, columns: Union[str, list]) -> pd.DataFrame:
    """
    Parses the requested columns of a DataFrame as numbers.

    The DataFrame is modified in place and the same object is returned.
    Requested names that are not columns of the DataFrame are skipped, and
    values that cannot be parsed become NaN (errors='coerce').

    Args:
    - df (pd.DataFrame): DataFrame whose columns are converted
    - columns (Union[str, list]): A single column name or a list of names

    Returns:
    - pd.DataFrame: The same DataFrame, with the present columns converted
    """
    if isinstance(columns, str):
        columns = [columns]

    # Keep only the requested names that actually exist in the DataFrame.
    present = [name for name in columns if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors='coerce')

    return df


if __name__ == '__main__':
    import os  # local import: only the script entry point needs it

    # File paths
    zip_file_path = "D:\\scripts\\Shopify_Order_Feed_FTF.zip"  # local download target
    remote_zip_name = "Shopify_Order_Feed_FTF.zip"  # file name on the FTP server
    csv_filename = "Orders.csv"

    # FTP connection settings. Environment variables take precedence; the
    # literals are kept only as fallbacks so the script keeps working.
    # NOTE(security): credentials should not live in source control -- rotate
    # this password and drop the fallback once the environment is configured.
    FTP_HOST = os.environ.get("FTP_HOST", "sftp.saadiadirect.com")
    FTP_USER = os.environ.get("FTP_USER", "ftp-seller-QA")
    FTP_PASS = os.environ.get("FTP_PASS", "8v%u2$v7zi2!")

    # Connect to FTP server; the context manager closes the connection even
    # if the download fails.
    with ftplib.FTP(FTP_HOST, FTP_USER, FTP_PASS) as ftp:
        print(ftp.getwelcome())
        # FTP.dir() prints the listing itself and returns None, so it is
        # called directly rather than wrapped in print().
        ftp.dir()
        ftp.cwd('GCP')
        ftp.dir()
        ftp.cwd('FTF')
        ftp.dir()

        # Download file from FTP server to the same path it is extracted
        # from below.
        with open(zip_file_path, 'wb') as f:
            ftp.retrbinary('RETR ' + remote_zip_name, f.write)

    # The archive is only read after its file handle is closed (and therefore
    # flushed), so no sleep is needed before extracting.
    extracted_df = extract_csv_from_zip(zip_file_path, csv_filename)

    # Convert columns to appropriate data types
    columns_to_datetime = [
        'Created At',
        'Published At',
        'Cancelled At',
        'Refund: Created At',
        'Fulfillment: Created At',
        'Fulfillment: Updated At',
    ]

    extracted_df = convert_to_datetime(extracted_df, columns_to_datetime)

    print(extracted_df)

 
# (paste-site footer residue: "Leave a Comment")