Untitled
unknown
plain_text
2 years ago
2.9 kB
5
Indexable
import ftplib
import os
import time
from typing import Union
from zipfile import ZipFile

import pandas as pd


def extract_csv_from_zip(zip_file_path: str, csv_filename: str) -> pd.DataFrame:
    """
    Extracts a CSV file from a zip archive and returns it as a pandas DataFrame.

    The member is extracted into the current working directory and left on
    disk; the DataFrame is read from that extracted copy.

    Args:
    - zip_file_path (str): Path to the zip file
    - csv_filename (str): Name of the CSV file to extract

    Returns:
    - pd.DataFrame: Extracted DataFrame

    Raises:
    - KeyError: If csv_filename is not a member of the archive
    """
    with ZipFile(zip_file_path, 'r') as zip_object:
        # extract() returns the path it actually wrote. That can differ from
        # the member name (leading slashes and '..' parts are stripped), so
        # read from the returned path rather than from csv_filename.
        extracted_path = zip_object.extract(csv_filename)
    return pd.read_csv(extracted_path)


def _coerce_columns(df: pd.DataFrame, columns: Union[str, list], converter) -> pd.DataFrame:
    """Apply converter(series, errors='coerce') in place to each listed column present in df."""
    if isinstance(columns, str):
        columns = [columns]
    for col in columns:
        # Names that are not columns of df are skipped silently.
        if col in df.columns:
            # 'coerce' turns unparseable values into NaT/NaN instead of raising
            df[col] = converter(df[col], errors='coerce')
    return df


def convert_to_datetime(df: pd.DataFrame, columns: Union[str, list]) -> pd.DataFrame:
    """
    Converts specified columns in a DataFrame to datetime format.

    The conversion is done in place: the returned object is the same DataFrame
    that was passed in. Values that cannot be parsed become NaT, and column
    names not present in the DataFrame are ignored.

    Args:
    - df (pd.DataFrame): Input DataFrame
    - columns (Union[str, list]): Column name(s) to convert to datetime

    Returns:
    - pd.DataFrame: DataFrame with specified columns converted to datetime
    """
    return _coerce_columns(df, columns, pd.to_datetime)


def convert_to_numeric(df: pd.DataFrame, columns: Union[str, list]) -> pd.DataFrame:
    """
    Converts specified columns in a DataFrame to numeric format.

    The conversion is done in place: the returned object is the same DataFrame
    that was passed in. Values that cannot be parsed become NaN, and column
    names not present in the DataFrame are ignored.

    Args:
    - df (pd.DataFrame): Input DataFrame
    - columns (Union[str, list]): Column name(s) to convert to numeric

    Returns:
    - pd.DataFrame: DataFrame with specified columns converted to numeric
    """
    return _coerce_columns(df, columns, pd.to_numeric)


def main() -> None:
    """Download the order feed archive over FTP, extract Orders.csv and print it."""
    # File paths
    zip_file_path = "D:\\scripts\\Shopify_Order_Feed_FTF.zip"
    csv_filename = "Orders.csv"
    remote_zip_name = 'Shopify_Order_Feed_FTF.zip'

    # SECURITY: these credentials are committed in source. They are kept only
    # as fallbacks so the script behaves as before; rotate the password and
    # supply the values through the environment instead.
    ftp_host = os.environ.get("FTP_HOST", "sftp.saadiadirect.com")
    ftp_user = os.environ.get("FTP_USER", "ftp-seller-QA")
    ftp_pass = os.environ.get("FTP_PASS", "8v%u2$v7zi2!")

    # Connect to FTP server; the context manager closes the connection even
    # if a command fails part-way through.
    with ftplib.FTP(ftp_host, ftp_user, ftp_pass) as ftp:
        print(ftp.getwelcome())
        # FTP.dir() prints the listing itself and returns None, so it is not
        # wrapped in print().
        ftp.dir()
        ftp.cwd('GCP')
        ftp.dir()
        ftp.cwd('FTF')
        ftp.dir()

        # Download to the same path that is extracted below, so the archive
        # that gets processed is the one that was just fetched.
        with open(zip_file_path, 'wb') as f:
            ftp.retrbinary('RETR ' + remote_zip_name, f.write)

    # retrbinary() blocks until the transfer is complete and the file has been
    # closed above, so no delay is needed before reading it.
    extracted_df = extract_csv_from_zip(zip_file_path, csv_filename)

    # Convert columns to appropriate data types
    columns_to_datetime = [
        'Created At',
        'Published At',
        'Cancelled At',
        'Refund: Created At',
        'Fulfillment: Created At',
        'Fulfillment: Updated At',
    ]
    extracted_df = convert_to_datetime(extracted_df, columns_to_datetime)
    print(extracted_df)


if __name__ == '__main__':
    main()
Editor is loading...
Leave a Comment