# Untitled
# unknown
# python
# 2 years ago
# 2.3 kB
# 16
# Indexable
from zipfile import ZipFile
import pandas as pd
from typing import Union
def extract_csv_from_zip(zip_file_path: str, csv_filename: str) -> pd.DataFrame:
    """
    Reads a CSV member of a zip archive and returns it as a pandas DataFrame.

    The member is streamed straight out of the archive, so nothing is written
    to the current working directory and no existing file of the same name
    can be overwritten.

    Args:
    - zip_file_path (str): Path to the zip file
    - csv_filename (str): Name of the CSV member inside the archive

    Returns:
    - pd.DataFrame: Parsed contents of the CSV member

    Raises:
    - KeyError: If the archive has no member named csv_filename
    """
    with ZipFile(zip_file_path, 'r') as zip_object:
        # Parse while the archive is still open: the member handle is only
        # readable inside this context.
        with zip_object.open(csv_filename) as csv_stream:
            return pd.read_csv(csv_stream)
def convert_to_datetime(df: pd.DataFrame, columns: Union[str, list]) -> pd.DataFrame:
    """
    Parses the named columns of a DataFrame as datetimes, in place.

    Args:
    - df (pd.DataFrame): DataFrame to modify; it is updated in place
    - columns (Union[str, list]): A single column name or a list of names

    Returns:
    - pd.DataFrame: The same DataFrame object, with every listed column that
      exists converted via pd.to_datetime; names not present are skipped
    """
    # A bare string means "one column"; wrap it so the loop below is uniform.
    targets = [columns] if isinstance(columns, str) else columns
    for name in targets:
        if name not in df.columns:
            continue
        df[name] = pd.to_datetime(df[name])
    return df
def convert_to_numeric(df: pd.DataFrame, columns: Union[str, list]) -> pd.DataFrame:
    """
    Casts the named columns of a DataFrame to numeric values, in place.

    Args:
    - df (pd.DataFrame): DataFrame to modify; it is updated in place
    - columns (Union[str, list]): A single column name or a list of names

    Returns:
    - pd.DataFrame: The same DataFrame object, with every listed column that
      exists converted via pd.to_numeric; names not present are skipped
    """
    # A bare string means "one column"; wrap it so the loop below is uniform.
    targets = [columns] if isinstance(columns, str) else columns
    for name in targets:
        if name not in df.columns:
            continue
        # errors='coerce' turns unparseable entries into NaN instead of raising.
        df[name] = pd.to_numeric(df[name], errors='coerce')
    return df
if __name__ == '__main__':
    # Where the report archive lives and which member to load from it.
    zip_file_path = "D:\\scripts\\Merchant - Shopify - Report.zip"
    csv_filename = "Products.csv"

    # Columns that need a dtype other than what read_csv infers.
    columns_to_datetime = ['Created At', 'Published At']
    columns_to_numeric = 'ID'

    # Load, then apply the datetime and numeric conversions in that order.
    extracted_df = extract_csv_from_zip(zip_file_path, csv_filename)
    extracted_df = convert_to_numeric(
        convert_to_datetime(extracted_df, columns_to_datetime),
        columns_to_numeric,
    )

    print(extracted_df)
# Editor is loading...
# Leave a Comment