Untitled

 avatar
unknown
plain_text
10 months ago
9.4 kB
1
Indexable
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import pandas as pd
from colorama import init, Fore, Style

# Initialize colorama
init(autoreset=True)

def load_data(file_path):
    try:
        data = pd.read_csv(file_path, delimiter=';', encoding='latin1')
        return data
    except Exception as e:
        print(f"Error loading file: {e}")
        return None

def sum_billed(status_counts, positive=True):
    if positive:
        billed_total = status_counts.get('zbilingowany', 0) + status_counts.get('zbilingowany - POPC', 0)
    else:
        billed_total = status_counts.get('zbilingowany negatywnie', 0) + status_counts.get('zbilingowany negatywnie - POPC', 0)
    return billed_total

def sum_transferred(status_counts):
    transferred_total = (
        status_counts.get('przekazane do call center - POPC', 0) +
        status_counts.get('przekazane do call center - MMP', 0) +
        status_counts.get('przekazane do call center - OK - POPC', 0) +
        status_counts.get('przekazane do call center - OK', 0) +
        status_counts.get('przekazane do call center - MMP - POPC', 0)
    )
    return transferred_total

def sum_unfinished(status_counts):
    unfinished_total = (
        status_counts.get('nieukończony wniosek', 0) +
        status_counts.get('nieukończony wniosek - POPC', 0)
    )
    return unfinished_total

def filter_out_tests(data, column):
    return data[~data[column].str.contains('Test', case=False, na=False)]

def print_status_counts(title, status_counts, sum_positive, sum_negative, sum_transferred, sum_unfinished):
    print(Fore.GREEN + title)
    for status, count in status_counts.items():
        print(f"{status}: {count}")
    print(f"\n{Fore.YELLOW}zbilingowany + zbilingowany - POPC: {sum_positive}")
    print(f"{Fore.RED}zbilingowany negatywnie + zbilingowany negatywnie - POPC: {sum_negative}")
    print(f"{Fore.CYAN}przekazane do call center - POPC + MMP + OK - POPC + OK + MMP - POPC: {sum_transferred}")
    print(f"{Fore.MAGENTA}nieukończony wniosek + nieukończony wniosek - POPC: {sum_unfinished}")
    print("\n" + Style.RESET_ALL)

def print_special_status_counts(title, status_counts):
    print(Fore.GREEN + title)
    for status, count in status_counts.items():
        print(f"{status}: {count}")
    print("\n" + Style.RESET_ALL)

def main():
    file_name = 'raporttestowy.csv'

    data = load_data(file_name)

    if data is not None:
        print(Fore.BLUE + "Data loaded successfully. Here are the first few rows:")
        print(data.head())

        # Display all column names to identify issues
        print(Fore.BLUE + "Columns in the dataset:")
        print(data.columns)

        # Get the data from columns "D", "L", "P" and "K"
        source_column = data.columns[3]  # Assuming "D" is the fourth column (index 3)
        status_column = data.columns[11]  # Assuming "L" is the twelfth column (index 11)
        surname_column = data.columns[15]  # Assuming "P" is the sixteenth column (index 15)
        date_column = data.columns[10]  # Assuming "K" is the eleventh column (index 10)

        # Check if "Nazwa paczki" is in column "AC" or "AG"
        if len(data.columns) > 28 and data.columns[28] == 'Nazwa paczki':
            package_column = data.columns[28]  # Assuming "AC" is the twenty-ninth column (index 28)
        elif len(data.columns) > 32 and data.columns[32] == 'Nazwa paczki':
            package_column = data.columns[32]  # Assuming "AG" is the thirty-third column (index 32)
        else:
            print(Fore.RED + "Nazwa paczki column not found.")
            return

        # Filter out rows where column "P" contains "Test" or "test"
        data = filter_out_tests(data, surname_column)

        # Filter data to include only rows where column "D" has value "www nowe"
        filtered_www = data[data[source_column] == 'www nowe']

        # Filter data to include rows where column "D" has value "www nowe" or "mmp nowe"
        filtered_combined = data[data[source_column].isin(['www nowe', 'mmp nowe'])]

        # Count occurrences of each unique value in the status column for the filtered data
        status_counts_www = filtered_www[status_column].value_counts()
        status_counts_combined = filtered_combined[status_column].value_counts()

        # Print the status counts for 'www nowe'
        print_status_counts(
            "Status counts for 'www nowe' in column 'Źródło zamówienia':",
            status_counts_www,
            sum_billed(status_counts_www),
            sum_billed(status_counts_www, positive=False),
            sum_transferred(status_counts_www),
            sum_unfinished(status_counts_www)
        )

        # Print the status counts for 'www nowe' and 'mmp nowe' combined
        print_status_counts(
            "Status counts for 'www nowe' and 'mmp nowe' in column 'Źródło zamówienia':",
            status_counts_combined,
            sum_billed(status_counts_combined),
            sum_billed(status_counts_combined, positive=False),
            sum_transferred(status_counts_combined),
            sum_unfinished(status_counts_combined)
        )

        # Further filter data to include only rows where column "Nazwa paczki" contains "INT"
        filtered_final = filtered_combined[filtered_combined[package_column].str.contains('INT', na=False)]

        # Count occurrences of each unique value in the status column for the filtered data
        status_counts_final = filtered_final[status_column].value_counts()

        # Print the status counts for the final filtered data
        print_status_counts(
            "Status counts for 'www nowe' and 'mmp nowe' in column 'Źródło zamówienia' with 'INT' in column 'Nazwa paczki':",
            status_counts_final,
            sum_billed(status_counts_final),
            sum_billed(status_counts_final, positive=False),
            sum_transferred(status_counts_final),
            sum_unfinished(status_counts_final)
        )

        # Filter data to include only rows where column "Nazwa paczki" contains "MVNO"
        filtered_mvno = filtered_combined[filtered_combined[package_column].str.contains('MVNO', na=False)]

        # Count occurrences of each unique value in the status column for the filtered data
        status_counts_mvno = filtered_mvno[status_column].value_counts()

        # Print the status counts for the filtered MVNO data
        print_special_status_counts(
            "Status counts for 'MVNO' in column 'Nazwa paczki':",
            status_counts_mvno
        )

        # Filter data to include only rows where column "Nazwa paczki" contains "K Vectra TV Smart"
        filtered_vtv = filtered_combined[filtered_combined[package_column].str.contains('K Vectra TV Smart', na=False)]

        # Count occurrences of each unique value in the status column for the filtered data
        status_counts_vtv = filtered_vtv[status_column].value_counts()

        # Print the status counts for the filtered VTV data
        print_special_status_counts(
            "Status counts for 'K Vectra TV Smart' in column 'Nazwa paczki' for 'www nowe' and 'mmp nowe':",
            status_counts_vtv
        )
        # Filter data to include only rows where 'Status zamówienia' is 'zbilingowany' and 'Nazwa paczki' contains 'INT'
        filtered_billed_int = filtered_combined[
            (filtered_combined[status_column] == 'zbilingowany') &
            (filtered_combined[package_column].str.contains('INT', na=False))
        ]
        
        # Extract and count unique dates in the 'Termin wizyty/montażu' column
        filtered_billed_int['Date Only'] = pd.to_datetime(filtered_billed_int[date_column].str.split(' ').str[0], format='%d.%m.%Y')
        filtered_billed_int['Month'] = filtered_billed_int['Date Only'].dt.strftime('%m')
        filtered_billed_int['Day'] = filtered_billed_int['Date Only'].dt.strftime('%d.%m')
        date_counts = filtered_billed_int.groupby('Month')['Day'].value_counts().sort_index(level=[0, 1])

        # Print the date counts for the 'Date Only' column, grouped and sorted by month
        print(Fore.GREEN + "Data instalacji:")
        current_month = None
        for (month, day), count in date_counts.items():
            if month != current_month:
                current_month = month
                print(f"\n{month}.")
            print(f"{day}: {count}")

                # Filter for 'TOP 3' conditions: 'INT' in package column, 'www nowe' or 'mmp nowe' in source column, and 'zbilingowany' in status column
        top_filtered = filtered_final[
            (filtered_final[source_column].isin(['www nowe', 'mmp nowe'])) &
            (filtered_final[status_column] == 'zbilingowany')
        ]

        # Get the top 3 most frequent package names
        top_packages = top_filtered[package_column].value_counts().head(3)

        # Print the top 3 package names
        print(Fore.GREEN + "TOP 3 'INT' packages:")
        for package, count in top_packages.items():
            print(f"{package}: {count}")
    
        
    else:
        print(Fore.RED + "Failed to load data.")

if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment