Untitled
unknown
plain_text
2 years ago
9.4 kB
9
Indexable
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import pandas as pd
from colorama import init, Fore, Style
# Initialize colorama
init(autoreset=True)
def load_data(file_path):
try:
data = pd.read_csv(file_path, delimiter=';', encoding='latin1')
return data
except Exception as e:
print(f"Error loading file: {e}")
return None
def sum_billed(status_counts, positive=True):
if positive:
billed_total = status_counts.get('zbilingowany', 0) + status_counts.get('zbilingowany - POPC', 0)
else:
billed_total = status_counts.get('zbilingowany negatywnie', 0) + status_counts.get('zbilingowany negatywnie - POPC', 0)
return billed_total
def sum_transferred(status_counts):
transferred_total = (
status_counts.get('przekazane do call center - POPC', 0) +
status_counts.get('przekazane do call center - MMP', 0) +
status_counts.get('przekazane do call center - OK - POPC', 0) +
status_counts.get('przekazane do call center - OK', 0) +
status_counts.get('przekazane do call center - MMP - POPC', 0)
)
return transferred_total
def sum_unfinished(status_counts):
unfinished_total = (
status_counts.get('nieukończony wniosek', 0) +
status_counts.get('nieukończony wniosek - POPC', 0)
)
return unfinished_total
def filter_out_tests(data, column):
return data[~data[column].str.contains('Test', case=False, na=False)]
def print_status_counts(title, status_counts, sum_positive, sum_negative, sum_transferred, sum_unfinished):
print(Fore.GREEN + title)
for status, count in status_counts.items():
print(f"{status}: {count}")
print(f"\n{Fore.YELLOW}zbilingowany + zbilingowany - POPC: {sum_positive}")
print(f"{Fore.RED}zbilingowany negatywnie + zbilingowany negatywnie - POPC: {sum_negative}")
print(f"{Fore.CYAN}przekazane do call center - POPC + MMP + OK - POPC + OK + MMP - POPC: {sum_transferred}")
print(f"{Fore.MAGENTA}nieukończony wniosek + nieukończony wniosek - POPC: {sum_unfinished}")
print("\n" + Style.RESET_ALL)
def print_special_status_counts(title, status_counts):
print(Fore.GREEN + title)
for status, count in status_counts.items():
print(f"{status}: {count}")
print("\n" + Style.RESET_ALL)
def main():
file_name = 'raporttestowy.csv'
data = load_data(file_name)
if data is not None:
print(Fore.BLUE + "Data loaded successfully. Here are the first few rows:")
print(data.head())
# Display all column names to identify issues
print(Fore.BLUE + "Columns in the dataset:")
print(data.columns)
# Get the data from columns "D", "L", "P" and "K"
source_column = data.columns[3] # Assuming "D" is the fourth column (index 3)
status_column = data.columns[11] # Assuming "L" is the twelfth column (index 11)
surname_column = data.columns[15] # Assuming "P" is the sixteenth column (index 15)
date_column = data.columns[10] # Assuming "K" is the eleventh column (index 10)
# Check if "Nazwa paczki" is in column "AC" or "AG"
if len(data.columns) > 28 and data.columns[28] == 'Nazwa paczki':
package_column = data.columns[28] # Assuming "AC" is the twenty-ninth column (index 28)
elif len(data.columns) > 32 and data.columns[32] == 'Nazwa paczki':
package_column = data.columns[32] # Assuming "AG" is the thirty-third column (index 32)
else:
print(Fore.RED + "Nazwa paczki column not found.")
return
# Filter out rows where column "P" contains "Test" or "test"
data = filter_out_tests(data, surname_column)
# Filter data to include only rows where column "D" has value "www nowe"
filtered_www = data[data[source_column] == 'www nowe']
# Filter data to include rows where column "D" has value "www nowe" or "mmp nowe"
filtered_combined = data[data[source_column].isin(['www nowe', 'mmp nowe'])]
# Count occurrences of each unique value in the status column for the filtered data
status_counts_www = filtered_www[status_column].value_counts()
status_counts_combined = filtered_combined[status_column].value_counts()
# Print the status counts for 'www nowe'
print_status_counts(
"Status counts for 'www nowe' in column 'Źródło zamówienia':",
status_counts_www,
sum_billed(status_counts_www),
sum_billed(status_counts_www, positive=False),
sum_transferred(status_counts_www),
sum_unfinished(status_counts_www)
)
# Print the status counts for 'www nowe' and 'mmp nowe' combined
print_status_counts(
"Status counts for 'www nowe' and 'mmp nowe' in column 'Źródło zamówienia':",
status_counts_combined,
sum_billed(status_counts_combined),
sum_billed(status_counts_combined, positive=False),
sum_transferred(status_counts_combined),
sum_unfinished(status_counts_combined)
)
# Further filter data to include only rows where column "Nazwa paczki" contains "INT"
filtered_final = filtered_combined[filtered_combined[package_column].str.contains('INT', na=False)]
# Count occurrences of each unique value in the status column for the filtered data
status_counts_final = filtered_final[status_column].value_counts()
# Print the status counts for the final filtered data
print_status_counts(
"Status counts for 'www nowe' and 'mmp nowe' in column 'Źródło zamówienia' with 'INT' in column 'Nazwa paczki':",
status_counts_final,
sum_billed(status_counts_final),
sum_billed(status_counts_final, positive=False),
sum_transferred(status_counts_final),
sum_unfinished(status_counts_final)
)
# Filter data to include only rows where column "Nazwa paczki" contains "MVNO"
filtered_mvno = filtered_combined[filtered_combined[package_column].str.contains('MVNO', na=False)]
# Count occurrences of each unique value in the status column for the filtered data
status_counts_mvno = filtered_mvno[status_column].value_counts()
# Print the status counts for the filtered MVNO data
print_special_status_counts(
"Status counts for 'MVNO' in column 'Nazwa paczki':",
status_counts_mvno
)
# Filter data to include only rows where column "Nazwa paczki" contains "K Vectra TV Smart"
filtered_vtv = filtered_combined[filtered_combined[package_column].str.contains('K Vectra TV Smart', na=False)]
# Count occurrences of each unique value in the status column for the filtered data
status_counts_vtv = filtered_vtv[status_column].value_counts()
# Print the status counts for the filtered VTV data
print_special_status_counts(
"Status counts for 'K Vectra TV Smart' in column 'Nazwa paczki' for 'www nowe' and 'mmp nowe':",
status_counts_vtv
)
# Filter data to include only rows where 'Status zamówienia' is 'zbilingowany' and 'Nazwa paczki' contains 'INT'
filtered_billed_int = filtered_combined[
(filtered_combined[status_column] == 'zbilingowany') &
(filtered_combined[package_column].str.contains('INT', na=False))
]
# Extract and count unique dates in the 'Termin wizyty/montażu' column
filtered_billed_int['Date Only'] = pd.to_datetime(filtered_billed_int[date_column].str.split(' ').str[0], format='%d.%m.%Y')
filtered_billed_int['Month'] = filtered_billed_int['Date Only'].dt.strftime('%m')
filtered_billed_int['Day'] = filtered_billed_int['Date Only'].dt.strftime('%d.%m')
date_counts = filtered_billed_int.groupby('Month')['Day'].value_counts().sort_index(level=[0, 1])
# Print the date counts for the 'Date Only' column, grouped and sorted by month
print(Fore.GREEN + "Data instalacji:")
current_month = None
for (month, day), count in date_counts.items():
if month != current_month:
current_month = month
print(f"\n{month}.")
print(f"{day}: {count}")
# Filter for 'TOP 3' conditions: 'INT' in package column, 'www nowe' or 'mmp nowe' in source column, and 'zbilingowany' in status column
top_filtered = filtered_final[
(filtered_final[source_column].isin(['www nowe', 'mmp nowe'])) &
(filtered_final[status_column] == 'zbilingowany')
]
# Get the top 3 most frequent package names
top_packages = top_filtered[package_column].value_counts().head(3)
# Print the top 3 package names
print(Fore.GREEN + "TOP 3 'INT' packages:")
for package, count in top_packages.items():
print(f"{package}: {count}")
else:
print(Fore.RED + "Failed to load data.")
if __name__ == "__main__":
main()
Editor is loading...
Leave a Comment