# Paste metadata: Untitled | unknown | plain_text | 2 years ago | 2.8 kB | 5 | Indexable
"""Exploratory analysis of a small product-sales table.

Run as a script, this builds a synthetic 50-product dataset, removes
duplicate and incomplete rows, aggregates sales by product and by category,
shows one bar chart per aggregate and finally prints every aggregate.
"""

import numpy as np
import pandas as pd

# Categories a synthetic product can be assigned to.
CATEGORIES = ('Electronics', 'Clothing', 'Home Goods')

# How many rows the "top selling" rankings keep.
TOP_N = 5

# One entry per report: (summary key, title, x-axis label, y-axis label).
# The title is used both as the chart title and as the printed heading.
REPORTS = (
    ('total_sales_by_product', 'Total Sales by Product',
     'Product Name', 'Total Sales'),
    ('total_sales_by_category', 'Total Sales by Category',
     'Category', 'Total Sales'),
    ('avg_price_by_product', 'Average Price by Product',
     'Product Name', 'Average Price'),
    ('avg_price_by_category', 'Average Price by Category',
     'Category', 'Average Price'),
    ('top_selling_products', 'Top Selling Products',
     'Product Name', 'Quantity Sold'),
    ('top_selling_categories', 'Top Selling Categories',
     'Category', 'Quantity Sold'),
)


def build_sample_sales(n_products=50, seed=None):
    """Return a synthetic sales DataFrame with one row per product.

    Args:
        n_products: Number of products (rows) to generate.
        seed: Optional seed for the random generator. Pass a value to get
            the same table on every call; ``None`` draws fresh random data.

    Returns:
        A DataFrame with the columns 'Product ID', 'Product Name',
        'Category', 'Price', 'Quantity Sold' and 'Date Sold'.
    """
    rng = np.random.default_rng(seed)
    return pd.DataFrame({
        'Product ID': np.arange(1, n_products + 1),
        'Product Name': [f'Product{i}' for i in range(1, n_products + 1)],
        'Category': rng.choice(CATEGORIES, size=n_products),
        # Unit price drawn from [10, 100), rounded to cents.
        'Price': np.round(rng.uniform(10, 100, size=n_products), 2),
        # Units sold: 1..49 (the upper bound is exclusive).
        'Quantity Sold': rng.integers(1, 50, size=n_products),
        # One consecutive calendar day per product.
        'Date Sold': pd.date_range('2022-01-01', periods=n_products),
    })


def clean_sales(df):
    """Return *df* without exact duplicate rows and without rows holding NaN."""
    return df.drop_duplicates().dropna()


def summarize_sales(df, top_n=TOP_N):
    """Aggregate a sales table by product and by category.

    Args:
        df: DataFrame with at least the columns 'Product Name', 'Category',
            'Price' and 'Quantity Sold'.
        top_n: Number of rows kept in the two "top selling" rankings.

    Returns:
        A dict mapping each key listed in ``REPORTS`` to a Series sorted in
        descending order.
    """
    # Sales value of a row is unit price times units sold. Summing 'Price'
    # alone would ignore the quantity and merely repeat the price figures.
    sales = df.assign(**{'Total Sales': df['Price'] * df['Quantity Sold']})

    def ranked(key, column, how):
        """Group by *key*, reduce *column* with *how*, sort descending."""
        return sales.groupby(key)[column].agg(how).sort_values(ascending=False)

    return {
        'total_sales_by_product': ranked('Product Name', 'Total Sales', 'sum'),
        'total_sales_by_category': ranked('Category', 'Total Sales', 'sum'),
        'avg_price_by_product': ranked('Product Name', 'Price', 'mean'),
        'avg_price_by_category': ranked('Category', 'Price', 'mean'),
        'top_selling_products':
            ranked('Product Name', 'Quantity Sold', 'sum').head(top_n),
        'top_selling_categories':
            ranked('Category', 'Quantity Sold', 'sum').head(top_n),
    }


def plot_bar(series, title, xlabel, ylabel):
    """Show *series* as a bar chart in its own 10x8 inch figure."""
    # Imported here so the data helpers above stay importable on machines
    # that have no plotting stack installed.
    import matplotlib.pyplot as plt

    if series.empty:
        # Nothing to draw (e.g. every row was removed during cleaning).
        return

    _, ax = plt.subplots(figsize=(10, 8))
    series.plot(kind='bar', ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.show()


def main():
    """Build, clean and summarise the data, then plot and print each report."""
    sales_df = build_sample_sales()
    # To analyse real data instead, load it from a CSV file, e.g.:
    #   sales_df = pd.read_csv('sales_data.csv')
    sales_df = clean_sales(sales_df)

    summaries = summarize_sales(sales_df)

    # All charts first, then all printed tables.
    for key, title, xlabel, ylabel in REPORTS:
        plot_bar(summaries[key], title, xlabel, ylabel)
    for key, title, _, _ in REPORTS:
        print(title + ':\n', summaries[key])

    return summaries


if __name__ == '__main__':
    main()
# (paste-site page residue: "Editor is loading...")