Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
2.8 kB
3
Indexable
Never
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Synthetic sales records: 50 products, one row each.
data = {}
data['Product ID'] = np.arange(1, 51)
data['Product Name'] = [f'Product{number}' for number in range(1, 51)]
# The random columns are drawn in a fixed order: category, price, quantity.
data['Category'] = np.random.choice(['Electronics', 'Clothing', 'Home Goods'], size=50)
# Prices are uniform draws between 10 and 100, rounded to two decimals.
data['Price'] = np.round(np.random.uniform(10, 100, size=50), 2)
# randint excludes the upper bound, so quantities fall in 1..49.
data['Quantity Sold'] = np.random.randint(1, 50, size=50)
data['Date Sold'] = pd.date_range('2022-01-01', periods=50)

# Turn the column mapping into a pandas DataFrame.
sales_df = pd.DataFrame(data)

# Alternative data source: read the records from a CSV file instead.
#sales_df = pd.read_csv('sales_data.csv')

# Data cleaning: drop repeated rows first, then rows with missing values.
sales_df = sales_df.drop_duplicates().dropna()

# Exploratory data analysis
# Revenue of a row is its unit price times the units sold. Summing 'Price'
# alone would ignore 'Quantity Sold', so the "total sales" figures would just
# be sums of unit prices rather than money actually taken.
row_revenue = (sales_df['Price'] * sales_df['Quantity Sold']).rename('Total Sales')

# Total revenue per product / per category, largest first. The grouping key is
# the matching column of sales_df, which shares row_revenue's index.
total_sales_by_product = row_revenue.groupby(sales_df['Product Name']).sum().sort_values(ascending=False)
total_sales_by_category = row_revenue.groupby(sales_df['Category']).sum().sort_values(ascending=False)

# Mean unit price per product / per category, highest first.
avg_price_by_product = sales_df.groupby('Product Name')['Price'].mean().sort_values(ascending=False)
avg_price_by_category = sales_df.groupby('Category')['Price'].mean().sort_values(ascending=False)

# Units sold per product / per category, keeping at most the five largest.
top_selling_products = sales_df.groupby('Product Name')['Quantity Sold'].sum().sort_values(ascending=False).head(5)
top_selling_categories = sales_df.groupby('Category')['Quantity Sold'].sum().sort_values(ascending=False).head(5)

# Visualization

def _show_bar_chart(series, title, x_label, y_label):
    """Draw *series* as a bar chart on a new 10x8 figure and display it."""
    plt.figure(figsize=(10, 8))
    series.plot(kind='bar')
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()


# One chart per summary, shown one after another in this order.
_show_bar_chart(total_sales_by_product, 'Total Sales by Product', 'Product Name', 'Total Sales')
_show_bar_chart(total_sales_by_category, 'Total Sales by Category', 'Category', 'Total Sales')
_show_bar_chart(avg_price_by_product, 'Average Price by Product', 'Product Name', 'Average Price')
_show_bar_chart(avg_price_by_category, 'Average Price by Category', 'Category', 'Average Price')
_show_bar_chart(top_selling_products, 'Top Selling Products', 'Product Name', 'Quantity Sold')
_show_bar_chart(top_selling_categories, 'Top Selling Categories', 'Category', 'Quantity Sold')

# Print every summary under its heading. print() puts its default single-space
# separator after the heading's trailing newline, so the first line of each
# table starts with one space.
for heading, summary in (
    ('Total Sales by Product:\n', total_sales_by_product),
    ('Total Sales by Category:\n', total_sales_by_category),
    ('Average Price by Product:\n', avg_price_by_product),
    ('Average Price by Category:\n', avg_price_by_category),
    ('Top Selling Products:\n', top_selling_products),
    ('Top Selling Categories:\n', top_selling_categories),
):
    print(heading, summary)