Untitled

mail@pastecode.io avatar
unknown
plain_text
21 days ago
2.3 kB
40
Indexable
Never
!pip install mlxtend
pip install pandas mlxtend matplotlib
# Importing necessary libraries
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

# Build the toy market-basket dataset: one row per transaction, with up to
# five item slots per row. Unused slots are None (missing).
data = dict(
    TransactionID=list(range(1, 11)),
    Item1=[
        'Bread', 'Bread', 'Milk', 'Bread', 'Bread',
        'Diaper', 'Bread', 'Bread', 'Milk', 'Bread',
    ],
    Item2=[
        'Milk', 'Diaper', 'Diaper', 'Milk', 'Diaper',
        'Cola', 'Milk', None, 'Diaper', 'Diaper',
    ],
    Item3=[
        'Diaper', 'Beer', 'Cola', None, 'Beer',
        None, 'Diaper', None, None, 'Cola',
    ],
    Item4=[
        None, None, None, None, 'Cola',
        None, 'Beer', None, None, None,
    ],
    # No transaction uses a fifth slot; the column is kept entirely empty.
    Item5=[None] * 10,
)

df = pd.DataFrame(data)

# Echo the full table (all 10 rows) as a sanity check.
print("Initial Dataset:", df.head(10), sep="\n")

# Collect every distinct item that appears in any item column. The first
# column ('TransactionID') is skipped and missing slots are ignored.
items = {
    value
    for column in df.columns[1:]
    for value in df[column].dropna().unique()
}

# Independent copy of the item universe used by the encoding step.
itemset = items.copy()

# One-hot encode the transactions: one row per transaction, one column per
# item, 1 if the item is in the transaction and 0 otherwise.
#
# Iterating a set of strings yields a different order from one interpreter
# run to the next (string hashing is randomized), which would shuffle the
# columns of the encoded frame on every run. Fix the column order once by
# sorting; key=str keeps the sort well-defined even for non-string items.
item_columns = sorted(itemset, key=str)

encoded_vals = []
for index, row in df.iterrows():
    # Items present in this transaction: skip 'TransactionID' (first entry)
    # and drop the empty slots.
    rowset = set(row[1:].dropna())
    labels = dict.fromkeys(item_columns, 0)  # Start with every item absent
    labels.update(dict.fromkeys(rowset, 1))  # Flag the items that are present
    encoded_vals.append(labels)

# Convert to DataFrame (one-hot encoded DataFrame); columns follow the key
# order of the row dictionaries, i.e. the sorted item order.
ohe_df = pd.DataFrame(encoded_vals)

# Check the encoded data
print("\nOne-Hot Encoded Data:")
print(ohe_df.head())

# Apply the Apriori algorithm with a lower support threshold (0.1): an
# itemset is kept if it occurs in at least 10% of the transactions.
# mlxtend wants a boolean one-hot frame; a 0/1 integer frame still works but
# triggers a DeprecationWarning about worse performance, so cast explicitly.
# The cast does not change which itemsets are found (0 -> False, 1 -> True).
freq_items = apriori(ohe_df.astype(bool), min_support=0.1, use_colnames=True, verbose=1)

# Display the frequent itemsets (first rows only)
print("\nFrequent Itemsets:")
print(freq_items.head())

# Generate association rules, keeping those with confidence >= 0.6
rules = association_rules(freq_items, metric="confidence", min_threshold=0.6)

# Display the association rules (first rows only)
print("\nAssociation Rules:")
print(rules.head())

# Optional: scatter plot of each rule's support against its confidence.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.scatter(rules['support'], rules['confidence'], alpha=0.5, marker="o")
ax.set(xlabel='Support', ylabel='Confidence', title='Support vs Confidence')
plt.show()
Leave a Comment