Untitled
unknown
plain_text
a year ago
2.3 kB
48
Indexable
# Dependencies (install once from a shell or a notebook cell):
#   pip install pandas mlxtend matplotlib
# Importing necessary libraries
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd
# Load the dataset: one row per transaction with up to five item slots.
# Unused slots are None, which pandas treats as missing values.
data = {
    'TransactionID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Item1': ['Bread', 'Bread', 'Milk', 'Bread', 'Bread', 'Diaper', 'Bread', 'Bread', 'Milk', 'Bread'],
    'Item2': ['Milk', 'Diaper', 'Diaper', 'Milk', 'Diaper', 'Cola', 'Milk', None, 'Diaper', 'Diaper'],
    'Item3': ['Diaper', 'Beer', 'Cola', None, 'Beer', None, 'Diaper', None, None, 'Cola'],
    'Item4': [None, None, None, None, 'Cola', None, 'Beer', None, None, None],
    'Item5': [None, None, None, None, None, None, None, None, None, None],
}
df = pd.DataFrame(data)

# Print the first 10 rows to verify the dataset
print("Initial Dataset:")
print(df.head(10))

# Collect every distinct item that appears in any item column.
items = set()
for col in df.columns[1:]:  # Exclude 'TransactionID'
    items.update(df[col].dropna().unique())

# Sort the items so the encoded columns come out in the same order on every
# run; iterating the set directly would depend on string hash randomization.
itemset = sorted(items)

# One-hot encode the transactions: one row per transaction and one 0/1
# column per item.
encoded_vals = []
for _, row in df.iterrows():
    # Positional slice skips 'TransactionID'; dropna() discards empty slots.
    rowset = set(row.iloc[1:].dropna())
    encoded_vals.append({item: int(item in rowset) for item in itemset})

# Convert to DataFrame (one-hot encoded DataFrame). Passing the column list
# keeps the item columns present even if there are no transactions.
ohe_df = pd.DataFrame(encoded_vals, columns=itemset)

# Check the encoded data
print("\nOne-Hot Encoded Data:")
print(ohe_df.head())
# Apply the Apriori algorithm with a lower support threshold (0.1).
# The 0/1 frame is cast to bool at the call site: mlxtend documents boolean
# input as the preferred encoding and warns about non-bool frames. The cast
# does not modify ohe_df itself.
freq_items = apriori(ohe_df.astype(bool), min_support=0.1, use_colnames=True, verbose=1)

# Display the frequent itemsets
print("\nFrequent Itemsets:")
print(freq_items.head())

# Generate association rules with minimum confidence of 0.6
rules = association_rules(freq_items, metric="confidence", min_threshold=0.6)

# Display the association rules
print("\nAssociation Rules:")
print(rules.head())
# Optional: scatter plot of each rule's support against its confidence
import matplotlib.pyplot as plt

support_values = rules['support']
confidence_values = rules['confidence']

plt.scatter(support_values, confidence_values, marker="o", alpha=0.5)
plt.title('Support vs Confidence')
plt.xlabel('Support')
plt.ylabel('Confidence')
plt.show()
Editor is loading...
Leave a Comment