Untitled
unknown
plain_text
21 days ago
2.3 kB
40
Indexable
Never
# Market-basket analysis demo: mine frequent itemsets and association rules
# from a small hand-written transaction table using mlxtend's Apriori.
#
# Dependencies (install once from a shell, or with `%pip` in a notebook):
#     pip install pandas mlxtend matplotlib

import pandas as pd


def build_transactions() -> pd.DataFrame:
    """Return the sample transaction table.

    Each row is one transaction: a ``TransactionID`` followed by up to five
    item slots (``Item1`` .. ``Item5``). Unused slots hold ``None``.
    """
    data = {
        'TransactionID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'Item1': ['Bread', 'Bread', 'Milk', 'Bread', 'Bread',
                  'Diaper', 'Bread', 'Bread', 'Milk', 'Bread'],
        'Item2': ['Milk', 'Diaper', 'Diaper', 'Milk', 'Diaper',
                  'Cola', 'Milk', None, 'Diaper', 'Diaper'],
        'Item3': ['Diaper', 'Beer', 'Cola', None, 'Beer',
                  None, 'Diaper', None, None, 'Cola'],
        'Item4': [None, None, None, None, 'Cola',
                  None, 'Beer', None, None, None],
        'Item5': [None, None, None, None, None,
                  None, None, None, None, None],
    }
    return pd.DataFrame(data)


def one_hot_encode(transactions: pd.DataFrame,
                   id_column: str = 'TransactionID') -> pd.DataFrame:
    """One-hot encode a wide transaction table.

    Args:
        transactions: Table with one row per transaction; every column other
            than ``id_column`` is an item slot, with missing values marking
            empty slots.
        id_column: Name of the identifier column to exclude from the items.
            It is ignored if the column is not present.

    Returns:
        A boolean DataFrame with one column per distinct item (sorted, so the
        column order is stable between runs) and one row per transaction, in
        the same order as ``transactions``.
    """
    # Drop the identifier by name rather than by position so the encoding
    # does not depend on the column order of the input.
    item_columns = transactions.drop(columns=[id_column], errors='ignore')

    # One set of items per transaction; empty slots (None/NaN) are discarded.
    baskets = [set(row.dropna()) for _, row in item_columns.iterrows()]

    # Sort the vocabulary: iterating a raw set gives an order that can differ
    # from run to run, which would shuffle the output columns.
    all_items = sorted(set().union(*baskets), key=str)

    # mlxtend's apriori expects boolean input, so encode as True/False
    # instead of 1/0.
    return pd.DataFrame(
        [[item in basket for item in all_items] for basket in baskets],
        columns=all_items,
        dtype=bool,
    )


def mine_association_rules(ohe_df: pd.DataFrame,
                           min_support: float = 0.1,
                           min_confidence: float = 0.6) -> tuple:
    """Run Apriori and derive association rules.

    Args:
        ohe_df: Boolean one-hot encoded transactions.
        min_support: Minimum support for an itemset to count as frequent.
        min_confidence: Minimum confidence for a rule to be kept.

    Returns:
        A ``(freq_items, rules)`` pair of DataFrames as produced by mlxtend.
    """
    # Imported here so the data-preparation helpers above stay usable even
    # when mlxtend is not installed.
    from mlxtend.frequent_patterns import apriori, association_rules

    freq_items = apriori(ohe_df, min_support=min_support,
                         use_colnames=True, verbose=1)
    rules = association_rules(freq_items, metric="confidence",
                              min_threshold=min_confidence)
    return freq_items, rules


def plot_support_vs_confidence(rules: pd.DataFrame) -> None:
    """Show a scatter plot of rule support against rule confidence."""
    # Plotting is optional, so matplotlib is only imported when needed.
    import matplotlib.pyplot as plt

    plt.scatter(rules['support'], rules['confidence'], alpha=0.5, marker="o")
    plt.xlabel('Support')
    plt.ylabel('Confidence')
    plt.title('Support vs Confidence')
    plt.show()


def main() -> tuple:
    """Run the full demo and return ``(df, ohe_df, freq_items, rules)``."""
    df = build_transactions()

    # Print the first 10 rows to verify the dataset
    print("Initial Dataset:")
    print(df.head(10))

    ohe_df = one_hot_encode(df)

    # Check the encoded data
    print("\nOne-Hot Encoded Data:")
    print(ohe_df.head())

    # Low support threshold (0.1) and a minimum confidence of 0.6
    freq_items, rules = mine_association_rules(
        ohe_df, min_support=0.1, min_confidence=0.6)

    print("\nFrequent Itemsets:")
    print(freq_items.head())

    print("\nAssociation Rules:")
    print(rules.head())

    # Optional: plotting support vs confidence
    plot_support_vs_confidence(rules)

    return df, ohe_df, freq_items, rules


if __name__ == "__main__":
    # Keep the intermediate results as module-level names so they remain
    # available for inspection after the script (or notebook cell) has run.
    df, ohe_df, freq_items, rules = main()
Leave a Comment