# Untitled

# Import necessary libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Sample dataset: 14-row weather table used as a decision tree example
data = {
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    'Windy': [False, True, False, False, False, True, True, False, False, False, True, True, False, True],
    'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
}

# Convert dataset to DataFrame
df = pd.DataFrame(data)

# Preprocessing: one-hot encode the string-valued features.
# NOTE: by default pd.get_dummies only expands object/string/category columns,
# so the boolean 'Windy' column passes through unchanged as a single feature
# named 'Windy' -- there are no 'Windy_True' / 'Windy_False' columns in X.
X = pd.get_dummies(df[['Outlook', 'Temperature', 'Humidity', 'Windy']])
y = (df['PlayTennis'] == 'Yes').astype(int)  # Encode target: 1 = Yes, 0 = No

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the decision tree classifier. criterion='entropy' chooses
# splits by information gain, the same criterion ID3 uses; the underlying
# scikit-learn implementation is an optimised CART, not ID3 itself.
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)
clf.fit(X_train, y_train)

# Evaluate the model on the held-out rows
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the decision tree classifier: {accuracy:.2f}")

# Visualize the decision tree rules
tree_rules = export_text(clf, feature_names=list(X.columns))
print("\nDecision Tree Rules:\n")
print(tree_rules)

# Classify a new sample. Keys must be actual training columns: the one-hot
# indicator columns plus the raw boolean 'Windy' feature.
new_sample_dict = {
    'Outlook_Sunny': 1, 'Outlook_Overcast': 0, 'Outlook_Rain': 0,
    'Temperature_Hot': 0, 'Temperature_Mild': 1, 'Temperature_Cool': 0,
    'Humidity_High': 0, 'Humidity_Normal': 1, 'Windy': False
}

# Fail loudly on keys the model was never trained on; building the frame with
# columns=X.columns would otherwise drop them silently and default the
# corresponding feature to 0.
unknown_features = sorted(set(new_sample_dict) - set(X.columns))
if unknown_features:
    raise KeyError(f"New sample has features not seen in training: {unknown_features}")

# Align to the training column order; indicator columns absent from the dict
# are treated as 0 (category not active).
new_sample = pd.DataFrame([new_sample_dict]).reindex(columns=X.columns, fill_value=0)  # Ensure all columns are present
prediction = clf.predict(new_sample)
print("\nPrediction for new sample (1: PlayTennis=Yes, 0: PlayTennis=No):", prediction[0])
# Editor is loading...