import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler

# Load the iris dataset, keeping only the first two features
iris = load_iris()
X, y = iris.data[:, :2], iris.target

# Display the class names available in the original dataset
class_names_original = iris.target_names
print("Class names available in the original dataset:", class_names_original)

# Create a DataFrame to inspect the original data
df = pd.DataFrame(np.column_stack((X, y)), columns=iris.feature_names[:2] + ['target'])

# Display the first 10 rows of the original dataset
print("\nFirst 10 rows of the original dataset:")
print(df.head(10))

# Display a summary of the original dataset
print("\nSummary of the original dataset:")
print(df.describe())

# Select only the first two classes for binary classification
selected_class_names = class_names_original[:2]
selected_labels = [iris.target_names.tolist().index(name) for name in selected_class_names]
binary_mask = np.isin(y, selected_labels)
X, y = X[binary_mask], y[binary_mask]

# Display the selected class names for binary classification
print(f"\nSelected class names for binary classification: {selected_class_names}")

# Introduce noise by randomly flipping 20% of the labels
np.random.seed(42)  # for reproducibility
random_indices = np.random.choice(len(y), size=int(0.2 * len(y)), replace=False)
y[random_indices] = 1 - y[random_indices]  # flip labels

# Standardize the features (optional but often recommended for logistic regression)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Create a logistic regression model
model = LogisticRegression()

# Cross-validated class predictions (5-fold)
y_pred_cv = cross_val_predict(model, X, y, cv=5)

# Calculate accuracy using cross-validation scores
accuracy_cv = np.mean(cross_val_score(model, X, y, cv=5, scoring='accuracy'))
print(f"\nCross-validated Accuracy: {accuracy_cv:.2f}")

# Display the unique classes used for classification
unique_classes = np.unique(y)
print(f"Unique classes used for classification: {unique_classes}")

# Compute and display the confusion matrix
cm = confusion_matrix(y, y_pred_cv)
print("\nConfusion Matrix:")
print(cm)

# Define the sigmoid function, which maps decision-function values to probabilities
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Cross-validated decision-function values, converted to predicted probabilities
# (by default cross_val_predict returns hard 0/1 labels, not probabilities)
decision_cv = cross_val_predict(model, X, y, cv=5, method='decision_function')
y_proba_cv = sigmoid(decision_cv)

# Scatter plot of predicted probabilities against the first feature
plt.scatter(X[:, 0], y_proba_cv, label='Predicted Probabilities', marker='o', c=y, cmap='coolwarm', alpha=0.7)
plt.xlabel('Feature 1 (standardized)')
plt.ylabel('Predicted Probability')
plt.title('Cross-Validated Predicted Probabilities on Binary Iris Dataset with Label Noise')
plt.legend()
plt.show()