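# Paste-site metadata (not part of the script):
#   Title: Untitled
#   Author: unknown
#   Format: plain_text
#   Posted: a year ago
#   Size: 4.0 kB
#   Views: 7
#   Indexable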
# Toy end-to-end example: tune an SGDClassifier pipeline and a
# CatBoostClassifier on a six-row Name/Age dataset, print each model's
# accuracy and classification report, then combine the two tuned models in a
# VotingClassifier and report on that as well.
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, Pool, cv
from sklearn.compose import make_column_transformer
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import (
    GridSearchCV,
    RandomizedSearchCV,
    train_test_split,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Sample data in lists
positive_name = ['Alice', 'Bob', 'Charlie']
negative_name = ['David', 'Emma', 'Frank']
positive_age = [25, 30, 35]
negative_age = [40, 45, 50]

# Create a dictionary with lists as values
data = {
    'Name': positive_name + negative_name,
    'Age': positive_age + negative_age,
    'Label': ['positive'] * len(positive_name) + ['negative'] * len(negative_name),
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Separate features (Name and Age) and target label (Label)
X = df[['Name', 'Age']]
y = df['Label']

# Split the data into training and testing sets.
# stratify=y keeps both classes present on each side of the split; without it
# a six-row dataset can easily end up with a single-class train or test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Stratified K-fold cannot use more folds than the smallest class has training
# rows, so cap the requested 5 folds by what the training split supports.
n_folds = min(5, int(y_train.value_counts().min()))
if n_folds < 2:
    raise ValueError(
        "Need at least 2 training samples per class for cross-validation."
    )

# 'Name' is a string column, so it cannot go through StandardScaler.
# One-hot encode it (ignoring names unseen during fit) and scale only 'Age'.
preprocessor = make_column_transformer(
    (OneHotEncoder(handle_unknown='ignore'), ['Name']),
    (StandardScaler(), ['Age']),
)

# Create the pipeline for SGDClassifier
sgd_pipeline = make_pipeline(preprocessor, SGDClassifier(random_state=42))

# Define hyperparameters grid for SGDClassifier.
# 'log_loss' is the current spelling of the logistic loss; the old alias 'log'
# is rejected by recent scikit-learn releases.
param_grid_sgd = {
    'sgdclassifier__loss': ['hinge', 'log_loss', 'modified_huber'],
    'sgdclassifier__alpha': [0.0001, 0.001, 0.01],
    'sgdclassifier__max_iter': [1000, 2000, 3000],
}

# Perform GridSearchCV for hyperparameter tuning for SGDClassifier
grid_search_sgd = GridSearchCV(sgd_pipeline, param_grid_sgd, cv=n_folds, n_jobs=-1)
grid_search_sgd.fit(X_train, y_train)

# Get the best estimator from GridSearchCV for SGDClassifier
best_estimator_sgd = grid_search_sgd.best_estimator_

# Make predictions on the test data using the best estimator for SGDClassifier
y_pred_sgd = best_estimator_sgd.predict(X_test)

# Evaluate the best estimator for SGDClassifier
accuracy_sgd = accuracy_score(y_test, y_pred_sgd)
print(f"SGDClassifier Accuracy: {accuracy_sgd:.2f}")

# Display SGDClassifier classification report.
# zero_division=0 avoids undefined-metric warnings on the tiny test set.
print("\nSGDClassifier Classification Report:")
print(classification_report(y_test, y_pred_sgd, zero_division=0))

# Create a CatBoostClassifier.
# 'Name' holds strings, so it must be declared as a categorical feature.
catboost_clf = CatBoostClassifier(random_state=42, verbose=0, cat_features=['Name'])

# Define hyperparameters grid for CatBoostClassifier
param_grid_catboost = {
    'learning_rate': [0.01, 0.1, 0.5],
    'depth': [3, 5, 7],
    'iterations': [100, 200, 300],
}

# Perform RandomizedSearchCV for hyperparameter tuning for CatBoostClassifier.
# random_state makes the sampled parameter combinations reproducible.
# Logging is already silenced via verbose=0 on the estimator, so no extra
# fit-time verbose argument is needed.
random_search_catboost = RandomizedSearchCV(
    catboost_clf,
    param_grid_catboost,
    cv=n_folds,
    n_jobs=-1,
    n_iter=10,
    random_state=42,
)
random_search_catboost.fit(X_train, y_train)

# Get the best estimator from RandomizedSearchCV for CatBoostClassifier
best_estimator_catboost = random_search_catboost.best_estimator_

# Make predictions on the test data using the best estimator for CatBoostClassifier
y_pred_catboost = best_estimator_catboost.predict(X_test)

# Evaluate the best estimator for CatBoostClassifier
accuracy_catboost = accuracy_score(y_test, y_pred_catboost)
print(f"\nCatBoostClassifier Accuracy: {accuracy_catboost:.2f}")

# Display CatBoostClassifier classification report
print("\nCatBoostClassifier Classification Report:")
print(classification_report(y_test, y_pred_catboost, zero_division=0))

# Soft voting averages predict_proba outputs, but SGDClassifier only provides
# predict_proba for the 'log_loss' and 'modified_huber' losses. If the search
# selected 'hinge', fall back to hard (majority) voting instead of crashing.
voting_mode = 'soft' if hasattr(best_estimator_sgd, 'predict_proba') else 'hard'

# Create an ensemble with VotingClassifier
ensemble = VotingClassifier(
    estimators=[('sgd', best_estimator_sgd), ('catboost', best_estimator_catboost)],
    voting=voting_mode,
)
ensemble.fit(X_train, y_train)

# Make predictions on the test data using the ensemble
y_pred_ensemble = ensemble.predict(X_test)

# Evaluate the ensemble
accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)
print(f"\nEnsemble Accuracy: {accuracy_ensemble:.2f}")

# Display ensemble classification report
print("\nEnsemble Classification Report:")
print(classification_report(y_test, y_pred_ensemble, zero_division=0))
# Paste-site footer (not part of the script):
#   Editor is loading...
#   Leave a Comment