# Paste-site metadata (not part of the script):
# Untitled | unknown | plain_text | 2 years ago | 4.0 kB | 9 | Indexable
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, Pool, cv
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
# Sample data in lists: one group of names/ages per class label.
positive_name = ['Alice', 'Bob', 'Charlie']
negative_name = ['David', 'Emma', 'Frank']
positive_age = [25, 30, 35]
negative_age = [40, 45, 50]
# Create a dictionary with lists as values. Positives are listed first and
# negatives second in every column, so the 'Label' column is built in the
# same order to stay aligned with 'Name' and 'Age'.
data = {
    'Name': positive_name + negative_name,
    'Age': positive_age + negative_age,
    'Label': ['positive'] * len(positive_name) + ['negative'] * len(negative_name),
}
# Create a DataFrame from the dictionary
df = pd.DataFrame(data)
# Separate features (Name and Age) and target label (Label)
X = df[['Name', 'Age']]
y = df['Label']
# Split the data into training and testing sets.
# stratify=y keeps both labels represented on each side of the split; with
# only six rows, an unstratified shuffle can leave the training or the test
# portion with a single class, which makes fitting and the classification
# report degenerate.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Create the pipeline for SGDClassifier.
# The feature frame mixes a text column ('Name') with a numeric one ('Age'),
# and StandardScaler cannot be fitted on strings. Columns are therefore
# routed by dtype: numeric columns are standardized, every other column is
# one-hot encoded. handle_unknown='ignore' makes categories that were not
# seen during fit encode as all zeros instead of raising at predict time.
feature_encoder = ColumnTransformer(
    transformers=[
        (
            'numeric',
            StandardScaler(),
            make_column_selector(dtype_include=np.number),
        ),
        (
            'categorical',
            OneHotEncoder(handle_unknown='ignore'),
            make_column_selector(dtype_exclude=np.number),
        ),
    ]
)
sgd_pipeline = make_pipeline(feature_encoder, SGDClassifier(random_state=42))
# Define hyperparameters grid for SGDClassifier.
# Keys use the step name that make_pipeline derives from the class name.
param_grid_sgd = {
    # 'log_loss' is the current name of the logistic loss; the older alias
    # 'log' is no longer accepted by recent scikit-learn releases.
    'sgdclassifier__loss': ['hinge', 'log_loss', 'modified_huber'],
    'sgdclassifier__alpha': [0.0001, 0.001, 0.01],
    'sgdclassifier__max_iter': [1000, 2000, 3000],
}
# Stratified cross-validation cannot use more folds than the smallest class
# has training rows, so cap the requested 5 folds accordingly (2 is the
# minimum number of folds scikit-learn accepts).
n_folds_sgd = max(2, min(5, int(y_train.value_counts().min())))
# Perform GridSearchCV for hyperparameter tuning for SGDClassifier
grid_search_sgd = GridSearchCV(sgd_pipeline, param_grid_sgd, cv=n_folds_sgd, n_jobs=-1)
grid_search_sgd.fit(X_train, y_train)
# Get the best estimator from GridSearchCV for SGDClassifier
best_estimator_sgd = grid_search_sgd.best_estimator_
# Make predictions on the test data using the best estimator for SGDClassifier
y_pred_sgd = best_estimator_sgd.predict(X_test)
# Evaluate the best estimator for SGDClassifier
accuracy_sgd = accuracy_score(y_test, y_pred_sgd)
print(f"SGDClassifier Accuracy: {accuracy_sgd:.2f}")
# Display SGDClassifier classification report
print("\nSGDClassifier Classification Report:")
print(classification_report(y_test, y_pred_sgd))
# Create a CatBoostClassifier.
# CatBoost tries to parse every undeclared feature as a float, so the
# non-numeric columns of the training frame (the text column 'Name') are
# passed as cat_features to be handled as categorical.
categorical_columns = X_train.select_dtypes(exclude='number').columns.tolist()
catboost_clf = CatBoostClassifier(
    random_state=42,
    verbose=0,
    cat_features=categorical_columns,
)
# Define hyperparameters grid for CatBoostClassifier
param_grid_catboost = {
    'learning_rate': [0.01, 0.1, 0.5],
    'depth': [3, 5, 7],
    'iterations': [100, 200, 300],
}
# Stratified cross-validation cannot use more folds than the smallest class
# has training rows, so cap the requested 5 folds accordingly (2 is the
# minimum number of folds scikit-learn accepts).
n_folds_catboost = max(2, min(5, int(y_train.value_counts().min())))
# Perform RandomizedSearchCV for hyperparameter tuning for CatBoostClassifier.
# random_state fixes which 10 of the 27 combinations are sampled, so repeated
# runs evaluate the same candidates.
random_search_catboost = RandomizedSearchCV(
    catboost_clf,
    param_grid_catboost,
    cv=n_folds_catboost,
    n_jobs=-1,
    n_iter=10,
    random_state=42,
)
random_search_catboost.fit(X_train, y_train, verbose=0)
# Get the best estimator from RandomizedSearchCV for CatBoostClassifier
best_estimator_catboost = random_search_catboost.best_estimator_
# Make predictions on the test data using the best estimator for CatBoostClassifier
y_pred_catboost = best_estimator_catboost.predict(X_test)
# Evaluate the best estimator for CatBoostClassifier
accuracy_catboost = accuracy_score(y_test, y_pred_catboost)
print(f"\nCatBoostClassifier Accuracy: {accuracy_catboost:.2f}")
# Display CatBoostClassifier classification report
print("\nCatBoostClassifier Classification Report:")
print(classification_report(y_test, y_pred_catboost))
# Create an ensemble with VotingClassifier from the two tuned models.
ensemble_members = [('sgd', best_estimator_sgd), ('catboost', best_estimator_catboost)]
# Soft voting averages predict_proba outputs, but the tuned SGD model only
# exposes predict_proba for some losses (not for 'hinge', which is part of
# its search grid). Use soft voting only when every member supports it and
# fall back to majority ('hard') voting otherwise.
all_members_give_probabilities = all(
    hasattr(member, 'predict_proba') for _, member in ensemble_members
)
ensemble = VotingClassifier(
    estimators=ensemble_members,
    voting='soft' if all_members_give_probabilities else 'hard',
)
# VotingClassifier refits clones of the members on the training data.
ensemble.fit(X_train, y_train)
# Make predictions on the test data using the ensemble
y_pred_ensemble = ensemble.predict(X_test)
# Evaluate the ensemble
accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)
print(f"\nEnsemble Accuracy: {accuracy_ensemble:.2f}")
# Display ensemble classification report
print("\nEnsemble Classification Report:")
print(classification_report(y_test, y_pred_ensemble))
# Paste-site footer (not part of the script): "Editor is loading..." | "Leave a Comment"