# Paste metadata (pastecode.io page residue, not part of the program):
# uploader mail@pastecode.io, posted a month ago, 4.0 kB
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, Pool, cv
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

# Sample data in lists: one name/age list per class
positive_name = ['Alice', 'Bob', 'Charlie']
negative_name = ['David', 'Emma', 'Frank']
positive_age = [25, 30, 35]
negative_age = [40, 45, 50]

# Create a dictionary with lists as values.
# Positive samples come first, then negative ones, so the three columns
# stay aligned row by row.
data = {
    'Name': positive_name + negative_name,
    'Age': positive_age + negative_age,
    'Label': ['positive'] * len(positive_name) + ['negative'] * len(negative_name),
}

# Create a DataFrame from the dictionary (columns: Name, Age, Label)
df = pd.DataFrame(data)

# Separate features (Name and Age) and target label (Label)
X = df[['Name', 'Age']]
y = df['Label']

# Split the data into training and testing sets.
# Stratify on the label so both classes are present on each side of the
# split; with so few rows an unstratified split can leave a class out.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Stratified K-fold cannot use more folds than the rarest class has samples,
# so cap the fold count (at most 5) by the smallest class in the training set.
n_splits = min(5, int(y_train.value_counts().min()))
if n_splits < 2:
    raise ValueError(
        "Cross-validation needs at least 2 training samples per class; "
        f"the smallest class has {int(y_train.value_counts().min())}."
    )

# Create the pipeline for SGDClassifier.
# 'Name' is a string column, so it is one-hot encoded (names unseen during
# fitting are encoded as all zeros); only the numeric 'Age' column is scaled.
preprocessor = ColumnTransformer([
    ('name', OneHotEncoder(handle_unknown='ignore'), ['Name']),
    ('age', StandardScaler(), ['Age']),
])
sgd_pipeline = make_pipeline(preprocessor, SGDClassifier(random_state=42))

# Define hyperparameters grid for SGDClassifier.
# 'log_loss' is the current name of the logistic loss (formerly 'log').
param_grid_sgd = {
    'sgdclassifier__loss': ['hinge', 'log_loss', 'modified_huber'],
    'sgdclassifier__alpha': [0.0001, 0.001, 0.01],
    'sgdclassifier__max_iter': [1000, 2000, 3000],
}

# Perform GridSearchCV for hyperparameter tuning for SGDClassifier
grid_search_sgd = GridSearchCV(sgd_pipeline, param_grid_sgd, cv=n_splits, n_jobs=-1)
grid_search_sgd.fit(X_train, y_train)

# Get the best estimator from GridSearchCV for SGDClassifier
best_estimator_sgd = grid_search_sgd.best_estimator_

# Make predictions on the test data using the best estimator for SGDClassifier
y_pred_sgd = best_estimator_sgd.predict(X_test)

# Evaluate the best estimator for SGDClassifier
accuracy_sgd = accuracy_score(y_test, y_pred_sgd)
print(f"SGDClassifier Accuracy: {accuracy_sgd:.2f}")

# Display SGDClassifier classification report
print("\nSGDClassifier Classification Report:")
print(classification_report(y_test, y_pred_sgd))

# Create a CatBoostClassifier.
# 'Name' is declared categorical so CatBoost does not try to parse it as a
# number; verbose=0 here already silences training output.
catboost_clf = CatBoostClassifier(random_state=42, verbose=0, cat_features=['Name'])

# Define hyperparameters grid for CatBoostClassifier
param_grid_catboost = {
    'learning_rate': [0.01, 0.1, 0.5],
    'depth': [3, 5, 7],
    'iterations': [100, 200, 300],
}

# Perform RandomizedSearchCV for hyperparameter tuning for CatBoostClassifier.
# random_state makes the sampled parameter combinations reproducible.
random_search_catboost = RandomizedSearchCV(
    catboost_clf,
    param_grid_catboost,
    cv=n_splits,
    n_jobs=-1,
    n_iter=10,
    random_state=42,
)
random_search_catboost.fit(X_train, y_train)

# Get the best estimator from RandomizedSearchCV for CatBoostClassifier
best_estimator_catboost = random_search_catboost.best_estimator_

# Make predictions on the test data using the best estimator for CatBoostClassifier
y_pred_catboost = best_estimator_catboost.predict(X_test)

# Evaluate the best estimator for CatBoostClassifier
accuracy_catboost = accuracy_score(y_test, y_pred_catboost)
print(f"\nCatBoostClassifier Accuracy: {accuracy_catboost:.2f}")

# Display CatBoostClassifier classification report
print("\nCatBoostClassifier Classification Report:")
print(classification_report(y_test, y_pred_catboost))

# Create an ensemble with VotingClassifier.
# Soft voting averages predict_proba outputs, which SGDClassifier only offers
# for probabilistic losses; fall back to hard (majority) voting when the
# tuned SGD pipeline (e.g. with hinge loss) cannot produce probabilities.
voting_mode = 'soft' if hasattr(best_estimator_sgd, 'predict_proba') else 'hard'
ensemble = VotingClassifier(
    estimators=[('sgd', best_estimator_sgd), ('catboost', best_estimator_catboost)],
    voting=voting_mode,
)
ensemble.fit(X_train, y_train)

# Make predictions on the test data using the ensemble
y_pred_ensemble = ensemble.predict(X_test)

# Evaluate the ensemble
accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)
print(f"\nEnsemble Accuracy: {accuracy_ensemble:.2f}")

# Display ensemble classification report
print("\nEnsemble Classification Report:")
print(classification_report(y_test, y_pred_ensemble))
# (pastecode.io page footer residue: "Leave a Comment")