import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

# Sample data in lists
positive_name = ['Alice', 'Bob', 'Charlie']
negative_name = ['David', 'Emma', 'Frank']
positive_age = [25, 30, 35]
negative_age = [40, 45, 50]

# Create a dictionary with lists as values
data = {
    'Name': positive_name + negative_name,
    'Age': positive_age + negative_age,
    'Label': ['positive'] * len(positive_name) + ['negative'] * len(negative_name)
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Separate features (Name and Age) and target label (Label).
# Name is a string column, so it is one-hot encoded here; StandardScaler
# and XGBoost both require numeric inputs.
X = pd.get_dummies(df[['Name', 'Age']], columns=['Name'], dtype=float)

# XGBClassifier expects integer class labels, so map the strings to 0/1
y = df['Label'].map({'negative': 0, 'positive': 1})

# Split the data into training and testing sets (stratified so both
# classes appear in the tiny train and test splits)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Create a pipeline with StandardScaler and XGBoost classifier
# (scaling is not required for tree models, but it keeps the pipeline generic)
pipeline = make_pipeline(StandardScaler(), XGBClassifier(random_state=42))

# Define hyperparameter grid for GridSearchCV
param_grid = {
    'xgbclassifier__n_estimators': [50, 100, 200],
    'xgbclassifier__max_depth': [3, 5, 7],
    'xgbclassifier__learning_rate': [0.1, 0.01, 0.001]
}

# Perform GridSearchCV for hyperparameter tuning.
# cv=2 because this toy training set has only two samples per class;
# use a larger cv with real data.
grid_search = GridSearchCV(pipeline, param_grid, cv=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best estimator from GridSearchCV
best_estimator = grid_search.best_estimator_

# Make predictions on the test data using the best estimator
y_pred = best_estimator.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Create an ensemble of a fresh XGBoost model and the tuned pipeline
# using a soft-voting VotingClassifier
xgb_clf = XGBClassifier(random_state=42)
voting_clf = VotingClassifier(
    estimators=[('xgb', xgb_clf), ('best', best_estimator)],
    voting='soft'
)
voting_clf.fit(X_train, y_train)

# Make predictions on the test data using the ensemble
y_pred_ensemble = voting_clf.predict(X_test)

# Evaluate the ensemble
accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)
print(f"\nEnsemble Accuracy: {accuracy_ensemble:.2f}")

# Display ensemble classification report
print("\nEnsemble Classification Report:")
print(classification_report(y_test, y_pred_ensemble, zero_division=0))