import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

# Sample data in lists
positive_name = ['Alice', 'Bob', 'Charlie']
negative_name = ['David', 'Emma', 'Frank']
positive_age = [25, 30, 35]
negative_age = [40, 45, 50]

# Create a dictionary with lists as values
data = {
    'Name': positive_name + negative_name,
    'Age': positive_age + negative_age,
    'Label': ['positive'] * len(positive_name) + ['negative'] * len(negative_name)
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Separate features (Name and Age) and target label (Label).
# Name is a string column, so it is one-hot encoded here; StandardScaler
# and XGBoost both require numeric inputs.
X = pd.get_dummies(df[['Name', 'Age']], columns=['Name'], dtype=float)

# XGBClassifier expects integer class labels, so map the strings to 0/1
y = df['Label'].map({'negative': 0, 'positive': 1})

# Split the data into training and testing sets (stratified so both
# classes appear in the tiny train and test splits)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Create a pipeline with StandardScaler and XGBoost classifier
# (scaling is not required for tree models, but it keeps the pipeline generic)
pipeline = make_pipeline(StandardScaler(), XGBClassifier(random_state=42))

# Define hyperparameter grid for GridSearchCV
param_grid = {
    'xgbclassifier__n_estimators': [50, 100, 200],
    'xgbclassifier__max_depth': [3, 5, 7],
    'xgbclassifier__learning_rate': [0.1, 0.01, 0.001]
}

# Perform GridSearchCV for hyperparameter tuning.
# cv=2 because this toy training set has only two samples per class;
# use a larger cv with real data.
grid_search = GridSearchCV(pipeline, param_grid, cv=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best estimator from GridSearchCV
best_estimator = grid_search.best_estimator_

# Make predictions on the test data using the best estimator
y_pred = best_estimator.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Create an ensemble of a fresh XGBoost model and the tuned pipeline
# using a soft-voting VotingClassifier
xgb_clf = XGBClassifier(random_state=42)
voting_clf = VotingClassifier(
    estimators=[('xgb', xgb_clf), ('best', best_estimator)],
    voting='soft'
)
voting_clf.fit(X_train, y_train)

# Make predictions on the test data using the ensemble
y_pred_ensemble = voting_clf.predict(X_test)

# Evaluate the ensemble
accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)
print(f"\nEnsemble Accuracy: {accuracy_ensemble:.2f}")

# Display ensemble classification report
print("\nEnsemble Classification Report:")
print(classification_report(y_test, y_pred_ensemble, zero_division=0))