Untitled
khoon suka dya haunknown
plain_text
4 months ago
2.2 kB
2
Indexable
# Train and evaluate a logistic-regression classifier that predicts a binary
# "Salary" class from employee records, then plot actual vs. predicted classes.
#
# The script reads "EmployeeRecord.csv" from the current working directory and
# expects the columns 'Education Level', 'Years of Experience' and 'Job Title'.

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Employees with more than this many years of experience get Salary class 1.
EXPERIENCE_THRESHOLD_YEARS = 10

# Step 1: Load the dataset
filepath = "EmployeeRecord.csv"
employee_data = pd.read_csv(filepath)

# Display the first few rows of the DataFrame
print(employee_data.head())

# Create a synthetic binary Salary column based on Years of Experience.
# NOTE: the target is a deterministic function of 'Years of Experience', which
# is also used as a feature below, so the evaluation metrics mostly reflect how
# well the model recovers this threshold rather than any real salary signal.
employee_data['Salary'] = (
    employee_data['Years of Experience'] > EXPERIENCE_THRESHOLD_YEARS
).astype(int)

# Step 2/3: Define features and target, then select the relevant columns
features = ['Education Level', 'Years of Experience', 'Job Title']
target = 'Salary'

# Drop rows with missing feature values: a missing 'Years of Experience' is
# silently labelled 0 by the comparison above (NaN > 10 is False), and NaN
# inputs are not accepted by LogisticRegression.
# .copy() makes `data` an independent frame instead of a view-like slice of
# `employee_data`.
data = employee_data[features + [target]].dropna(subset=features).copy()

# Copy again so the column assignments below modify X itself and do not trigger
# pandas' SettingWithCopyWarning (chained assignment on a slice of `data`).
X = data[features].copy()
y = data[target]

# Encode categorical columns using LabelEncoder (one encoder per column so each
# keeps its own class mapping).
le_edu = LabelEncoder()
le_job = LabelEncoder()
X['Education Level'] = le_edu.fit_transform(X['Education Level'])
X['Job Title'] = le_job.fit_transform(X['Job Title'])

# Step 4: Split the dataset (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 5: Train a Logistic Regression model.
# The features are unscaled, so allow more iterations than the default to give
# the solver room to converge.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Step 6: Predict and evaluate the model
y_pred = model.predict(X_test)

# Model Evaluation
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(class_report)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.3f}")

# Step 7: Plot Years of Experience vs Predicted Salary Class
plt.figure(figsize=(8, 6))
plt.scatter(
    X_test['Years of Experience'],
    y_test,
    color='blue',
    label='Actual Salary',
    alpha=0.6,
)
plt.scatter(
    X_test['Years of Experience'],
    y_pred,
    color='red',
    label='Predicted Salary',
    alpha=0.6,
)
plt.xlabel('Years of Experience')
plt.ylabel('Salary Class (1 = High, 0 = Low)')
plt.title('Years of Experience vs Predicted Salary Class')
plt.legend()
plt.show()
Editor is loading...
Leave a Comment