Untitled

khoon suka dya ha
 avatar
unknown
plain_text
4 months ago
2.2 kB
2
Indexable
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Step 1: Read the employee records from the CSV file into a DataFrame
filepath = "EmployeeRecord.csv"
employee_data = pd.read_csv(filepath)

# Preview the first five rows as a quick sanity check of the loaded data
print(employee_data.head(5))

# Derive a synthetic binary Salary label: 1 when Years of Experience is
# strictly greater than 10, otherwise 0
employee_data['Salary'] = employee_data['Years of Experience'].gt(10).astype(int)

# Step 2: Select relevant columns
data = employee_data[['Education Level', 'Years of Experience', 'Job Title', 'Salary']]

# Step 3: Define features and target
features = ['Education Level', 'Years of Experience', 'Job Title']
target = 'Salary'

# Take an explicit copy of the feature columns. The encoded columns are
# assigned into X below, and assigning into a column subset of another
# DataFrame triggers pandas' SettingWithCopyWarning and leaves it ambiguous
# which frame is actually modified. With the copy, X owns its data and
# `data` / `employee_data` keep the original string values.
X = data[features].copy()
y = data[target]

# Encode categorical columns using LabelEncoder.
# One encoder per column: each fitted encoder holds the class mapping for
# its own column only.
# NOTE(review): LabelEncoder assigns integer codes in sorted-label order, so
# the linear model treats the categories as ordered numbers -- confirm this
# is acceptable for 'Job Title' before relying on the coefficients.
le_edu = LabelEncoder()
le_job = LabelEncoder()

X['Education Level'] = le_edu.fit_transform(X['Education Level'])
X['Job Title'] = le_job.fit_transform(X['Job Title'])

# Step 4: Hold out 20% of the rows for testing; the fixed seed keeps the
# split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Fit a Logistic Regression classifier on the training rows
# (fit() returns the estimator itself, so construction and fitting chain)
model = LogisticRegression().fit(X_train, y_train)

# Step 6: Predict the salary class for the held-out rows
y_pred = model.predict(X_test)

# Model Evaluation: compare held-out labels against the predictions

# Confusion matrix, printed on the line after its heading
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Text classification report, preceded by a blank line and its heading
class_report = classification_report(y_true=y_test, y_pred=y_pred)
print("\nClassification Report:", class_report, sep="\n")

# Overall accuracy, shown to three decimal places
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.3f}")

# Step 7: Plot Years of Experience vs Predicted Salary Class
# (matplotlib.pyplot is imported as `plt` with the other imports at the top
# of the file rather than here, after executable code.)

plt.figure(figsize=(8, 6))
# Both series share the same x values; blue marks the true class and red the
# predicted class. They are drawn semi-transparent because the two series
# overlap wherever the prediction matches the true class.
plt.scatter(X_test['Years of Experience'], y_test, color='blue', label='Actual Salary', alpha=0.6)
plt.scatter(X_test['Years of Experience'], y_pred, color='red', label='Predicted Salary', alpha=0.6)
plt.xlabel('Years of Experience')
plt.ylabel('Salary Class (1 = High, 0 = Low)')
plt.title('Years of Experience vs Predicted Salary Class')
plt.legend()
plt.show()
Editor is loading...
Leave a Comment