# Paste metadata (web-page residue, kept as comments so the file parses):
# Untitled
# unknown
# plain_text
# 8 months ago
# 1.1 kB
# 5
# Indexable
import pandas as pd
import numpy as np
# Sample student records; np.nan marks values that are missing on purpose so
# the imputation steps below have something to fill in.
data = {
    'Student_ID': [1, 2, 3, 4, 5, 6, 7, 8],
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Helen'],
    'Age': [15, 16, np.nan, 15, 16, 17, 15, 16],
    # NOTE(review): the original list had only 6 entries for 8 students, which
    # makes pd.DataFrame raise "All arrays must be of the same length". The
    # last two entries are padded as missing rather than guessed -- confirm
    # the intended values for students 7 and 8.
    'Gender': ['F', 'M', 'M', 'M', 'F', np.nan, np.nan, np.nan],
    'Math_Score': [85, 90, np.nan, 75, 60, 95, 80, 88],
    'Science_Score': [78, 82, 70, 65, 55, 99, 77, 60],
    'English_Score': [88, 87, 75, 70, np.nan, 96, 89, 92],
}
df = pd.DataFrame(data)

print("Initial Dataset")
print(df)
print("Missing values")
print(df.isnull().sum())

# Numeric gaps are filled with the column mean. Assign the result back instead
# of calling fillna(inplace=True) on df[col]: the chained in-place form acts on
# a temporary object and is not guaranteed to update df.
for column in ('Age', 'Math_Score', 'English_Score'):
    df[column] = df[column].fillna(df[column].mean())

# Categorical gaps are filled with the most frequent value (mode).
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])

print("\nDataset after handling missing values")
print(df)
def cap_outliers(series, whisker=1.5):
    """Cap outliers in a numeric Series using the IQR rule.

    Values below ``Q1 - whisker * IQR`` are raised to that bound and values
    above ``Q3 + whisker * IQR`` are lowered to that bound, where Q1 and Q3
    are the 25th and 75th percentiles and ``IQR = Q3 - Q1``.

    Args:
        series: Numeric pandas Series to cap.
        whisker: Multiplier applied to the IQR when computing the bounds.
            Defaults to 1.5.

    Returns:
        A new Series with out-of-range values clipped to the bounds; the
        input Series is not modified.
    """
    # Q1 is the lower quartile and Q3 the upper one; swapping them makes the
    # IQR negative and puts the lower bound above the upper bound.
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - whisker * iqr
    upper_bound = q3 + whisker * iqr
    return series.clip(lower=lower_bound, upper=upper_bound)
# Leave a Comment