Untitled
unknown
plain_text
17 days ago
1.1 kB
4
Indexable
import pandas as pd
import numpy as np

# Small demo dataset of student records. Some cells are deliberately NaN so
# the missing-value handling below has something to do.
data = {
    'Student_ID': [1, 2, 3, 4, 5, 6, 7, 8],
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Helen'],
    'Age': [15, 16, np.nan, 15, 16, 17, 15, 16],
    # NOTE(review): the original list had only 6 entries, which makes
    # pd.DataFrame raise "All arrays must be of the same length". The last two
    # values (Grace, Helen) are assumed to be 'F' -- confirm against the
    # real data.
    'Gender': ['F', 'M', 'M', 'M', 'F', np.nan, 'F', 'F'],
    'Math_Score': [85, 90, np.nan, 75, 60, 95, 80, 88],
    'Science_Score': [78, 82, 70, 65, 55, 99, 77, 60],
    'English_Score': [88, 87, 75, 70, np.nan, 96, 89, 92],
}

df = pd.DataFrame(data)

print("Initial Dataset")
print(df)

print("Missing values")
print(df.isnull().sum())

# Impute numeric columns with the column mean. The result is assigned back to
# the column instead of calling fillna(inplace=True) on the selection: the
# chained in-place form is deprecated and does not modify df under pandas
# Copy-on-Write.
for column in ('Age', 'Math_Score', 'English_Score'):
    df[column] = df[column].fillna(df[column].mean())

# Impute the categorical column with its most frequent value.
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])

print("\nDataset after handling missing values")
print(df)


def cap_outliers(series: pd.Series) -> pd.Series:
    """Clip values of *series* to the 1.5 * IQR fences.

    Values below ``Q1 - 1.5 * IQR`` are raised to that lower fence and values
    above ``Q3 + 1.5 * IQR`` are lowered to that upper fence, where Q1 and Q3
    are the 25th and 75th percentiles of *series* and ``IQR = Q3 - Q1``.

    Args:
        series: Numeric pandas Series to cap.

    Returns:
        A new Series with out-of-fence values replaced by the nearest fence;
        the input is not modified.
    """
    # Q1 is the lower quartile and Q3 the upper one; the original had them
    # swapped, which produced a negative IQR and inverted bounds.
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    return series.clip(lower=lower_bound, upper=upper_bound)
Editor is loading...
Leave a Comment