Untitled

 avatar
unknown
plain_text
17 days ago
1.1 kB
4
Indexable
import pandas as pd
import numpy as np

# Toy student dataset; np.nan marks a missing observation.
data = {
    'Student_ID': [1, 2, 3, 4, 5, 6, 7, 8],
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Helen'],
    'Age': [15, 16, np.nan, 15, 16, 17, 15, 16],
    # The original list held only six entries, which makes pd.DataFrame raise
    # ValueError (all columns must have the same length). The two absent
    # entries are recorded as missing so they go through the imputation below.
    'Gender': ['F', 'M', 'M', 'M', 'F', np.nan, np.nan, np.nan],
    'Math_Score': [85, 90, np.nan, 75, 60, 95, 80, 88],
    'Science_Score': [78, 82, 70, 65, 55, 99, 77, 60],
    'English_Score': [88, 87, 75, 70, np.nan, 96, 89, 92],
}

df = pd.DataFrame(data)
print("Initial Dataset")
print(df)

print("Missing values")
print(df.isnull().sum())

# Impute missing values: numeric columns get the column mean, the categorical
# 'Gender' column gets its most frequent value (mode).
# Assign the result back instead of calling fillna(..., inplace=True) on the
# column selection: the chained in-place form is deprecated and does not
# update the frame when pandas copy-on-write is active.
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])
df['Math_Score'] = df['Math_Score'].fillna(df['Math_Score'].mean())
df['English_Score'] = df['English_Score'].fillna(df['English_Score'].mean())

print("\nDataset after handling missing values")
print(df)

def cap_outliers(series):
    """Clip values lying outside the 1.5 * IQR fences of *series*.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, where Q1 and Q3
    are the 25th and 75th percentiles and ``IQR = Q3 - Q1``. Values below the
    lower fence are raised to it and values above the upper fence are lowered
    to it; everything else is returned unchanged.

    Args:
        series: A numeric pandas Series.

    Returns:
        A new Series with the same index, clipped to the fences.
    """
    # Q1 must be the lower quartile and Q3 the upper one; with the two
    # swapped the IQR is negative and the lower fence exceeds the upper.
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    return series.clip(lower=lower_bound, upper=upper_bound)
Editor is loading...
Leave a Comment