hidden_discrimination
williamlegend
python
a year ago
1.1 kB
6
Indexable
from typing import Optional

import numpy as np
import pandas as pd


def simulate_data(
    n: int = 200,
    seed: Optional[int] = None,
    *,
    male_threshold: float = 0.5,
    female_threshold: float = 0.7,
) -> pd.DataFrame:
    """Simulate the wage data set used by :func:`run`.

    The first ``n // 2`` rows are male (``female == 0``) and the remaining
    rows are female (``female == 1``).  Every row receives a uniform draw on
    [0, 1) in ``education_cost``; a row gets education when that draw reaches
    its group's threshold.  The wage is 10 with education and 5 without.

    Args:
        n: Number of simulated individuals.
        seed: Seed for a dedicated random generator.  When ``None`` the draw
            comes from NumPy's global random state, exactly as the original
            script did (so ``np.random.seed`` still controls it).
        male_threshold: Minimum ``education_cost`` draw at which a male row
            gets education.
        female_threshold: Minimum ``education_cost`` draw at which a female
            row gets education.

    Returns:
        A DataFrame with the columns ``female``, ``education_cost``,
        ``goteducation``, ``wage`` and ``log_wage``.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    # Both ``np.random`` and a ``Generator`` expose ``uniform(low, high, size)``.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Generate 'female' column: second half of the sample is female.
    female = (np.arange(n) >= n // 2).astype(int)

    # Generate 'education_cost' column.
    # NOTE(review): a *higher* draw leads to education here, so the column
    # behaves like a latent score rather than a cost -- confirm the naming.
    education_cost = rng.uniform(0, 1, n)

    # Generate 'goteducation' column.  Because of discrimination, females
    # need to meet a higher "threshold" to get education.
    threshold = np.where(female == 1, female_threshold, male_threshold)
    goteducation = (education_cost >= threshold).astype(int)

    # Generate 'wage' column based on education only.
    wage = np.where(goteducation == 1, 10, 5)

    return pd.DataFrame(
        {
            'female': female,
            'education_cost': education_cost,
            'goteducation': goteducation,
            'wage': wage,
            # Log-transform 'wage'
            'log_wage': np.log(wage),
        }
    )


def run(n: int = 200, seed: Optional[int] = None):
    """Simulate the data, fit both wage regressions and print their summaries.

    Model 1 regresses ``log_wage`` on ``female`` alone; model 2 adds
    ``goteducation`` as a control.  In the simulated data the wage depends
    only on ``goteducation``, while access to education depends on
    ``female``.

    Args:
        n: Number of simulated individuals (see :func:`simulate_data`).
        seed: Optional seed for reproducible output.

    Returns:
        A ``(model1, model2)`` tuple of the fitted statsmodels results.
    """
    # Imported here so that ``simulate_data`` stays usable when only NumPy
    # and pandas are installed.
    import statsmodels.formula.api as smf

    df = simulate_data(n, seed)

    # Run regressions
    model1 = smf.ols(formula='log_wage ~ female', data=df).fit()
    model2 = smf.ols(formula='log_wage ~ female + goteducation', data=df).fit()

    print(model1.summary())
    print(model2.summary())
    return model1, model2


if __name__ == "__main__":
    run()
Editor is loading...
Leave a Comment