wage_discrimination
unknown
python
a year ago
1.3 kB
10
Indexable
import numpy as np
import pandas as pd


def simulate_wages(n_obs=1000, seed=None):
    """Build the simulated wage data set used by :func:`run`.

    The first half of the rows are men (``female == 0``) and the second half
    are women (``female == 1``).  Every row gets an ``education_cost`` drawn
    uniformly from [0, 1).  A person gets education when that draw reaches a
    group-specific threshold: 0.5 for men, 0.7 for women.  Pay depends only on
    education (10 with, 5 without), so men and women are paid the same for the
    same job, but fewer women clear the education threshold.

    Args:
        n_obs: Number of simulated observations (rows).
        seed: Optional seed for a private random generator, for reproducible
            output.  When ``None`` the global ``np.random`` state is used, so
            results vary between calls unless the caller seeded it.

    Returns:
        A ``pandas.DataFrame`` with a default integer index and the columns
        ``female``, ``education_cost``, ``wage``, ``goteducation`` and
        ``log_wage`` (natural log of ``wage``).
    """
    # Fall back to the module-level generator when no seed is given so that
    # callers who seed np.random globally keep getting the same draws.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Second half of the sample is female (rows n_obs // 2 and onwards).
    female = (np.arange(n_obs) >= n_obs // 2).astype(np.int64)
    education_cost = rng.uniform(0, 1, n_obs)

    # Women face a higher threshold (0.7 vs 0.5) before they get education.
    threshold = np.where(female == 1, 0.7, 0.5)
    goteducation = (education_cost >= threshold).astype(np.int64)

    # Same pay for the same job: the wage depends on education only.
    wage = np.where(goteducation == 1, 10.0, 5.0)

    return pd.DataFrame(
        {
            'female': female,
            'education_cost': education_cost,
            'wage': wage,
            'goteducation': goteducation,
            'log_wage': np.log(wage),
        }
    )


def run(n_obs=1000, seed=None):
    """Simulate the wage data and print two OLS regression summaries.

    Model 1 regresses ``log_wage`` on ``female`` alone; model 2 adds
    ``goteducation`` as a control.  In this simulation ``log_wage`` is fully
    determined by ``goteducation``, so comparing the two summaries shows how
    the ``female`` coefficient changes once education is controlled for.

    Args:
        n_obs: Number of simulated observations passed to
            :func:`simulate_wages`.
        seed: Optional seed passed to :func:`simulate_wages` for reproducible
            output.

    Returns:
        None.  Both model summaries are printed to standard output.
    """
    # Imported here because only the regression step needs statsmodels; the
    # simulation helper stays importable without it.
    import statsmodels.formula.api as smf

    df = simulate_wages(n_obs=n_obs, seed=seed)

    model1 = smf.ols(formula='log_wage ~ female', data=df).fit()
    model2 = smf.ols(formula='log_wage ~ female + goteducation', data=df).fit()

    print(model1.summary())
    print(model2.summary())


if __name__ == "__main__":
    run()
Editor is loading...
Leave a Comment