# wage_discrimination
# Paste-page metadata: unknown · python · 2 years ago · 1.3 kB · 24 · Indexable
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
def _simulate_wages(n_obs, seed):
    """Build the synthetic wage data set analysed by ``run``.

    The first ``n_obs // 2`` rows are men (``female == 0``) and the rest are
    women.  Every row gets a uniform(0, 1) ``education_cost`` draw; a row
    earns the high wage (10) when its draw reaches the cutoff for its group
    and the low wage (5) otherwise.
    """
    male_cutoff = 0.5
    # Women are paid the same for the same job, but it is harder for them to
    # get education, so their cutoff is higher.
    female_cutoff = 0.7

    # Without a seed, keep drawing from NumPy's global state as the original
    # code did, so a caller's own np.random.seed(...) still takes effect.
    # With a seed, use a private generator for a reproducible sample.
    rng = np.random if seed is None else np.random.default_rng(seed)

    df = pd.DataFrame(index=range(n_obs))
    df['female'] = (np.arange(n_obs) >= n_obs // 2).astype(int)
    df['education_cost'] = rng.uniform(0, 1, n_obs)

    cutoff = np.where(df['female'] == 1, female_cutoff, male_cutoff)
    df['wage'] = np.where(df['education_cost'] >= cutoff, 10.0, 5.0)

    # Wages are either 5 or 10, so any threshold strictly between the two
    # identifies the educated (high-wage) rows.
    df['goteducation'] = (df['wage'] > 7).astype(int)
    df['log_wage'] = np.log(df['wage'])
    return df


def run(n_obs=1000, seed=None):
    """Simulate wages and compare two OLS regressions of log wage.

    The first regression uses only ``female`` as a regressor; the second
    also controls for ``goteducation``.  Both summaries are printed.

    Parameters
    ----------
    n_obs : int, optional
        Number of simulated observations (default 1000, as before).
    seed : int or None, optional
        Seed for the ``education_cost`` draw.  ``None`` (the default) keeps
        the previous behaviour of drawing from NumPy's global random state.

    Returns
    -------
    tuple
        ``(model1, model2)``: the fitted results for ``log_wage ~ female``
        and ``log_wage ~ female + goteducation``.
    """
    df = _simulate_wages(n_obs, seed)

    model1 = smf.ols(formula='log_wage ~ female', data=df).fit()
    model2 = smf.ols(formula='log_wage ~ female + goteducation', data=df).fit()
    print(model1.summary())
    print(model2.summary())
    return model1, model2
# Script entry point: simulate the data and print both regression summaries.
run()