a year ago
4.4 kB
```{r}
# Read immig.csv and keep only the columns used below, renaming the prejudice
# and support measures. `female` and the two support measures are stored as
# factors.
immig <- read_csv("immig.csv") |>
  transmute(
    age,
    female = as.factor(female),
    employed,
    nontech.whitcol,
    tech.whitcol,
    explicit = expl.prejud,
    implicit = impl.prejud,
    support_visas = as.factor(h1bvis.supp),
    support_imm = as.factor(indimm.supp)
  )

# str(immig)
# summary(immig)
# create_report(immig)
```

## Question 1

Distribution

```{r}
# Each plot is its own statement, so the chunk shows both bar charts together.
immig |>
  ggplot(aes(x = support_visas)) +
  geom_bar()

immig |>
  ggplot(aes(x = support_imm)) +
  geom_bar()
```

Proportion of people willing to increase:

```{r}
# support = 1 when the respondent answered 0.75 or 1 on either support
# question, 0 otherwise.
proportions <- immig |>
  mutate(
    support = as.factor(
      if_else(
        support_visas == 0.75 | support_visas == 1 |
          support_imm == 0.75 | support_imm == 1,
        1,
        0
      )
    )
  )

proportions |>
  group_by(support) |>
  summarise(total = n()) |>
  mutate(prop = total / sum(total))
```

Measures of cultural threat:

```{r}
immig |>
  ggplot(aes(y = explicit, x = implicit)) +
  geom_point() +
  geom_smooth(method = "lm")
```

```{r}
# Without `use`, cor() returns NA as soon as either variable has a missing
# value; "complete.obs" computes the correlation on the complete pairs only.
cor(immig$explicit, immig$implicit, use = "complete.obs")
```

## Question 2

```{r}
# Columns are selected by name rather than by position (6:9) so the subset
# does not silently change if the column order of `immig` changes.
# NOTE(review): the row range 1:1128 is hard-coded; confirm it matches the
# intended number of observations.
df2 <- data.frame(
  immig[1:1128, c("explicit", "implicit", "support_visas", "support_imm")]
)
```

```{r message=FALSE, warning=FALSE}
# message = FALSE avoids messages appearing when running the code.
# warning = FALSE avoids the warning messages (a double-edged sword).
library(ggcorrplot)

# model.matrix() expands the factor columns into dummy variables so that they
# can be included in the correlation matrix.
model.matrix(~ 0 + ., data = df2) |>
  cor(use = "pairwise.complete.obs") |>
  ggcorrplot(show.diag = FALSE, type = "lower", lab = TRUE, lab_size = 2)
```

Regression:

If the outcome variable is a factor, linear regression should not work directly. Calling `as.numeric()` on a factor returns its internal level codes (1, 2, 3, ...) rather than the original values, so the factor is first converted with `as.character()` and then with `as.numeric()` in order to get back the numeric variable.

```{r}
# If the outcome is a factor, linear regression does not work, so the outcome
# is converted back to its original numeric values inside the formula.
model1 <- lm(
  as.numeric(as.character(support_visas)) ~ tech.whitcol,
  data = immig
)
model2 <- lm(
  as.numeric(as.character(support_imm)) ~ tech.whitcol,
  data = immig
)
```

```{r}
summary(model1)
summary(model2)
```

## Question 3

New variable. There are various ways; this is my "style" of programming. Like the fighting styles of the Jedi. You'll master one someday.
```{r}
# Collapse the employment indicators into a single categorical variable.
# case_when() uses the first condition that matches, so the tech and
# white-collar indicators take precedence over the general `employed` flag.
df3 <- immig |>
  mutate(
    sector = as.factor(
      case_when(
        tech.whitcol == 1 ~ "tech",
        nontech.whitcol == 1 ~ "whitecollar",
        employed == 1 ~ "employed",
        employed == 0 ~ "unemployed"
      )
    )
  )
```

Again, if the outcome variable (support_visas) is a factor, linear regression is not valid. The factor is converted with `as.character()` and then `as.numeric()` so that R runs the regression on the original values; `as.numeric()` alone would return the internal level codes (1, 2, 3, ...).

```{r}
# If the outcome is a factor, linear regression does not work, so the outcome
# is converted back to its original numeric values inside the formula.
model3 <- lm(as.numeric(as.character(support_visas)) ~ sector, data = df3)
model4 <- lm(as.numeric(as.character(support_imm)) ~ sector, data = df3)
```

```{r}
summary(model3)
summary(model4)
```

```{r}
model5 <- lm(
  as.numeric(as.character(support_visas)) ~
    sector + age + female + implicit + explicit,
  data = df3
)
summary(model5)
```

## Question 4

```{r}
model6 <- lm(
  as.numeric(as.character(support_visas)) ~ female * implicit,
  data = df3
)
summary(model6)
```

```{r}
# Prediction grid: both values of `female` crossed with every distinct
# observed value of `implicit` (sort() also drops missing values).
nd <- expand.grid(
  female = 0:1,
  implicit = sort(unique(df3$implicit))
) |>
  mutate(female = as.factor(female))

# One predict() call returns both the fitted values and their standard errors.
pred6 <- predict(model6, newdata = nd, se.fit = TRUE)
nd$y <- pred6$fit
nd$se <- pred6$se.fit

# 95% interval based on the normal approximation.
nd$upr <- nd$y + qnorm(0.975) * nd$se
nd$lwr <- nd$y - qnorm(0.975) * nd$se
```

```{r}
# `implicit` is mapped directly to a continuous x axis, so the axis shows
# readable tick labels.
ggplot(nd, aes(x = implicit, y = y, color = female)) +
  geom_pointrange(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
  labs(
    title = "Prediction of visa support according to gender",
    subtitle = "",
    x = "Implicit bias",
    y = "Predicted level of visa support"
  ) +
  theme_classic()
```

Age:

```{r}
model8 <- lm(as.numeric(as.character(support_visas)) ~ age, data = df3)
summary(model8)
```

```{r}
# NOTE(review): this model contains only the squared age term and no linear
# term; confirm that `age + I(age^2)` was not intended.
model9 <- lm(as.numeric(as.character(support_visas)) ~ I(age^2), data = df3)
summary(model9)
```

```{r}
# model8 only depends on age, so the prediction grid is just the distinct
# observed ages (sort() also drops missing values).
nd2 <- data.frame(age = sort(unique(df3$age)))

pred8 <- predict(model8, newdata = nd2, se.fit = TRUE)
nd2$y <- pred8$fit
nd2$se <- pred8$se.fit

# 95% interval based on the normal approximation.
nd2$upr <- nd2$y + qnorm(0.975) * nd2$se
nd2$lwr <- nd2$y - qnorm(0.975) * nd2$se
```

```{r}
ggplot(nd2, aes(x = age, y = y, color = age)) +
  geom_pointrange(aes(ymin = lwr, ymax = upr), alpha = 0.3) +
  labs(
    title = "Prediction of visa support according to age",
    subtitle = "",
    x = "Age",
    y = "Predicted level of visa support"
  ) +
  theme_classic()
```