Untitled
unknown
plain_text
2 years ago
4.4 kB
10
Indexable
{r}
# Load the survey file, keep the analysis variables (renaming the prejudice
# and support measures), and store the categorical ones as factors.
immig <- read_csv("immig.csv") |>
  select(
    age,
    female,
    employed,
    nontech.whitcol,
    tech.whitcol,
    explicit = expl.prejud,
    implicit = impl.prejud,
    support_visas = h1bvis.supp,
    support_imm = indimm.supp
  ) |>
  mutate(across(c(female, support_visas, support_imm), as.factor))
# Optional quick inspections of the result (left disabled):
#str(immig)
#summary(immig)
#create_report(immig)
Question 1
Distribution
{r}
# Bar chart of the support_visas categories.
ggplot(immig, aes(x = support_visas)) +
  geom_bar()

# Bar chart of the support_imm categories; written as its own statement so
# that both charts are printed by the same chunk.
ggplot(immig, aes(x = support_imm)) +
  geom_bar()
Proportion of people willing to increase:
{r}
# Flag (as a 0/1 factor) respondents who answered 0.75 or 1 on either of the
# two support items.
proportions <- mutate(
  immig,
  support = as.factor(if_else(
    support_imm == 1 | support_imm == 0.75 |
      support_visas == 1 | support_visas == 0.75,
    1, 0
  ))
)

# Count each flag value and express it as a share of all rows.
proportions |>
  count(support, name = "total") |>
  mutate(prop = total / sum(total))
Measures of cultural threat:
{r}
# Scatter of the explicit measure against the implicit one, with a fitted
# linear-model line on top.
ggplot(immig, aes(x = implicit, y = explicit)) +
  geom_point() +
  geom_smooth(method = "lm")
{r}
# Correlation between the explicit and implicit measures. Rows where either
# value is missing are dropped; without `use`, any NA makes the result NA.
cor(immig$explicit, immig$implicit, use = "complete.obs")
Question 2
{r}
# Base data frame with the two prejudice measures and the two support items
# (columns 6 to 9 of immig), restricted to rows 1 to 1128.
# NOTE(review): the row span is hard-coded; confirm it matches nrow(immig).
df2 <- data.frame(
  immig[1:1128, c("explicit", "implicit", "support_visas", "support_imm")]
)
{r message=FALSE, warning=FALSE}
# Chunk options: message = FALSE keeps messages from appearing when the code
# runs; warning = FALSE hides warnings as well (a double-edged sword).
# library() stops with an error if the package is missing, whereas require()
# would only return FALSE and let the chunk fail later.
library(ggcorrplot)
# Expand df2 into a numeric design matrix (factors become indicator columns),
# correlate every pair of columns, and draw the lower triangle with labels.
model.matrix(~ 0 + ., data = df2) %>%
  cor(use = "pairwise.complete.obs") %>%
  ggcorrplot(show.diag = FALSE, type = "lower", lab = TRUE, lab_size = 2)
Regression:
If the outcome variable is a factor, linear regression should not work directly, so I convert it back to a number before fitting. Keep in mind that `as.numeric()` applied to a factor returns its integer level codes rather than the original values.
{r}
# The support items are stored as factors, and lm() needs a numeric outcome.
# as.numeric() on a factor returns the level codes (1, 2, ...), not the
# recorded values, so convert through as.character() to recover the
# original scale.
model1 <- lm(
  as.numeric(as.character(support_visas)) ~ tech.whitcol,
  data = immig
)
model2 <- lm(
  as.numeric(as.character(support_imm)) ~ tech.whitcol,
  data = immig
)
{r}
summary(model1)
summary(model2)
Question 3
New variable. There are various ways to do this; this is my "style" of programming, like the fighting styles of the Jedi. You'll master one someday.
{r}
# Add a four-level sector factor. The conditions are checked in order, so a
# row takes the first label whose condition is TRUE; rows matching none of
# them get NA.
df3 <- mutate(
  immig,
  sector = case_when(
    tech.whitcol == 1 ~ "tech",
    nontech.whitcol == 1 ~ "whitecollar",
    employed == 1 ~ "employed",
    employed == 0 ~ "unemployed"
  ),
  sector = as.factor(sector)
)
Again, if the outcome variable (support_visas) is a factor, linear regression is not valid. I use as.numeric so that R can run the regression.
{r}
# Regress each support item on sector. The outcomes are factors, so they are
# converted through as.character() first; a bare as.numeric() would return
# the level codes (1, 2, ...) instead of the recorded values.
model3 <- lm(
  as.numeric(as.character(support_visas)) ~ sector,
  data = df3
)
model4 <- lm(
  as.numeric(as.character(support_imm)) ~ sector,
  data = df3
)
{r}
summary(model3)
summary(model4)
{r}
# Visa support on sector plus age, female and both prejudice measures.
# The factor outcome goes through as.character() so that as.numeric() yields
# the recorded values rather than the factor's level codes.
model5 <- lm(
  as.numeric(as.character(support_visas)) ~
    sector + age + female + implicit + explicit,
  data = df3
)
summary(model5)
Question 4
{r}
# Visa support on female, implicit and their interaction. The factor outcome
# goes through as.character() so that as.numeric() yields the recorded
# values rather than the factor's level codes.
model6 <- lm(
  as.numeric(as.character(support_visas)) ~ female * implicit,
  data = df3
)
summary(model6)
{r}
# Prediction grid: every observed implicit score crossed with both values of
# female (0 and 1), with female stored as a factor to match the model data.
nd <- expand.grid(female = 0:1, implicit = df3$implicit) |>
  mutate(female = as.factor(female))
# A single predict() call supplies both the fitted values and their
# standard errors.
nd[c("y", "se")] <-
  predict(model6, newdata = nd, se.fit = TRUE)[c("fit", "se.fit")]
# Normal-approximation 95% bounds around each prediction.
nd$upr <- nd$y + qnorm(0.975) * nd$se
nd$lwr <- nd$y - qnorm(0.975) * nd$se
{r}
# Predicted visa support with its interval across implicit scores, one
# colour per value of female. implicit is mapped straight to x so the axis
# is numeric and keeps its tick labels; crossing it with female through
# interaction() produced one discrete level per row and an unreadable axis.
ggplot(nd, aes(x = implicit, y = y, color = female)) +
  geom_pointrange(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
  labs(
    title = "Prediction of visa support according to gender",
    subtitle = "",
    x = "Implicit bias",
    y = "Predicted level of visa support"
  ) +
  theme_classic()
Age:
{r}
# Visa support on age. The factor outcome goes through as.character() so
# that as.numeric() yields the recorded values rather than the level codes.
model8 <- lm(as.numeric(as.character(support_visas)) ~ age, data = df3)
summary(model8)
{r}
# Same outcome on squared age only.
# NOTE(review): the quadratic term enters without the linear age term;
# confirm that this specification is intended.
model9 <- lm(as.numeric(as.character(support_visas)) ~ I(age^2), data = df3)
summary(model9)
{r}
# model8 uses age only, so predict once per distinct observed age. Crossing
# every age with every implicit score only multiplied identical rows.
nd2 <- data.frame(age = sort(unique(df3$age)))
# A single predict() call supplies both the fitted values and their
# standard errors.
nd2[c("y", "se")] <-
  predict(model8, newdata = nd2, se.fit = TRUE)[c("fit", "se.fit")]
# Normal-approximation 95% bounds around each prediction.
nd2$upr <- nd2$y + qnorm(0.975) * nd2$se
nd2$lwr <- nd2$y - qnorm(0.975) * nd2$se
{r}
# Predicted visa support with its interval at each age; the title and the
# x label now describe age, the variable actually plotted.
ggplot(nd2, aes(x = age, y = y, color = age)) +
  geom_pointrange(aes(ymin = lwr, ymax = upr), alpha = 0.3) +
  labs(
    title = "Prediction of visa support according to age",
    subtitle = "",
    x = "Age",
    y = "Predicted level of visa support"
  ) +
  theme_classic()
Editor is loading...