Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
4.4 kB
1
Indexable
Never
{r}
# Read the survey file, keep only the analysis variables (renaming the
# prejudice and support measures), and store the categorical ones as factors.
immig <- read_csv("immig.csv") |>
  select(
    age,
    female,
    employed,
    nontech.whitcol,
    tech.whitcol,
    explicit = expl.prejud,
    implicit = impl.prejud,
    support_visas = h1bvis.supp,
    support_imm = indimm.supp
  ) |>
  mutate(across(c(female, support_visas, support_imm), as.factor))


#str(immig)  
#summary(immig)
#create_report(immig)


Question 1

Distribution

{r}
# Distribution of answers for each support variable, one bar chart per
# variable. Both plots are printed by the same chunk.
ggplot(immig, aes(x = support_visas)) +
  geom_bar()

ggplot(immig, aes(x = support_imm)) +
  geom_bar()


Proportion of people willing to increase:

{r}
# Flag respondents who answered 0.75 or 1 on either support variable.
proportions <- immig |>
  mutate(
    support = as.factor(if_else(
      support_visas == 0.75 | support_visas == 1 |
        support_imm == 0.75 | support_imm == 1,
      1,
      0
    ))
  )

# Number and share of respondents in each group.
proportions |>
  count(support, name = "total") |>
  mutate(prop = total / sum(total))
  


Measures of cultural threat:

{r}
# Scatter plot of explicit against implicit prejudice with a fitted
# linear trend line.
ggplot(immig, aes(x = implicit, y = explicit)) +
  geom_point() +
  geom_smooth(method = "lm")

{r}
# Pearson correlation between the two prejudice measures. Rows with a missing
# value in either variable are dropped; without `use`, cor() returns NA as soon
# as any value is missing.
cor(immig$explicit, immig$implicit, use = "complete.obs")


Question 2

{r}
# Variables used for the correlation plot, picked by name (they are columns
# 6 to 9 of `immig`) and converted to a base data frame.
# NOTE(review): the row range 1:1128 is hard-coded; presumably it is the number
# of rows in the data; confirm.
df2 <- data.frame(
  immig[1:1128, c("explicit", "implicit", "support_visas", "support_imm")]
)

{r message=FALSE, warning=FALSE} 

# message = FALSE keeps messages out of the output when the code runs.
# warning = FALSE hides warnings as well (a double-edged sword).
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the chunk fail later.
library(ggcorrplot)

# Expand the factor columns into indicator columns, then plot the pairwise
# correlations (lower triangle, coefficient printed in each cell).
model.matrix(~ 0 + ., data = df2) |>
  cor(use = "pairwise.complete.obs") |>
  ggcorrplot(show.diag = FALSE, type = "lower", lab = TRUE, lab_size = 2)

Regression:

If the outcome variable is a factor, linear regression should not work. There may be a way to make it work, but I don't understand it yet. I'm using as.numeric() in order to get back a numeric variable.

{r}
# The support variables are stored as factors, and lm() needs a numeric
# outcome. as.numeric() applied directly to a factor returns the internal level
# codes (1, 2, ...), not the original values, so convert through as.character()
# first to recover the original scale.
model1 <- lm(
  as.numeric(as.character(support_visas)) ~ tech.whitcol,
  data = immig
)
model2 <- lm(
  as.numeric(as.character(support_imm)) ~ tech.whitcol,
  data = immig
)

{r}
# Coefficient tables and fit statistics for the two bivariate models.
summary(model1); summary(model2)

Question 3

New variable. There are various ways to create it; this is my "style" of programming, like the fighting styles of the Jedi. You'll master one someday.

{r}
# Add an employment-sector factor. Conditions are checked in order and the
# first match wins; rows matching none of them get NA.
df3 <- immig |>
  mutate(
    sector = factor(case_when(
      tech.whitcol == 1 ~ "tech",
      nontech.whitcol == 1 ~ "whitecollar",
      employed == 1 ~ "employed",
      employed == 0 ~ "unemployed"
    ))
  )

Again, if the outcome variable (support_visas) is a factor, linear regression is not valid. I use as.numeric() so that R can run the regression.

{r}
# Support regressed on employment sector. The outcomes are factors, so they are
# converted through as.character() before as.numeric(); as.numeric() alone
# would return the level codes (1, 2, ...) and not the original values.
model3 <- lm(as.numeric(as.character(support_visas)) ~ sector, data = df3)
model4 <- lm(as.numeric(as.character(support_imm)) ~ sector, data = df3)


{r}
# Coefficient tables and fit statistics for the two sector models.
summary(model3)
summary(model4)

{r}
# Visa support regressed on sector plus age, gender and both prejudice
# measures. The factor outcome is converted through as.character() so the model
# uses the original values, not the factor's level codes.
model5 <- lm(
  as.numeric(as.character(support_visas)) ~
    sector + age + female + implicit + explicit,
  data = df3
)
summary(model5)

Question 4

{r}
# Interaction model: female * implicit expands to both main effects plus their
# product. The factor outcome is converted through as.character() so the model
# uses the original values, not the factor's level codes.
model6 <- lm(
  as.numeric(as.character(support_visas)) ~ female * implicit,
  data = df3
)

summary(model6)

{r}
# Prediction grid: every distinct observed implicit value for each gender.
# unique() removes repeated grid rows and sort() drops missing values, which
# could not be predicted anyway. female is a factor with levels "0" and "1",
# as in the data used to fit model6.
nd <- expand.grid(
  female = factor(0:1),
  implicit = sort(unique(df3$implicit))
)

# A single predict() call returns both the fitted values and their
# standard errors.
nd_pred <- predict(model6, newdata = nd, se.fit = TRUE)
nd$y <- nd_pred$fit
nd$se <- nd_pred$se.fit

# 95% interval using the normal approximation.
nd$upr <- nd$y + qnorm(0.975) * nd$se
nd$lwr <- nd$y - qnorm(0.975) * nd$se

{r}
# Predicted visa support across implicit bias, one colour per gender, with
# 95% intervals. implicit is mapped directly to the x axis so the axis is
# continuous and its tick labels match the "Implicit bias" axis title.
ggplot(nd, aes(x = implicit, y = y, color = female)) +
  geom_pointrange(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
  labs(
    title = "Prediction of visa support according to gender",
    subtitle = "",
    x = "Implicit bias",
    y = "Predicted level of visa support"
  ) +
  theme_classic()

Age:

{r}
# Visa support regressed on age (linear term only). The factor outcome is
# converted through as.character() so the model uses the original values,
# not the factor's level codes.
model8 <- lm(as.numeric(as.character(support_visas)) ~ age, data = df3)
summary(model8)

{r}
# Visa support regressed on squared age. The factor outcome is converted
# through as.character() so the model uses the original values, not the
# factor's level codes.
# NOTE(review): only the squared term is included; a quadratic specification
# usually keeps the linear term too (age + I(age^2)); confirm the intent.
model9 <- lm(as.numeric(as.character(support_visas)) ~ I(age^2), data = df3)
summary(model9)

{r}
# Prediction grid for the age-only model: one row per distinct observed age.
# model8 has no implicit-bias term, so crossing age with implicit would only
# multiply the number of rows without changing any prediction.
nd2 <- data.frame(age = sort(unique(df3$age)))

# A single predict() call returns both the fitted values and their
# standard errors.
nd2_pred <- predict(model8, newdata = nd2, se.fit = TRUE)
nd2$y <- nd2_pred$fit
nd2$se <- nd2_pred$se.fit

# 95% interval using the normal approximation.
nd2$upr <- nd2$y + qnorm(0.975) * nd2$se
nd2$lwr <- nd2$y - qnorm(0.975) * nd2$se

{r}
# Predicted visa support by age with 95% intervals. The title and x-axis
# label describe age, the only predictor in model8.
ggplot(nd2, aes(x = age, y = y, color = age)) +
  geom_pointrange(aes(ymin = lwr, ymax = upr), alpha = 0.3) +
  labs(
    title = "Prediction of visa support according to age",
    subtitle = "",
    x = "Age",
    y = "Predicted level of visa support"
  ) +
  theme_classic()