# modelsummary provides the regression tables used in 2d and 2f.
# Install it only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("modelsummary", quietly = TRUE)) {
  install.packages("modelsummary")
}
library(modelsummary)
# Import the data frame as variable 'z'.
# stringsAsFactors = TRUE reads text columns as factors on every R version
# (the read.csv default became FALSE in R 4.0.0). The rest of this script
# treats the group column as a factor: summary(z) only lists per-level counts
# for factor columns, not for character columns.
z <- read.csv("entrepreneurs.csv", stringsAsFactors = TRUE)
### 2b
# How many variables? Just count the groups.
# How many observations in the 'control', 'treated' and 'true winners' groups?
# Per-column summary of the whole data frame.
summary(object = z)
# Since the group column contains no NA values, adding the three group
# counts, 1112 + 729 + 475, gives the total number of observations: 2316.
# I'd usually just use dim() to get the number of observations and variables,
# but I'm playing Code Golf against another student, i.e. I'm aiming for
# as few non-comment characters as possible
### 2c
# Mean of each variable in columns 3-10, computed within each group
# (the grouping values are taken from column 5 of z).
# Column 5 is left out of the data being averaged: because the grouping term
# is written as z[, 5] rather than by name, `.` would otherwise expand to
# include the grouping column itself and try to average it as well.
aggregate(. ~ z[, 5], z[c(3:4, 6:10)], mean)
### 2d
# Build a two-group comparison sample.
#
# Drops every observation whose group (column 5) equals `i`, then adds a
# numeric indicator column `b` that is 0 for observations in the "control"
# group and 1 for all remaining observations.
#
# Arguments:
#   i     the group level to drop, e.g. "treated" or "true winners"
#   data  data frame to subset; defaults to the script's global data frame z,
#         so existing calls of the form s("...") keep working
# Returns: the subsetted data frame with the extra column `b`.
s <- function(i, data = z) {
  group <- data[[5]]
  # which() skips rows whose group is NA; plain logical indexing would turn
  # each of them into a row consisting entirely of NA values.
  kept <- data[which(group != i), ]
  kept$b <- as.numeric(kept[[5]] != "control")
  kept
}
# 2d sample 'q': drop the "true winners" group, then flag each remaining
# observation with b = 0 if it is in the control group and b = 1 otherwise.
q <- s(i = "true winners")
# Regression table for the group-indicator models.
#
# For each outcome column, fits a linear model of that column on the binary
# column `b` of `y`, then passes the list of fitted models to modelsummary()
# with significance stars and only the number of observations and adjusted
# R-squared as goodness-of-fit rows.
#
# Arguments:
#   y         data frame that contains a column `b` (as added by s())
#   outcomes  positions or names of the outcome columns; defaults to columns
#             10 and 11 (expected to be 'OperatesFirm' and 'WorkHours' in
#             this data set -- verify against the CSV)
# Returns: the value returned by modelsummary().
g <- function(y, outcomes = c(10, 11)) {
  stopifnot(is.data.frame(y), "b" %in% names(y))
  # `x` is the outcome vector itself; `b` is looked up in `y`.
  fit_outcome <- function(x) lm(x ~ b, data = y)
  # unname() keeps the model list unnamed, so the column headers that
  # modelsummary() generates are the same as for a plain unnamed list.
  models <- unname(lapply(y[outcomes], fit_outcome))
  modelsummary(
    models,
    stars = TRUE,
    gof_map = c("nobs", "adj.r.squared")
  )
}
# look I'm sorry about this but I've got a Code Golf game to win
# Regression table for the 2d sample 'q'.
g(y = q)
### 2f
# 2f sample 'e': same construction as before, but this time the "treated"
# group is dropped. Column b is again 0 for observations in the control
# group and 1 for every other remaining observation.
e <- s(i = "treated")
# Regression table for the 2f sample 'e'.
g(y = e)