Untitled
unknown
r
19 days ago
1.9 kB
2
Indexable
install.packages("dplyr") install.packages("MASS") install.packages("ggeffects") # load these libraries library(MASS) library(dplyr) library(ggeffects) # loading the dataset here by equating a variable called "df" # to birthwt, this dataset has to do with birth weights whilst # controlling for different variables such as bwt or smoking status df = birthwt View(df) # manipulating the dataset here to edit the values of bwt # bwt signifies birth weight df$bwt = (2000 + df$bwt) - (df$age^2) # make a scatter plot with age as x axis, bwt as y axis # the $ operator means we are accessing all the rows of the specific # column # e.g df is our dataframe, df$age accesses the age column in df plot(df$age, log(df$bwt)) # we need to filter the outlier out df = subset(df, age < 40) plot(df$age, df$bwt) # making a regression line plot abline(lm(df$bwt ~ df$age), data = df, col = "red") # our null model i.e just using the mean to make that regression line null_model <- lm(bwt ~ 1, data = df) # our alternative model alt_model <- lm(bwt ~ age, data = df) # extending our alternative model by adding more predictors and a multiplicative term instead of an additive term # i.e the regression formula is now bwt = B0 + B1(Age) + B2(Smoke) + B3(Age x Smoke) extended_model <- lm(bwt ~ age * smoke + lwt, data = df) # testing normality of residuals with the Shapiro test shapiro.test(residuals(alt_model)) # visually inspecting the histogram of residuals to see if it roughly follows # a normal distribution hist(residuals(alt_model)) # this is another way to visualise residual distribution # dots should roughly follow the straight line from qqline() qqnorm(residuals(alt_model)) qqline(residuals(alt_model)) # comparing our models # a lower df on output means that model is better anova(alt_model, model_2) # generating predictions ggpredict(model_2, terms = c("age[15:45]", "smoke", "lwt"), data = df) %>% plot()
Editor is loading...
Leave a Comment