Untitled
unknown
r
a year ago
15 kB
1
Indexable
Never
rm(list=ls()) library(ggplot2) library(patchwork) weather <- read.csv("Data/weather.csv") (Y.model <- lm(rain ~pressure, data = weather)) p1aY = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) + geom_line(aes(y = Y.model$coefficients[1] + pressure*Y.model$coefficients[2], col = "est"), linetype = "dashed") + labs(title = "Precipitation (mm) against Air Pressure (hPa)") # Exponential decay press <- data.frame(weather[4]) Y.pred <- cbind(press, fit = predict(Y.model, press), conf = predict(Y.model, press, interval = "confidence"), pred = predict(Y.model, press, interval = "prediction")) head(Y.pred) # get rid of the extra fits Y.pred$conf.fit <- Y.pred$pred.fit <- NULL head(Y.pred) Y.pred$e <- Y.model$residuals head(Y.pred) (max.e <- max(abs(Y.pred$e))) (Y.elims <- c(-max.e, max.e)) ## Plot against yhat#### # Add a horizontal line at y=0, # and expand the y-axis to include +/- max residual. p1aYres = ggplot(data = Y.pred, aes(x = fit, y = e)) + geom_point(size = 1) + geom_hline(yintercept = 0) + expand_limits(y = Y.elims) + xlab(" ") + ylab(" ") + labs(tag = "identity") + labs(title = "Residuals vs Predicted Precipitation") + theme(text = element_text(size = 18)) ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) + geom_line(aes(y = Y.pred$fit, col = "fit"), linetype = "dashed") + geom_line(aes(y = Y.pred$conf.lwr, col = "conf"), linetype = "dashed") + geom_line(aes(y = Y.pred$conf.upr, col = "conf"), linetype = "dashed") + geom_line(aes(y = Y.pred$pred.lwr, col = "pred"), linetype = "dashed") + geom_line(aes(y = Y.pred$pred.upr, col = "pred"), linetype = "dashed") + labs(title = "Precipitation (mm) against Air Pressure (hPa)") (Ylog.model <- lm(log(rain) ~ pressure, data = weather)) p1alogY = ggplot(data = weather, aes(x = pressure, y = log(rain))) + geom_point(size = 1) + geom_line(aes(y = Ylog.model$coefficients[1] + pressure*Ylog.model$coefficients[2], col = "est"), linetype = "dashed") + labs(title = "Precipitation (log(mm)) against Air Pressure (hPa)") # Linear to the left, but after 1010 hPa the data seems to have another slope p1alogY ## resid anal ### Ylog.pred <- cbind(press, fit = predict(Ylog.model, press), conf = predict(Ylog.model, press, interval = "confidence"), pred = predict(Ylog.model, press, interval = "prediction")) head(Y.pred) # get rid of the extra fits Ylog.pred$conf.fit <- Ylog.pred$pred.fit <- NULL head(Ylog.pred) Ylog.pred$e <- Ylog.model$residuals head(Ylog.pred) (max.e <- max(abs(Ylog.pred$e))) (Ylog.elims <- c(-max.e, max.e)) ## Plot against yhat#### # Add a horizontal line at y=0, # and expand the y-axis to include +/- max residual. p1alogYres = ggplot(data = Ylog.pred, aes(x = fit, y = e)) + geom_point(size = 1) + geom_hline(yintercept = 0) + expand_limits(y = Ylog.elims) + xlab(" ") + ylab("Residual") + labs(tag = "log") + #labs(title = "Residuals vs Predicted log-Precipitation") + theme(text = element_text(size = 18)) p1alogYints = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) + geom_line(aes(y = exp(Ylog.pred$fit), col = "fit"), linetype = "dashed") + geom_line(aes(y = exp(Ylog.pred$conf.lwr), col = "conf"), linetype = "dashed") + geom_line(aes(y = exp(Ylog.pred$conf.upr), col = "conf"), linetype = "dashed") + geom_line(aes(y = exp(Ylog.pred$pred.lwr), col = "pred"), linetype = "dashed") + geom_line(aes(y = exp(Ylog.pred$pred.upr), col = "pred"), linetype = "dashed") + labs(title = "Precipitation log(mm) against Air Pressure (hPa)") p1alogYints (Ycr.model <- lm(rain^(1/3) ~ pressure, data = weather)) p1aYcr = ggplot(data = weather, aes(x = pressure, y = rain^(1/3))) + geom_point(size = 1) + geom_line(aes(y = Ycr.model$coefficients[1] +pressure*Ycr.model$coefficients[2], col = "est"), linetype = "dashed") + labs(title = "Precipitation ((mm)^1/3) Against Air Pressure (hPa)") # Linear, fits our model Y = xB + e p1aYcr ## resid anal ### Ycr.pred <- cbind(press, fit = predict(Ycr.model, press), conf = predict(Ycr.model, press, interval = "confidence"), pred = predict(Ycr.model, press, interval = "prediction")) head(Y.pred) # get rid of the extra fits Ycr.pred$conf.fit <- Ycr.pred$pred.fit <- NULL head(Ycr.pred) Ycr.pred$e <- Ycr.model$residuals head(Ycr.pred) (max.e <- max(abs(Ycr.pred$e))) (Ycr.elims <- c(-max.e, max.e)) ## Plot against yhat#### # Add a horizontal line at y=0, # and expand the y-axis to include +/- max residual. p1aYcrres = ggplot(data = Ycr.pred, aes(x = fit, y = e)) + geom_point(size = 1) + geom_hline(yintercept = 0) + expand_limits(y = Ycr.elims) + xlab("Predicted Precipitation") + ylab(" ") + labs(tag = "cube root") + #labs(title = "Residuals vs Predicted Precipitation") + theme(text = element_text(size = 18)) p1aYcrints = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) + geom_line(aes(y = (Ycr.pred$fit)^3, col = "fit"), linetype = "dashed") + geom_line(aes(y = (Ycr.pred$conf.lwr)^3, col = "conf"), linetype = "dashed") + geom_line(aes(y = (Ycr.pred$conf.upr)^3, col = "conf"), linetype = "dashed") + geom_line(aes(y = (Ycr.pred$pred.lwr)^3, col = "pred"), linetype = "dashed") + geom_line(aes(y = (Ycr.pred$pred.upr)^3, col = "pred"), linetype = "dashed") + labs(title = "95%-Confidence and Prediction Intervals", x = "Air Pressure (hPa)", y = "Precipitation (mm)^(1/3)") # FINAL RESIDUAL PLOTS p1aYres/p1alogYres/p1aYcrres # FINAL ESTIMATED RELATIONSHIP PLOTS p1aY/p1alogY/p1aYcr # FINAL INVERSE TRANSFORMED CONFIDENCE AND PREDICTION INTERVAL p1alogYints/p1aYcrints ################## Part 2: ##################### (sum( weather[1] == "Lund")) (sum( weather[1] == "Katterjåkk")) (sum( weather[1] == "Uppsala")) summary(Ycr.model) # p-value < 2.2e-16 so we reject H_0. (weather$location <- as.factor(weather$location)) (dats <- data.frame(residuals = Ycr.pred$e, locations = weather$location )) bp <- ggplot(data = dats, aes(x = locations, y = residuals, fill = locations)) + geom_boxplot() bp + labs(title = "Boxplot; Residuals for every location", x = "Region", y="Resiudals") + theme_classic() # 2b continued; Katterjåkk valt, minst outlliers. # relevel med ref = "Katterjåkk".-> anova(Ycr.model, NEW MODEL: (med ref=Katterjåkk)) # Boxplotta detta med ny modell Ycr.new_model<-lm( rain^(1/3) ~ pressure + relevel(location, ref = "Katterjåkk"), data = weather) (summary(Ycr.new_model)) Ycr.pred2b <- predict(Ycr.new_model, interval = "prediction") Ycr.conf2 <- predict(Ycr.new_model, interval = "confidence") ggplot(data = weather, aes(x = pressure, y = rain^(1/3), col = factor(location))) + geom_point(size = 0) + geom_line(aes(y = Ycr.pred2b[1:1496,1]), linetype = "dashed") + geom_line(aes(y = Ycr.pred2b[1:1496,2]), linetype = "dashed") + geom_line(aes(y = Ycr.pred2b[1:1496,3]), linetype = "dashed") + geom_line(aes(y = Ycr.conf2[1:1496,2]), linetype = "dashed") + geom_line(aes(y = Ycr.conf2[1:1496,3]), linetype = "dashed") + labs(title = "Precipitation (mm)^(1/3) against Air Pressure (hPa)") ggplot(data = weather, aes(x = pressure, y = rain^(1/3), col = factor(location))) + geom_point(size = 0) + geom_line(aes(y = Ycr.pred2b[1:1496,1]), linetype = "dashed") + geom_line(aes(y = Ycr.pred2b[1:1496,2]), linetype = "dashed") + geom_line(aes(y = Ycr.pred2b[1:1496,3]), linetype = "dashed") + geom_line(aes(y = Ycr.conf2[1:1496,2]), linetype = "dashed") + geom_line(aes(y = Ycr.conf2[1:1496,3]), linetype = "dashed") + facet_grid(location~. ) + labs(title = "Precipitation (mm)^(1/3) against Air Pressure (hPa)") #anova(lm(log(Pb)~I(year-1975), data = Pb_all), Pb.model ) anova(Ycr.model, Ycr.new_model) Ycr.pred$e2 <- Ycr.new_model$residuals head(Ycr.pred) (max.e2 <- max(abs(Ycr.pred$e2))) (Ycr.elims2 <- c(-max.e2, max.e2)) (dats_new <- data.frame(residuals = Ycr.pred$e2, locations = weather$location )) bp_new <- ggplot(data = dats_new, aes(x = locations, y = residuals, fill = locations)) + geom_boxplot() bp_new + labs(title = "Boxplot; Residuals for every location New Model", x = "Region", y="Resiudals") + theme_classic() bp + labs(title = "Former model") + bp_new + labs(title = "New model") # DIFFERENCE; residuals are now centered at zero and have fewer outliers # ########### 2c ################# p2c1 <- (ggplot(data = weather, aes(x = speed, y = rain^(1/3) )) + geom_point(size = 1)) p2c2 <- (ggplot(data = weather, aes(x = pressure, y = speed)) + geom_point(size=1)) (p2c1 + labs(title = "Transf. Precip. (mm)^(1/3) over Avg. Monthly Wind Speed (m/s)"))/(p2c2 + labs(title = "Ibid. over Air pressure (hPa)")) bpnews <- ggplot(data = weather, aes(x = location, y = speed, fill = location)) + geom_boxplot()#+ bpnews + theme_classic() + labs(title = "Boxplots; Avg. Monthly Wind Speed over Location") Ycr.new_new_model <- lm( rain^(1/3) ~ pressure + relevel(location, ref = "Katterjåkk") + speed, data = weather) summary(Ycr.new_new_model) confint(Ycr.new_new_model) anova(Ycr.new_model, Ycr.new_new_model) ########## 2d ########### ggplot(data = weather, aes(x = speed, y = Ycr.new_new_model$residuals, col = factor(location))) + geom_point(size = 1) + facet_grid(location~., scales = "free_x") + geom_smooth() + labs(title = "Residuals for Model 2c) over speed", y = "Residuals") Ycr.new_new_new_model<- lm( rain^(1/3)~pressure+speed*relevel(location, ref = "Katterjåkk"), data = weather) summary(Ycr.new_new_new_model) confint(Ycr.new_new_new_model) ggplot(data = weather, aes(x = speed, y = Ycr.new_new_new_model$residuals, col = factor(location))) + geom_point(size = 1) + facet_grid(location~., scales = "free_x") + geom_smooth() + labs(title = "Residuals for Model 2d) over speed", y = "Residuals") ############## 2e ############## newtimetwo <- data.frame(pressure = c(1011), speed = c(2), location = factor(rep(c("Katterjåkk","Lund","Uppsala")))) newtimefive <- data.frame(pressure = c(1011), speed = c(5), location = factor(rep(c("Katterjåkk","Lund","Uppsala")))) #2b #### YlocKatterjåkkTwo.pred <- cbind(newtimetwo, fit = predict(Ycr.new_model, newtimetwo)^3, conf = predict(Ycr.new_model, newtimetwo, interval = "confidence")^3, pred = predict(Ycr.new_model, newtimetwo, interval = "prediction")^3) # get rid of the extra fits YlocKatterjåkkTwo.pred$conf.fit <- YlocKatterjåkkTwo.pred$pred.fit <- NULL YlocKatterjåkkFive.pred <- cbind(newtimefive, fit = predict(Ycr.new_model, newtimefive)^3, conf = predict(Ycr.new_model, newtimefive, interval = "confidence")^3, pred = predict(Ycr.new_model, newtimefive, interval = "prediction")^3) # get rid of the extra fits YlocKatterjåkkFive.pred$conf.fit <- YlocKatterjåkkFive.pred$pred.fit <- NULL #2c #### YcrNewNewTwo.pred <- cbind(newtimetwo, fit = predict(Ycr.new_new_model, newtimetwo)^3, conf = predict(Ycr.new_new_model, newtimetwo, interval = "confidence")^3, pred = predict(Ycr.new_new_model, newtimetwo, interval = "prediction")^3) # get rid of the extra fits YcrNewNewTwo.pred$conf.fit <- YcrNewNewTwo.pred$pred.fit <- NULL YcrNewNewFive.pred <- cbind(newtimefive, fit = predict(Ycr.new_new_model, newtimefive)^3, conf = predict(Ycr.new_new_model, newtimefive, interval = "confidence")^3, pred = predict(Ycr.new_new_model, newtimefive, interval = "prediction")^3) # get rid of the extra fits YcrNewNewFive.pred$conf.fit <- YcrNewNewFive.pred$pred.fit <- NULL #2d #### YcrNewNewNewTwo.pred <- cbind(newtimetwo, fit = predict(Ycr.new_new_new_model, newtimetwo)^3, conf = predict(Ycr.new_new_new_model, newtimetwo, interval = "confidence")^3, pred = predict(Ycr.new_new_new_model, newtimetwo, interval = "prediction")^3) # get rid of the extra fits YcrNewNewNewTwo.pred$conf.fit <- YcrNewNewNewTwo.pred$pred.fit <- NULL YcrNewNewNewFive.pred <- cbind(newtimefive, fit = predict(Ycr.new_new_new_model, newtimefive)^3, conf = predict(Ycr.new_new_new_model, newtimefive, interval = "confidence")^3, pred = predict(Ycr.new_new_new_model, newtimefive, interval = "prediction")^3) # get rid of the extra fits YcrNewNewNewFive.pred$conf.fit <- YcrNewNewNewFive.pred$pred.fit <- NULL YlocKatterjåkkTwo.pred YcrNewNewTwo.pred YcrNewNewNewTwo.pred YlocKatterjåkkFive.pred YcrNewNewFive.pred YcrNewNewNewFive.pred ######### 3 ######## lst <- sort(influence(Ycr.new_new_new_model)$hat, index.return=TRUE, decreasing=TRUE) leverages <- lapply(lst, `[`, lst$x %in% head(unique(lst$x),7)) leverages$x weather$location[leverages$ix] #cooks.distance(Ycr.new_new_new_model) max(cooks.distance(Ycr.new_new_new_model)) which.max(cooks.distance(Ycr.new_new_new_model)) ggplot(data = weather, aes(x = pressure, y = speed, col = factor(location))) + geom_point(size = 1) + geom_point(data = weather[leverages$ix,], aes(x = pressure, y = speed, col = ), size = 1, colour = 'chocolate4') + geom_point(data = weather[1107,], aes(x = pressure, y = speed), size = 1, colour = 'black') + facet_wrap(~location, scales = "free_x") + #facet_grid(location~., scales = "free_x") + labs(title = "speed against Air Pressure (hPa)") dfbetas(Ycr.new_new_new_model)[1107,] ggplot(data = weather, aes(x = speed, y = rain^(1/3))) + geom_point(size = 1) + geom_point(data = weather[1107,], size = 1, colour = 'red') + #facet_wrap(~location) + labs(title = "Precipitation (mm) against Air speed") ####### studentizedResiduals <- rstudent(Ycr.new_new_new_model) fittedValues <- Ycr.new_new_new_model$fitted.values studresvval <- data.frame(residuals = studentizedResiduals, fit = fittedValues) ggplot(data = studresvval, aes(x = fit, y = residuals)) + geom_point(size = 1) + labs(title = "Studentized residuals against fitted values") ggplot(data = studresvval, aes(x = fit, y = sqrt(abs(residuals)))) + geom_point(size = 1) + labs(title = "Root of absolute value of studentized residuals against fitted values") ####### weather$monthnr <- as.factor(substr(weather$month, 6, 7)) null.model <- lm(rain^(1/3) ~ 1, data = weather) full.model <- lm(rain^(1/3)~pressure*relevel(location, "Katterjåkk")*speed*temp*monthnr, data = weather) BIC(null.model) BIC(full.model) step(Ycr.new_new_new_model, data = weather, scope = list(lower = null.model, upper = full.model), direction = "both", k =log(nrow(weather))) #step(lm(rain^(1/3)~pressure*location, data = weather), scope = list(lower = null.model, upper = lm(rain^(1/3)~pressure*location*speed*temp*monthnr, data = weather)), direction = "both")