rm(list=ls())
library(ggplot2)
library(patchwork)
weather <- read.csv("Data/weather.csv")
(Y.model <- lm(rain ~pressure, data = weather))
p1aY = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) +
geom_line(aes(y = Y.model$coefficients[1] + pressure*Y.model$coefficients[2], col = "est"), linetype = "dashed") +
labs(title = "Precipitation (mm) against Air Pressure (hPa)")
# Exponential decay
press <- data.frame(weather[4])
Y.pred <-
cbind(press,
fit = predict(Y.model, press),
conf = predict(Y.model, press, interval = "confidence"),
pred = predict(Y.model, press, interval = "prediction"))
head(Y.pred)
# get rid of the extra fits
Y.pred$conf.fit <- Y.pred$pred.fit <- NULL
head(Y.pred)
Y.pred$e <- Y.model$residuals
head(Y.pred)
(max.e <- max(abs(Y.pred$e)))
(Y.elims <- c(-max.e, max.e))
## Plot against yhat####
# Add a horizontal line at y=0,
# and expand the y-axis to include +/- max residual.
p1aYres = ggplot(data = Y.pred, aes(x = fit, y = e)) +
geom_point(size = 1) +
geom_hline(yintercept = 0) +
expand_limits(y = Y.elims) +
xlab(" ") +
ylab(" ") +
labs(tag = "identity") +
labs(title = "Residuals vs Predicted Precipitation") +
theme(text = element_text(size = 18))
ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) +
geom_line(aes(y = Y.pred$fit, col = "fit"), linetype = "dashed") +
geom_line(aes(y = Y.pred$conf.lwr, col = "conf"), linetype = "dashed") +
geom_line(aes(y = Y.pred$conf.upr, col = "conf"), linetype = "dashed") +
geom_line(aes(y = Y.pred$pred.lwr, col = "pred"), linetype = "dashed") +
geom_line(aes(y = Y.pred$pred.upr, col = "pred"), linetype = "dashed") +
labs(title = "Precipitation (mm) against Air Pressure (hPa)")
(Ylog.model <- lm(log(rain) ~ pressure, data = weather))
p1alogY = ggplot(data = weather, aes(x = pressure, y = log(rain))) + geom_point(size = 1) +
geom_line(aes(y = Ylog.model$coefficients[1] + pressure*Ylog.model$coefficients[2], col = "est"), linetype = "dashed") +
labs(title = "Precipitation (log(mm)) against Air Pressure (hPa)")
# Linear to the left, but after 1010 hPa the data seems to have another slope
p1alogY
## resid anal ###
Ylog.pred <-
cbind(press,
fit = predict(Ylog.model, press),
conf = predict(Ylog.model, press, interval = "confidence"),
pred = predict(Ylog.model, press, interval = "prediction"))
head(Y.pred)
# get rid of the extra fits
Ylog.pred$conf.fit <- Ylog.pred$pred.fit <- NULL
head(Ylog.pred)
Ylog.pred$e <- Ylog.model$residuals
head(Ylog.pred)
(max.e <- max(abs(Ylog.pred$e)))
(Ylog.elims <- c(-max.e, max.e))
## Plot against yhat####
# Add a horizontal line at y=0,
# and expand the y-axis to include +/- max residual.
p1alogYres = ggplot(data = Ylog.pred, aes(x = fit, y = e)) +
geom_point(size = 1) +
geom_hline(yintercept = 0) +
expand_limits(y = Ylog.elims) +
xlab(" ") +
ylab("Residual") +
labs(tag = "log") +
#labs(title = "Residuals vs Predicted log-Precipitation") +
theme(text = element_text(size = 18))
p1alogYints = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) +
geom_line(aes(y = exp(Ylog.pred$fit), col = "fit"), linetype = "dashed") +
geom_line(aes(y = exp(Ylog.pred$conf.lwr), col = "conf"), linetype = "dashed") +
geom_line(aes(y = exp(Ylog.pred$conf.upr), col = "conf"), linetype = "dashed") +
geom_line(aes(y = exp(Ylog.pred$pred.lwr), col = "pred"), linetype = "dashed") +
geom_line(aes(y = exp(Ylog.pred$pred.upr), col = "pred"), linetype = "dashed") +
labs(title = "Precipitation log(mm) against Air Pressure (hPa)")
p1alogYints
(Ycr.model <- lm(rain^(1/3) ~ pressure, data = weather))
p1aYcr = ggplot(data = weather, aes(x = pressure, y = rain^(1/3))) + geom_point(size = 1) +
geom_line(aes(y = Ycr.model$coefficients[1] +pressure*Ycr.model$coefficients[2], col = "est"), linetype = "dashed") +
labs(title = "Precipitation ((mm)^1/3) Against Air Pressure (hPa)")
# Linear, fits our model Y = xB + e
p1aYcr
## resid anal ###
Ycr.pred <-
cbind(press,
fit = predict(Ycr.model, press),
conf = predict(Ycr.model, press, interval = "confidence"),
pred = predict(Ycr.model, press, interval = "prediction"))
head(Y.pred)
# get rid of the extra fits
Ycr.pred$conf.fit <- Ycr.pred$pred.fit <- NULL
head(Ycr.pred)
Ycr.pred$e <- Ycr.model$residuals
head(Ycr.pred)
(max.e <- max(abs(Ycr.pred$e)))
(Ycr.elims <- c(-max.e, max.e))
## Plot against yhat####
# Add a horizontal line at y=0,
# and expand the y-axis to include +/- max residual.
p1aYcrres = ggplot(data = Ycr.pred, aes(x = fit, y = e)) +
geom_point(size = 1) +
geom_hline(yintercept = 0) +
expand_limits(y = Ycr.elims) +
xlab("Predicted Precipitation") +
ylab(" ") +
labs(tag = "cube root") +
#labs(title = "Residuals vs Predicted Precipitation") +
theme(text = element_text(size = 18))
p1aYcrints = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) +
geom_line(aes(y = (Ycr.pred$fit)^3, col = "fit"), linetype = "dashed") +
geom_line(aes(y = (Ycr.pred$conf.lwr)^3, col = "conf"), linetype = "dashed") +
geom_line(aes(y = (Ycr.pred$conf.upr)^3, col = "conf"), linetype = "dashed") +
geom_line(aes(y = (Ycr.pred$pred.lwr)^3, col = "pred"), linetype = "dashed") +
geom_line(aes(y = (Ycr.pred$pred.upr)^3, col = "pred"), linetype = "dashed") +
labs(title = "95%-Confidence and Prediction Intervals", x = "Air Pressure (hPa)", y = "Precipitation (mm)^(1/3)")
# FINAL RESIDUAL PLOTS
p1aYres/p1alogYres/p1aYcrres
# FINAL ESTIMATED RELATIONSHIP PLOTS
p1aY/p1alogY/p1aYcr
# FINAL INVERSE TRANSFORMED CONFIDENCE AND PREDICTION INTERVAL
p1alogYints/p1aYcrints
################## Part 2: #####################
(sum( weather[1] == "Lund"))
(sum( weather[1] == "Katterjåkk"))
(sum( weather[1] == "Uppsala"))
summary(Ycr.model) # p-value < 2.2e-16 so we reject H_0.
(weather$location <- as.factor(weather$location))
(dats <- data.frame(residuals = Ycr.pred$e, locations = weather$location ))
bp <- ggplot(data = dats, aes(x = locations, y = residuals, fill = locations)) +
geom_boxplot()
bp + labs(title = "Boxplot; Residuals for every location", x = "Region", y="Resiudals") +
theme_classic()
# 2b continued; Katterjåkk valt, minst outlliers.
# relevel med ref = "Katterjåkk".-> anova(Ycr.model, NEW MODEL: (med ref=Katterjåkk))
# Boxplotta detta med ny modell
Ycr.new_model<-lm( rain^(1/3) ~ pressure + relevel(location, ref = "Katterjåkk"), data = weather)
(summary(Ycr.new_model))
Ycr.pred2b <- predict(Ycr.new_model, interval = "prediction")
Ycr.conf2 <- predict(Ycr.new_model, interval = "confidence")
ggplot(data = weather, aes(x = pressure, y = rain^(1/3), col = factor(location))) + geom_point(size = 0) +
geom_line(aes(y = Ycr.pred2b[1:1496,1]), linetype = "dashed") +
geom_line(aes(y = Ycr.pred2b[1:1496,2]), linetype = "dashed") +
geom_line(aes(y = Ycr.pred2b[1:1496,3]), linetype = "dashed") +
geom_line(aes(y = Ycr.conf2[1:1496,2]), linetype = "dashed") +
geom_line(aes(y = Ycr.conf2[1:1496,3]), linetype = "dashed") +
labs(title = "Precipitation (mm)^(1/3) against Air Pressure (hPa)")
ggplot(data = weather, aes(x = pressure, y = rain^(1/3), col = factor(location))) + geom_point(size = 0) +
geom_line(aes(y = Ycr.pred2b[1:1496,1]), linetype = "dashed") +
geom_line(aes(y = Ycr.pred2b[1:1496,2]), linetype = "dashed") +
geom_line(aes(y = Ycr.pred2b[1:1496,3]), linetype = "dashed") +
geom_line(aes(y = Ycr.conf2[1:1496,2]), linetype = "dashed") +
geom_line(aes(y = Ycr.conf2[1:1496,3]), linetype = "dashed") +
facet_grid(location~. ) +
labs(title = "Precipitation (mm)^(1/3) against Air Pressure (hPa)")
#anova(lm(log(Pb)~I(year-1975), data = Pb_all), Pb.model )
anova(Ycr.model, Ycr.new_model)
Ycr.pred$e2 <- Ycr.new_model$residuals
head(Ycr.pred)
(max.e2 <- max(abs(Ycr.pred$e2)))
(Ycr.elims2 <- c(-max.e2, max.e2))
(dats_new <- data.frame(residuals = Ycr.pred$e2, locations = weather$location ))
bp_new <- ggplot(data = dats_new, aes(x = locations, y = residuals, fill = locations)) +
geom_boxplot()
bp_new + labs(title = "Boxplot; Residuals for every location New Model", x = "Region", y="Resiudals") +
theme_classic()
bp + labs(title = "Former model") + bp_new + labs(title = "New model")
# DIFFERENCE; residuals are now centered at zero and have fewer outliers
# ########### 2c #################
p2c1 <- (ggplot(data = weather, aes(x = speed, y = rain^(1/3) )) + geom_point(size = 1))
p2c2 <- (ggplot(data = weather, aes(x = pressure, y = speed)) + geom_point(size=1))
(p2c1 + labs(title = "Transf. Precip. (mm)^(1/3) over Avg. Monthly Wind Speed (m/s)"))/(p2c2 + labs(title = "Ibid. over Air pressure (hPa)"))
bpnews <- ggplot(data = weather, aes(x = location, y = speed, fill = location)) +
geom_boxplot()#+
bpnews + theme_classic() + labs(title = "Boxplots; Avg. Monthly Wind Speed over Location")
Ycr.new_new_model <- lm( rain^(1/3) ~ pressure + relevel(location, ref = "Katterjåkk") + speed, data = weather)
summary(Ycr.new_new_model)
confint(Ycr.new_new_model)
anova(Ycr.new_model, Ycr.new_new_model)
########## 2d ###########
ggplot(data = weather, aes(x = speed, y = Ycr.new_new_model$residuals, col = factor(location))) +
geom_point(size = 1) + facet_grid(location~., scales = "free_x") +
geom_smooth() + labs(title = "Residuals for Model 2c) over speed", y = "Residuals")
Ycr.new_new_new_model<- lm( rain^(1/3)~pressure+speed*relevel(location, ref = "Katterjåkk"), data = weather)
summary(Ycr.new_new_new_model)
confint(Ycr.new_new_new_model)
ggplot(data = weather, aes(x = speed, y = Ycr.new_new_new_model$residuals, col = factor(location))) +
geom_point(size = 1) + facet_grid(location~., scales = "free_x") +
geom_smooth() + labs(title = "Residuals for Model 2d) over speed", y = "Residuals")
############## 2e ##############
newtimetwo <- data.frame(pressure = c(1011), speed = c(2), location = factor(rep(c("Katterjåkk","Lund","Uppsala"))))
newtimefive <- data.frame(pressure = c(1011), speed = c(5), location = factor(rep(c("Katterjåkk","Lund","Uppsala"))))
#2b ####
YlocKatterjåkkTwo.pred <-
cbind(newtimetwo,
fit = predict(Ycr.new_model, newtimetwo)^3,
conf = predict(Ycr.new_model, newtimetwo, interval = "confidence")^3,
pred = predict(Ycr.new_model, newtimetwo, interval = "prediction")^3)
# get rid of the extra fits
YlocKatterjåkkTwo.pred$conf.fit <- YlocKatterjåkkTwo.pred$pred.fit <- NULL
YlocKatterjåkkFive.pred <-
cbind(newtimefive,
fit = predict(Ycr.new_model, newtimefive)^3,
conf = predict(Ycr.new_model, newtimefive, interval = "confidence")^3,
pred = predict(Ycr.new_model, newtimefive, interval = "prediction")^3)
# get rid of the extra fits
YlocKatterjåkkFive.pred$conf.fit <- YlocKatterjåkkFive.pred$pred.fit <- NULL
#2c ####
YcrNewNewTwo.pred <-
cbind(newtimetwo,
fit = predict(Ycr.new_new_model, newtimetwo)^3,
conf = predict(Ycr.new_new_model, newtimetwo, interval = "confidence")^3,
pred = predict(Ycr.new_new_model, newtimetwo, interval = "prediction")^3)
# get rid of the extra fits
YcrNewNewTwo.pred$conf.fit <- YcrNewNewTwo.pred$pred.fit <- NULL
YcrNewNewFive.pred <-
cbind(newtimefive,
fit = predict(Ycr.new_new_model, newtimefive)^3,
conf = predict(Ycr.new_new_model, newtimefive, interval = "confidence")^3,
pred = predict(Ycr.new_new_model, newtimefive, interval = "prediction")^3)
# get rid of the extra fits
YcrNewNewFive.pred$conf.fit <- YcrNewNewFive.pred$pred.fit <- NULL
#2d ####
YcrNewNewNewTwo.pred <-
cbind(newtimetwo,
fit = predict(Ycr.new_new_new_model, newtimetwo)^3,
conf = predict(Ycr.new_new_new_model, newtimetwo, interval = "confidence")^3,
pred = predict(Ycr.new_new_new_model, newtimetwo, interval = "prediction")^3)
# get rid of the extra fits
YcrNewNewNewTwo.pred$conf.fit <- YcrNewNewNewTwo.pred$pred.fit <- NULL
YcrNewNewNewFive.pred <-
cbind(newtimefive,
fit = predict(Ycr.new_new_new_model, newtimefive)^3,
conf = predict(Ycr.new_new_new_model, newtimefive, interval = "confidence")^3,
pred = predict(Ycr.new_new_new_model, newtimefive, interval = "prediction")^3)
# get rid of the extra fits
YcrNewNewNewFive.pred$conf.fit <- YcrNewNewNewFive.pred$pred.fit <- NULL
YlocKatterjåkkTwo.pred
YcrNewNewTwo.pred
YcrNewNewNewTwo.pred
YlocKatterjåkkFive.pred
YcrNewNewFive.pred
YcrNewNewNewFive.pred
######### 3 ########
lst <- sort(influence(Ycr.new_new_new_model)$hat, index.return=TRUE, decreasing=TRUE)
leverages <- lapply(lst, `[`, lst$x %in% head(unique(lst$x),7))
leverages$x
weather$location[leverages$ix]
#cooks.distance(Ycr.new_new_new_model)
max(cooks.distance(Ycr.new_new_new_model))
which.max(cooks.distance(Ycr.new_new_new_model))
ggplot(data = weather, aes(x = pressure, y = speed, col = factor(location))) + geom_point(size = 1) +
geom_point(data = weather[leverages$ix,], aes(x = pressure, y = speed, col = ), size = 1, colour = 'chocolate4') +
geom_point(data = weather[1107,], aes(x = pressure, y = speed), size = 1, colour = 'black') +
facet_wrap(~location, scales = "free_x") +
#facet_grid(location~., scales = "free_x") +
labs(title = "speed against Air Pressure (hPa)")
dfbetas(Ycr.new_new_new_model)[1107,]
ggplot(data = weather, aes(x = speed, y = rain^(1/3))) + geom_point(size = 1) +
geom_point(data = weather[1107,], size = 1, colour = 'red') +
#facet_wrap(~location) +
labs(title = "Precipitation (mm) against Air speed")
#######
studentizedResiduals <- rstudent(Ycr.new_new_new_model)
fittedValues <- Ycr.new_new_new_model$fitted.values
studresvval <- data.frame(residuals = studentizedResiduals, fit = fittedValues)
ggplot(data = studresvval, aes(x = fit, y = residuals)) + geom_point(size = 1) +
labs(title = "Studentized residuals against fitted values")
ggplot(data = studresvval, aes(x = fit, y = sqrt(abs(residuals)))) + geom_point(size = 1) +
labs(title = "Root of absolute value of studentized residuals against fitted values")
#######
weather$monthnr <- as.factor(substr(weather$month, 6, 7))
null.model <- lm(rain^(1/3) ~ 1, data = weather)
full.model <- lm(rain^(1/3)~pressure*relevel(location, "Katterjåkk")*speed*temp*monthnr, data = weather)
BIC(null.model)
BIC(full.model)
step(Ycr.new_new_new_model, data = weather, scope = list(lower = null.model, upper = full.model), direction = "both", k =log(nrow(weather)))
#step(lm(rain^(1/3)~pressure*location, data = weather), scope = list(lower = null.model, upper = lm(rain^(1/3)~pressure*location*speed*temp*monthnr, data = weather)), direction = "both")