rm(list = ls())
weather <- read.csv("Data/weather.csv")
library(ggplot2)
## Transformed precipitation
Y = (weather$rain)^(1/3)
(plot.data <-
ggplot(data = weather, aes(x = pressure, y = Y)) +
geom_point(size = 3) +
xlab("Pressure (hPa)") +
ylab("Cube root of rain (mm^(1/3))") +
labs(title = "Rain against pressure") +
theme(text = element_text(size = 18))
)
## Creating all models ####
weather$location <- as.factor(weather$location)
weather$location <- relevel(weather$location, "Uppsala")
model.1a <- lm(Y ~ pressure, data = weather)
model.2b <- lm(Y ~ pressure + location, data = weather)
model.2c <- lm(Y ~ pressure + location + speed, data = weather)
model.2d <- lm(Y ~ pressure + location*speed, data = weather)
## Testing of significance ####
## Prediction and confidence intervals for all models ####
confint(model.1a, level = 0.95)
confint(model.2b, level = 0.95)
confint(model.2c, level = 0.95)
confint(model.2d, level = 0.95)
model.1a.pred <-
cbind(weather,
fit = predict(model.1a),
conf = predict(model.1a, interval = "confidence"),
pred = predict(model.1a, interval = "prediction"))
model.2b.pred <-
cbind(weather,
fit = predict(model.2b),
conf = predict(model.2b, interval = "confidence"),
pred = predict(model.2b, interval = "prediction"))
model.2c.pred <-
cbind(weather,
fit = predict(model.2c),
conf = predict(model.2c, interval = "confidence"),
pred = predict(model.2c, interval = "prediction"))
model.2d.pred <-
cbind(weather,
fit = predict(model.2d),
conf = predict(model.2d, interval = "confidence"),
pred = predict(model.2d, interval = "prediction"))
# Getting rid of extra fits
model.1a.pred$conf.fit <- model.1a.pred$pred.fit <- NULL
model.2b.pred$conf.fit <- model.2b.pred$pred.fit <- NULL
model.2c.pred$conf.fit <- model.2c.pred$pred.fit <- NULL
model.2d.pred$conf.fit <- model.2d.pred$pred.fit <- NULL
# Adding fitted lines
weather$yhat.1a <- predict(model.1a)
weather$yhat.2b <- predict(model.2b)
weather$yhat.2c <- predict(model.2c)
weather$yhat.2d <- predict(model.2d)
## Predict expected precipitation for specific pressure ####
x0 <- data.frame(location = 'Katterjåkk', pressure = 1011, speed = 5)
y0.1a <- predict(model.1a, x0, se.fit = TRUE)
y0.2b <- predict(model.2b, x0, se.fit = TRUE)
y0.2c <- predict(model.2c, x0, se.fit = TRUE)
y0.2d <- predict(model.2d, x0, se.fit = TRUE)
y0.1a$fit^3
y0.2b$fit^3
y0.2c$fit^3
y0.2d$fit^3
## Residue calculations for all models ####
model.1a.pred$e <- model.1a$residuals
model.2b.pred$e <- model.2b$residuals
model.2c.pred$e <- model.2c$residuals
model.2d.pred$e <- model.2d$residuals
(max.e.1a <- max(abs(model.1a.pred$e)))
(max.e.2b <- max(abs(model.2b.pred$e)))
(max.e.2c <- max(abs(model.2c.pred$e)))
(max.e.2d <- max(abs(model.2d.pred$e)))
(model.1a.elims <- c(-max.e.1a, max.e.1a))
(model.2b.elims <- c(-max.e.2b, max.e.2b))
(model.2c.elims <- c(-max.e.2c, max.e.2c))
(model.2d.elims <- c(-max.e.2d, max.e.2d))
## Plots for model 1a ####
# Adding lines and confidence/prediction intervals to the plot
(plot.1a <- plot.data +
geom_line(data = weather, aes(y = yhat.1a), color = "blue", linewidth = 2) +
geom_ribbon(data = model.1a.pred, aes(ymin = conf.lwr, ymax = conf.upr), alpha = 0.2) +
geom_line(data = model.1a.pred, aes(y = pred.lwr),
color = "red", linetype = "dashed", linewidth = 2) +
geom_line(data = model.1a.pred, aes(y = pred.upr),
color = "red", linetype = "dashed", linewidth = 2)
)
## QQ
ggplot(data = model.1a.pred, aes(sample = e)) +
geom_qq(size = 3) +
geom_qq_line() +
labs(tag = "C") +
labs(title = "Normal Q-Q-plot of the residuals") +
facet_wrap(~location) +
theme(text = element_text(size = 18))
ggplot(data = model.1a.pred, aes(x = fit, y = e)) +
geom_point(size = 3) +
geom_hline(yintercept = 0) +
expand_limits(y = model.1a.elims) +
xlab("Predicted rain") +
ylab("Residual") +
labs(tag = "B") +
labs(title = "Residuals vs predicted values Y-hat") +
theme(text = element_text(size = 18))
## Boxplot of residue against location
ggplot(data = model.1a.pred, aes(x = location, y = e, fill = location)) +
geom_boxplot()
## Plots for model 2b ####
# Transformed plot
ggplot(data = model.2b.pred, aes(x = pressure, y = Y,
shape = location, color = location)) +
geom_point(size = 3) +
geom_line(aes(y = fit), linewidth = 1) +
geom_ribbon(aes(ymin = conf.lwr, ymax = conf.upr,
group = location),
alpha = 0.1) +
geom_line(aes(y = pred.lwr), linetype = "dashed", linewidth = 1) +
geom_line(aes(y = pred.upr), linetype = "dashed", linewidth = 1) +
#facet_wrap(~ location) +
xlab("Pressure (hPa)") +
ylab("Transformed precipitation (mm^(1/3))") +
labs(title = "Transformed rain against pressure over each location for model 2b") +
labs(caption = "data, fitted line, conf. and pred. interval") +
theme(text = element_text(size = 18))
#expand_limits(y = c(0, 8))
# Non transformed observation with fitted lines and intervals
ggplot(data = model.2b.pred, aes(x = pressure, y = rain,
shape = location, color = location)) +
geom_point(size = 3) +
geom_line(aes(y = fit^3), linewidth = 1) +
geom_ribbon(aes(ymin = conf.lwr^3, ymax = conf.upr^3,
group = location),
alpha = 0.1) +
geom_line(aes(y = pred.lwr^3), linetype = "dashed", linewidth = 1) +
geom_line(aes(y = pred.upr^3), linetype = "dashed", linewidth = 1) +
facet_wrap(~ location) +
xlab("Pressure (hPa)") +
ylab("Transformed precipitation (mm^(1/3))") +
labs(title = "Transformed rain against pressure over each location for model 2b") +
labs(caption = "data, fitted line, conf. and pred. interval") +
theme(text = element_text(size = 18))
#expand_limits(y = c(0, 8))
## Boxplot
ggplot(data = model.2b.pred, aes(x = location, y = e, fill = location)) + geom_boxplot()
## Plots for model 2c ####
ggplot(data = weather, aes(x = location, y = speed, fill = location)) + geom_boxplot()
ggplot(data = model.2c.pred, aes(x = speed, y = e, fill = location)) +
facet_wrap(~location, scales = "free_x") +
geom_hline(yintercept = 0) +
geom_smooth()
## Plots for model 2d ####
ggplot(data = model.2d.pred, aes(x = speed, y = e, fill = location)) +
facet_wrap(~location, scales = "free_x") +
geom_hline(yintercept = 0) +
geom_smooth()