## Part 1. Modellerar (rain) = beta * (pressure) + epsilon
rm(list = ls())
weather <- read.csv("weather.csv")
library(ggplot2)
Y = (weather$rain)^(1/3)
## plottar data ####
(plot.data <-
ggplot(data = weather, aes(x = pressure, y = Y)) +
geom_point(size = 3) +
xlab("Pressure (hPa)") +
ylab("Rain (mm)") +
labs(title = "Rain against pressure") +
theme(text = element_text(size = 18))
)
# Create linear model
rain.model <- lm(Y ~ pressure, data = weather)
(rain.summary <- summary(rain.model))
confint(rain.model, level = 0.95)
## Add fitted line
weather$yhat.pressure <- predict(rain.model)
(plot.line <- plot.data +
geom_line(aes(y = weather$yhat.pressure), color = "blue", linewidth = 1) +
labs(caption = "data and fitted line")
)
# Create prediction and confidence intervals
rain.pred <-
cbind(weather,
fit = predict(rain.model),
conf = predict(rain.model, interval = "confidence"),
pred = predict(rain.model, interval = "prediction"))
# get rid of the extra fits
rain.pred$conf.fit <- rain.pred$pred.fit <- NULL
## Add confidence interval####
(
plot.conf <- plot.line +
geom_ribbon(data = rain.pred, aes(ymin = conf.lwr, ymax = conf.upr), alpha = 0.2) +
labs(caption = "data, fitted line and 95% confidence interval")
)
## Add prediction interval####
plot.conf +
geom_line(data = rain.pred, aes(y = pred.lwr),
color = "red", linetype = "dashed", linewidth = 1) +
geom_line(data = rain.pred, aes(y = pred.upr),
color = "red", linetype = "dashed", linewidth = 1) +
labs(caption = "data, fitted line, 95% confidence and prediction intervals")
# Basic residual analysis####
## Add the residuals to the predicted data##
rain.pred$e <- rain.model$residuals
head(rain.pred)
# Save the max-value in order to make the y-axis symmetrical
# in the plots.
(max.e <- max(abs(rain.pred$e)))
(rain.elims <- c(-max.e, max.e))
## QQ
ggplot(data = rain.pred, aes(sample = e)) +
geom_qq(size = 3) +
geom_qq_line() +
labs(tag = "C") +
labs(title = "Normal Q-Q-plot of the residuals") +
facet_wrap(~location) +
theme(text = element_text(size = 18))
ggplot(data = rain.pred, aes(x = fit, y = e)) +
geom_point(size = 3) +
geom_hline(yintercept = 0) +
expand_limits(y = rain.elims) +
xlab("Predicted rain") +
ylab("Residual") +
labs(tag = "B") +
labs(title = "Residuals vs predicted values Y-hat") +
theme(text = element_text(size = 18))
x <- c(990, 1010, 1030)
(beta <- rain.model$coefficients[2])
(derive.mod1 <- beta)
(derive.mod2 <- 1/x)
derive.mod3 <- 1/3 * 1/((beta * x) ^ 0.3333) * beta
weather$location <- as.factor(weather$location)
## Boxplot
ggplot(data = rain.pred, aes(x = location, y = e, fill = location)) + geom_boxplot()
weather$location <- relevel(weather$location, "Uppsala")
model.2b <- lm(Y ~ pressure + location, data = weather)
model.2b.summary <- summary(model.2b)
model.2b.summary
(confint(model.2b))
# Create prediction and confidence intervals
model.2b.pred <-
cbind(weather,
fit = predict(model.2b),
conf = predict(model.2b, interval = "confidence"),
pred = predict(model.2b, interval = "prediction"))
# get rid of the extra fits
model.2b.pred$conf.fit <- model.2b.pred$pred.fit <- NULL
# Basic residual analysis####
## Add the residuals to the predicted data##
model.2b.pred$e <- model.2b$residuals
head(model.2b.pred)
# Save the max-value in order to make the y-axis symmetrical
# in the plots.
(max.e <- max(abs(model.2b.pred$e)))
(model.2b.elims <- c(-max.e, max.e))
## Boxplot
ggplot(data = model.2b.pred, aes(x = location, y = e, fill = location)) + geom_boxplot()
## Add fitted line
weather$yhat.pressure.2b <- predict(model.2b)
(plot.line <- plot.data +
geom_line(aes(y = weather$yhat.pressure.2b), color = "blue", linewidth = 1) +
labs(caption = "data and fitted line")
)
ggplot(weather, aes(x = pressure, y = Y, color = location)) +
geom_point(aes(shape = location), size = 3) +
facet_wrap(~ location) +
xlab("pressure") +
ylab("Rain1/3") +
labs(title = "") +
labs(caption = "") +
theme(text = element_text(size = 18)) +
expand_limits(y = c(0, 8))
ggplot(data = model.2b.pred, aes(x = pressure, y = rain^(1/3),
shape = location, color = location)) +
geom_point(size = 3) +
geom_line(aes(y = fit), linewidth = 1) +
geom_ribbon(aes(ymin = conf.lwr, ymax = conf.upr,
group = location),
alpha = 0.1) +
geom_line(aes(y = pred.lwr), linetype = "dashed", linewidth = 1) +
geom_line(aes(y = pred.upr), linetype = "dashed", linewidth = 1) +
facet_wrap(~ location) +
xlab("Fertilizer") +
ylab("Head weight") +
labs(title = "Head weight as a function of soil type and fertilizer") +
labs(caption = "data, fitted line, conf. and pred. interval") +
theme(text = element_text(size = 18)) +
expand_limits(y = c(0, 8))
ggplot(weather, aes(x = speed, y = Y)) +
geom_point(size = 3) +
xlab("wind") +
ylab("Rain1/3") +
labs(title = "") +
labs(caption = "") +
theme(text = element_text(size = 18)) +
expand_limits(y = c(0, 8))
ggplot(weather, aes(x = pressure, y = speed)) +
geom_point(size = 3) +
xlab("wind") +
ylab("Rain1/3") +
labs(title = "") +
labs(caption = "") +
theme(text = element_text(size = 18)) +
expand_limits(y = c(0, 8))
## Model 2c.¨
weather$location <- relevel(weather$location, "Uppsala")
ggplot(data = weather, aes(x = location, y = speed, fill = location)) + geom_boxplot()
model.2c <- lm(Y ~ pressure + location + speed, data = weather)
model.2c.summary <- summary(model.2c)
model.2c.summary
(confint(model.2c))
model.2c.pred <-
cbind(weather,
fit = predict(model.2c),
conf = predict(model.2c, interval = "confidence"),
pred = predict(model.2c, interval = "prediction"))
# get rid of the extra fits
model.2c.pred$conf.fit <- model.2c.pred$pred.fit <- NULL
# Basic residual analysis####
## Add the residuals to the predicted data##
model.2c.pred$e <- model.2c$residuals
head(model.2c.pred)
# Save the max-value in order to make the y-axis symmetrical
# in the plots.
(max.e <- max(abs(model.2c.pred$e)))
(model.2c.elims <- c(-max.e, max.e))
## Boxplot
ggplot(data = model.2c.pred, aes(x = speed, y = e, fill = location)) + facet_wrap(~location, scales = "free_x") + geom_hline(yintercept = 0) + geom_smooth()
## Model 2d.¨
weather$location <- relevel(weather$location, "Uppsala")
model.2d <- lm(Y ~ pressure + location*speed, data = weather)
model.2d.summary <- summary(model.2d)
model.2d.summary
(confint(model.2d))
model.2d.pred <-
cbind(weather,
fit = predict(model.2d),
conf = predict(model.2d, interval = "confidence"),
pred = predict(model.2d, interval = "prediction"))
# get rid of the extra fits
model.2d.pred$conf.fit <- model.2d.pred$pred.fit <- NULL
# Basic residual analysis####
## Add the residuals to the predicted data##
model.2d.pred$e <- model.2d$residuals
head(model.2d.pred)
# Save the max-value in order to make the y-axis symmetrical
# in the plots.
(max.e <- max(abs(model.2d.pred$e)))
(model.2d.elims <- c(-max.e, max.e))
## Boxplot
ggplot(data = model.2d.pred, aes(x = speed, y = e, fill = location)) + facet_wrap(~location, scales = "free_x") + geom_hline(yintercept = 0) + geom_smooth()