Untitled

mail@pastecode.io avatar
unknown
r
2 years ago
6.5 kB
0
Indexable
rm(list = ls())
weather <- read.csv("Data/weather.csv")
library(ggplot2)

## Transformed precipitation
Y = (weather$rain)^(1/3)
(plot.data <- 
    ggplot(data = weather, aes(x = pressure, y = Y)) + 
    geom_point(size = 3) +
    xlab("Pressure (hPa)") +
    ylab("Cube root of rain (mm^(1/3))") +
    labs(title = "Rain against pressure") +
    theme(text = element_text(size = 18))
)

## Creating all models ####
weather$location <- as.factor(weather$location)
weather$location <- relevel(weather$location, "Uppsala")

model.1a <- lm(Y ~ pressure, data = weather)
model.2b <- lm(Y ~ pressure + location, data = weather)
model.2c <- lm(Y ~ pressure + location + speed, data = weather)
model.2d <- lm(Y ~ pressure + location*speed, data = weather)

## Testing of significance ####

## Prediction and confidence intervals for all models ####
confint(model.1a, level = 0.95)
confint(model.2b, level = 0.95)
confint(model.2c, level = 0.95)
confint(model.2d, level = 0.95)

model.1a.pred <-
  cbind(weather,
        fit = predict(model.1a),
        conf = predict(model.1a, interval = "confidence"),
        pred = predict(model.1a, interval = "prediction"))

model.2b.pred <-
  cbind(weather,
        fit = predict(model.2b),
        conf = predict(model.2b, interval = "confidence"),
        pred = predict(model.2b, interval = "prediction"))

model.2c.pred <-
  cbind(weather,
        fit = predict(model.2c),
        conf = predict(model.2c, interval = "confidence"),
        pred = predict(model.2c, interval = "prediction"))

model.2d.pred <-
  cbind(weather,
        fit = predict(model.2d),
        conf = predict(model.2d, interval = "confidence"),
        pred = predict(model.2d, interval = "prediction"))

# Getting rid of extra fits
model.1a.pred$conf.fit <- model.1a.pred$pred.fit <- NULL
model.2b.pred$conf.fit <- model.2b.pred$pred.fit <- NULL
model.2c.pred$conf.fit <- model.2c.pred$pred.fit <- NULL
model.2d.pred$conf.fit <- model.2d.pred$pred.fit <- NULL

# Adding fitted lines
weather$yhat.1a <- predict(model.1a)
weather$yhat.2b <- predict(model.2b)
weather$yhat.2c <- predict(model.2c)
weather$yhat.2d <- predict(model.2d)

## Predict expected precipitation for specific pressure ####
x0 <- data.frame(location = 'Katterjåkk', pressure = 1011, speed = 5)
y0.1a <- predict(model.1a, x0, se.fit = TRUE)
y0.2b <- predict(model.2b, x0, se.fit = TRUE)
y0.2c <- predict(model.2c, x0, se.fit = TRUE)
y0.2d <- predict(model.2d, x0, se.fit = TRUE)
y0.1a$fit^3
y0.2b$fit^3
y0.2c$fit^3
y0.2d$fit^3




## Residue calculations for all models ####
model.1a.pred$e <- model.1a$residuals
model.2b.pred$e <- model.2b$residuals
model.2c.pred$e <- model.2c$residuals
model.2d.pred$e <- model.2d$residuals

(max.e.1a <- max(abs(model.1a.pred$e)))
(max.e.2b <- max(abs(model.2b.pred$e)))
(max.e.2c <- max(abs(model.2c.pred$e)))
(max.e.2d <- max(abs(model.2d.pred$e)))

(model.1a.elims <- c(-max.e.1a, max.e.1a))
(model.2b.elims <- c(-max.e.2b, max.e.2b))
(model.2c.elims <- c(-max.e.2c, max.e.2c))
(model.2d.elims <- c(-max.e.2d, max.e.2d))


## Plots for model 1a ####

# Adding lines and confidence/prediction intervals to the plot
(plot.1a <- plot.data + 
    geom_line(data = weather, aes(y = yhat.1a), color = "blue", linewidth = 2) +
    geom_ribbon(data = model.1a.pred, aes(ymin = conf.lwr, ymax = conf.upr), alpha = 0.2) +
    geom_line(data = model.1a.pred, aes(y = pred.lwr),
              color = "red", linetype = "dashed", linewidth = 2) +
    geom_line(data = model.1a.pred, aes(y = pred.upr),
              color = "red", linetype = "dashed", linewidth = 2) 
)

## QQ
ggplot(data = model.1a.pred, aes(sample = e)) +
  geom_qq(size = 3) +
  geom_qq_line() +
  labs(tag = "C") +
  labs(title = "Normal Q-Q-plot of the residuals") +
  facet_wrap(~location) +
  theme(text = element_text(size = 18))


ggplot(data = model.1a.pred, aes(x = fit, y = e)) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0) +
  expand_limits(y = model.1a.elims) +
  xlab("Predicted rain") +
  ylab("Residual") +
  labs(tag = "B") +
  labs(title = "Residuals vs predicted values Y-hat") +
  theme(text = element_text(size = 18))

## Boxplot of residue against location
ggplot(data = model.1a.pred, aes(x = location, y = e, fill = location)) + 
  geom_boxplot()

## Plots for model 2b ####

# Transformed plot
ggplot(data = model.2b.pred, aes(x = pressure, y = Y,
                                 shape = location, color = location)) +
  geom_point(size = 3) +
  geom_line(aes(y = fit), linewidth = 1) +
  geom_ribbon(aes(ymin = conf.lwr, ymax = conf.upr,
                  group = location),
              alpha = 0.1) +
  geom_line(aes(y = pred.lwr), linetype = "dashed", linewidth = 1) +
  geom_line(aes(y = pred.upr), linetype = "dashed", linewidth = 1) +
  #facet_wrap(~ location) +
  xlab("Pressure (hPa)") +
  ylab("Transformed precipitation (mm^(1/3))") + 
  labs(title = "Transformed rain against pressure over each location for model 2b") +
  labs(caption = "data, fitted line, conf. and pred. interval") +
  theme(text = element_text(size = 18))
#expand_limits(y = c(0, 8))



# Non transformed observation with fitted lines and intervals
ggplot(data = model.2b.pred, aes(x = pressure, y = rain,
                                 shape = location, color = location)) +
  geom_point(size = 3) +
  geom_line(aes(y = fit^3), linewidth = 1) +
  geom_ribbon(aes(ymin = conf.lwr^3, ymax = conf.upr^3,
                  group = location),
              alpha = 0.1) +
  geom_line(aes(y = pred.lwr^3), linetype = "dashed", linewidth = 1) +
  geom_line(aes(y = pred.upr^3), linetype = "dashed", linewidth = 1) +
  facet_wrap(~ location) +
  xlab("Pressure (hPa)") +
  ylab("Transformed precipitation (mm^(1/3))") + 
  labs(title = "Transformed rain against pressure over each location for model 2b") +
  labs(caption = "data, fitted line, conf. and pred. interval") +
  theme(text = element_text(size = 18))
  #expand_limits(y = c(0, 8))

## Boxplot
ggplot(data = model.2b.pred, aes(x = location, y = e, fill = location)) + geom_boxplot()

## Plots for model 2c ####
ggplot(data = weather, aes(x = location, y = speed, fill = location)) + geom_boxplot()
ggplot(data = model.2c.pred, aes(x = speed, y = e, fill = location)) + 
  facet_wrap(~location, scales = "free_x") + 
  geom_hline(yintercept = 0) + 
  geom_smooth()
## Plots for model 2d ####
ggplot(data = model.2d.pred, aes(x = speed, y = e, fill = location)) + 
  facet_wrap(~location, scales = "free_x") + 
  geom_hline(yintercept = 0) + 
  geom_smooth()