Untitled

mail@pastecode.io avatar
unknown
r
2 years ago
7.3 kB
2
Indexable
## Part 1. Modellerar (rain) = beta * (pressure) + epsilon

rm(list = ls())
weather <- read.csv("weather.csv")
library(ggplot2)

Y = (weather$rain)^(1/3)

## plottar data ####
(plot.data <- 
   ggplot(data = weather, aes(x = pressure, y = Y)) + 
   geom_point(size = 3) +
   xlab("Pressure (hPa)") +
   ylab("Rain (mm)") +
   labs(title = "Rain against pressure") +
   theme(text = element_text(size = 18))
)

# Create linear model
rain.model <- lm(Y ~ pressure, data = weather)
(rain.summary <- summary(rain.model))
confint(rain.model, level = 0.95)

## Add fitted line
weather$yhat.pressure <- predict(rain.model)
(plot.line <- plot.data + 
    geom_line(aes(y = weather$yhat.pressure), color = "blue", linewidth = 1) +
    labs(caption = "data and fitted line")
)

# Create prediction and confidence intervals
rain.pred <-
  cbind(weather,
        fit = predict(rain.model),
        conf = predict(rain.model, interval = "confidence"),
        pred = predict(rain.model, interval = "prediction"))

# get rid of the extra fits
rain.pred$conf.fit <- rain.pred$pred.fit <- NULL

## Add confidence interval####
(
  plot.conf <- plot.line + 
    geom_ribbon(data = rain.pred, aes(ymin = conf.lwr, ymax = conf.upr), alpha = 0.2) +
    labs(caption = "data, fitted line and 95% confidence interval")
)

## Add prediction interval####
plot.conf +
  geom_line(data = rain.pred, aes(y = pred.lwr),
            color = "red", linetype = "dashed", linewidth = 1) +
  geom_line(data = rain.pred, aes(y = pred.upr),
            color = "red", linetype = "dashed", linewidth = 1) +
  labs(caption = "data, fitted line, 95% confidence and prediction intervals")




# Basic residual analysis####
## Add the residuals to the predicted data##
rain.pred$e <- rain.model$residuals
head(rain.pred)

# Save the max-value in order to make the y-axis symmetrical
# in the plots.
(max.e <- max(abs(rain.pred$e)))
(rain.elims <- c(-max.e, max.e))

## QQ
ggplot(data = rain.pred, aes(sample = e)) +
  geom_qq(size = 3) +
  geom_qq_line() +
  labs(tag = "C") +
  labs(title = "Normal Q-Q-plot of the residuals") +
  facet_wrap(~location) +
  theme(text = element_text(size = 18))


ggplot(data = rain.pred, aes(x = fit, y = e)) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0) +
  expand_limits(y = rain.elims) +
  xlab("Predicted rain") +
  ylab("Residual") +
  labs(tag = "B") +
  labs(title = "Residuals vs predicted values Y-hat") +
  theme(text = element_text(size = 18))


x <- c(990, 1010, 1030)
(beta <- rain.model$coefficients[2])
(derive.mod1 <- beta)
(derive.mod2 <- 1/x)
derive.mod3 <- 1/3 * 1/((beta * x) ^ 0.3333) * beta

weather$location <- as.factor(weather$location)

## Boxplot
ggplot(data = rain.pred, aes(x = location, y = e, fill = location)) + geom_boxplot()
weather$location <- relevel(weather$location, "Uppsala")

model.2b <- lm(Y ~ pressure + location, data = weather)
model.2b.summary <- summary(model.2b)
model.2b.summary
(confint(model.2b))

# Create prediction and confidence intervals
model.2b.pred <-
  cbind(weather,
        fit = predict(model.2b),
        conf = predict(model.2b, interval = "confidence"),
        pred = predict(model.2b, interval = "prediction"))

# get rid of the extra fits
model.2b.pred$conf.fit <- model.2b.pred$pred.fit <- NULL

# Basic residual analysis####
## Add the residuals to the predicted data##
model.2b.pred$e <- model.2b$residuals
head(model.2b.pred)

# Save the max-value in order to make the y-axis symmetrical
# in the plots.
(max.e <- max(abs(model.2b.pred$e)))
(model.2b.elims <- c(-max.e, max.e))

## Boxplot
ggplot(data = model.2b.pred, aes(x = location, y = e, fill = location)) + geom_boxplot()


## Add fitted line
weather$yhat.pressure.2b <- predict(model.2b)
(plot.line <- plot.data + 
    geom_line(aes(y = weather$yhat.pressure.2b), color = "blue", linewidth = 1) +
    labs(caption = "data and fitted line")
)

ggplot(weather, aes(x = pressure, y = Y, color = location)) +
  geom_point(aes(shape = location), size = 3) +
  facet_wrap(~ location) +
  xlab("pressure") +
  ylab("Rain1/3") +
  labs(title = "") +
  labs(caption = "") +
  theme(text = element_text(size = 18)) +
  expand_limits(y = c(0, 8))

ggplot(data = model.2b.pred, aes(x = pressure, y = rain^(1/3),
                                   shape = location, color = location)) +
  geom_point(size = 3) +
  geom_line(aes(y = fit), linewidth = 1) +
  geom_ribbon(aes(ymin = conf.lwr, ymax = conf.upr,
                  group = location),
              alpha = 0.1) +
  geom_line(aes(y = pred.lwr), linetype = "dashed", linewidth = 1) +
  geom_line(aes(y = pred.upr), linetype = "dashed", linewidth = 1) +
  facet_wrap(~ location) +
  xlab("Fertilizer") +
  ylab("Head weight") + 
  labs(title = "Head weight as a function of soil type and fertilizer") +
  labs(caption = "data, fitted line, conf. and pred. interval") +
  theme(text = element_text(size = 18)) +
  expand_limits(y = c(0, 8))


ggplot(weather, aes(x = speed, y = Y)) +
  geom_point(size = 3) +
  xlab("wind") +
  ylab("Rain1/3") +
  labs(title = "") +
  labs(caption = "") +
  theme(text = element_text(size = 18)) +
  expand_limits(y = c(0, 8))

ggplot(weather, aes(x = pressure, y = speed)) +
  geom_point(size = 3) +
  xlab("wind") +
  ylab("Rain1/3") +
  labs(title = "") +
  labs(caption = "") +
  theme(text = element_text(size = 18)) +
  expand_limits(y = c(0, 8))

## Model 2c.¨
weather$location <- relevel(weather$location, "Uppsala")
ggplot(data = weather, aes(x = location, y = speed, fill = location)) + geom_boxplot()
model.2c <- lm(Y ~ pressure + location + speed, data = weather)
model.2c.summary <- summary(model.2c)
model.2c.summary
(confint(model.2c))

model.2c.pred <-
  cbind(weather,
        fit = predict(model.2c),
        conf = predict(model.2c, interval = "confidence"),
        pred = predict(model.2c, interval = "prediction"))

# get rid of the extra fits
model.2c.pred$conf.fit <- model.2c.pred$pred.fit <- NULL

# Basic residual analysis####
## Add the residuals to the predicted data##
model.2c.pred$e <- model.2c$residuals
head(model.2c.pred)

# Save the max-value in order to make the y-axis symmetrical
# in the plots.
(max.e <- max(abs(model.2c.pred$e)))
(model.2c.elims <- c(-max.e, max.e))

## Boxplot
ggplot(data = model.2c.pred, aes(x = speed, y = e, fill = location)) + facet_wrap(~location, scales = "free_x") + geom_hline(yintercept = 0) + geom_smooth()

## Model 2d.¨
weather$location <- relevel(weather$location, "Uppsala")
model.2d <- lm(Y ~ pressure + location*speed, data = weather)
model.2d.summary <- summary(model.2d)
model.2d.summary
(confint(model.2d))

model.2d.pred <-
  cbind(weather,
        fit = predict(model.2d),
        conf = predict(model.2d, interval = "confidence"),
        pred = predict(model.2d, interval = "prediction"))

# get rid of the extra fits
model.2d.pred$conf.fit <- model.2d.pred$pred.fit <- NULL

# Basic residual analysis####
## Add the residuals to the predicted data##
model.2d.pred$e <- model.2d$residuals
head(model.2d.pred)

# Save the max-value in order to make the y-axis symmetrical
# in the plots.
(max.e <- max(abs(model.2d.pred$e)))
(model.2d.elims <- c(-max.e, max.e))

## Boxplot
ggplot(data = model.2d.pred, aes(x = speed, y = e, fill = location)) + facet_wrap(~location, scales = "free_x") + geom_hline(yintercept = 0) + geom_smooth()