Untitled

mail@pastecode.io avatar
unknown
r
a year ago
15 kB
1
Indexable
Never
rm(list=ls())
library(ggplot2)
library(patchwork)
weather <- read.csv("Data/weather.csv")


(Y.model <- lm(rain ~pressure, data = weather))

p1aY = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) +
  geom_line(aes(y = Y.model$coefficients[1] + pressure*Y.model$coefficients[2], col = "est"), linetype = "dashed") +
  labs(title = "Precipitation (mm) against Air Pressure (hPa)")
# Exponential decay

press <- data.frame(weather[4])
Y.pred <-
  cbind(press,
        fit = predict(Y.model, press),
        conf = predict(Y.model, press, interval = "confidence"),
        pred = predict(Y.model, press, interval = "prediction"))
head(Y.pred)
# get rid of the extra fits
Y.pred$conf.fit <- Y.pred$pred.fit <- NULL
head(Y.pred)

Y.pred$e <- Y.model$residuals
head(Y.pred)
(max.e <- max(abs(Y.pred$e)))
(Y.elims <- c(-max.e, max.e))
## Plot against yhat####
# Add a horizontal line at y=0,
# and expand the y-axis to include +/- max residual.

p1aYres = ggplot(data = Y.pred, aes(x = fit, y = e)) +
  geom_point(size = 1) +
  geom_hline(yintercept = 0) +
  expand_limits(y = Y.elims) +
  xlab(" ") +
  ylab(" ") +
  labs(tag = "identity") +
  labs(title = "Residuals vs Predicted Precipitation") +
  theme(text = element_text(size = 18))


  ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) +
  geom_line(aes(y = Y.pred$fit, col = "fit"), linetype = "dashed") +
  geom_line(aes(y = Y.pred$conf.lwr, col = "conf"), linetype = "dashed") +
  geom_line(aes(y = Y.pred$conf.upr, col = "conf"), linetype = "dashed") +
  geom_line(aes(y = Y.pred$pred.lwr, col = "pred"), linetype = "dashed") +
  geom_line(aes(y = Y.pred$pred.upr, col = "pred"), linetype = "dashed") +
  labs(title = "Precipitation (mm) against Air Pressure (hPa)")



(Ylog.model <- lm(log(rain) ~ pressure, data = weather))

 p1alogY = ggplot(data = weather, aes(x = pressure, y = log(rain))) + geom_point(size = 1) +
  geom_line(aes(y = Ylog.model$coefficients[1] + pressure*Ylog.model$coefficients[2], col = "est"), linetype = "dashed") +
  labs(title = "Precipitation (log(mm)) against Air Pressure (hPa)")
# Linear to the left, but after 1010 hPa the data seems to have another slope
p1alogY

## resid anal ###
Ylog.pred <-
  cbind(press,
        fit = predict(Ylog.model, press),
        conf = predict(Ylog.model, press, interval = "confidence"),
        pred = predict(Ylog.model, press, interval = "prediction"))
head(Y.pred)
# get rid of the extra fits
Ylog.pred$conf.fit <- Ylog.pred$pred.fit <- NULL
head(Ylog.pred)

Ylog.pred$e <- Ylog.model$residuals
head(Ylog.pred)
(max.e <- max(abs(Ylog.pred$e)))
(Ylog.elims <- c(-max.e, max.e))
## Plot against yhat####
# Add a horizontal line at y=0,
# and expand the y-axis to include +/- max residual.

p1alogYres = ggplot(data = Ylog.pred, aes(x = fit, y = e)) +
  geom_point(size = 1) +
  geom_hline(yintercept = 0) +
  expand_limits(y = Ylog.elims) +
  xlab(" ") +
  ylab("Residual") +
  labs(tag = "log") +
  #labs(title = "Residuals vs Predicted log-Precipitation") +
  theme(text = element_text(size = 18))


p1alogYints = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) +
  geom_line(aes(y = exp(Ylog.pred$fit), col = "fit"), linetype = "dashed") +
  geom_line(aes(y = exp(Ylog.pred$conf.lwr), col = "conf"), linetype = "dashed") +
  geom_line(aes(y = exp(Ylog.pred$conf.upr), col = "conf"), linetype = "dashed") +
  geom_line(aes(y = exp(Ylog.pred$pred.lwr), col = "pred"), linetype = "dashed") +
  geom_line(aes(y = exp(Ylog.pred$pred.upr), col = "pred"), linetype = "dashed") +
  labs(title = "Precipitation log(mm) against Air Pressure (hPa)")
p1alogYints


(Ycr.model <- lm(rain^(1/3) ~ pressure, data = weather))

p1aYcr = ggplot(data = weather, aes(x = pressure, y = rain^(1/3))) + geom_point(size = 1) +
  geom_line(aes(y = Ycr.model$coefficients[1] +pressure*Ycr.model$coefficients[2], col = "est"), linetype = "dashed") +
  labs(title = "Precipitation ((mm)^1/3) Against Air Pressure (hPa)")
# Linear, fits our model Y = xB + e
p1aYcr

## resid anal ### 
Ycr.pred <-
  cbind(press,
        fit = predict(Ycr.model, press),
        conf = predict(Ycr.model, press, interval = "confidence"),
        pred = predict(Ycr.model, press, interval = "prediction"))
head(Y.pred)
# get rid of the extra fits
Ycr.pred$conf.fit <- Ycr.pred$pred.fit <- NULL
head(Ycr.pred)

Ycr.pred$e <- Ycr.model$residuals
head(Ycr.pred)
(max.e <- max(abs(Ycr.pred$e)))
(Ycr.elims <- c(-max.e, max.e))
## Plot against yhat####
# Add a horizontal line at y=0,
# and expand the y-axis to include +/- max residual.

p1aYcrres = ggplot(data = Ycr.pred, aes(x = fit, y = e)) +
  geom_point(size = 1) +
  geom_hline(yintercept = 0) +
  expand_limits(y = Ycr.elims) +
  xlab("Predicted Precipitation") +
  ylab(" ") +
  labs(tag = "cube root") +
  #labs(title = "Residuals vs Predicted Precipitation") +
  theme(text = element_text(size = 18))


  p1aYcrints = ggplot(data = weather, aes(x = pressure, y = rain)) + geom_point(size = 1) +
  geom_line(aes(y = (Ycr.pred$fit)^3, col = "fit"), linetype = "dashed") +
  geom_line(aes(y = (Ycr.pred$conf.lwr)^3, col = "conf"), linetype = "dashed") +
  geom_line(aes(y = (Ycr.pred$conf.upr)^3, col = "conf"), linetype = "dashed") +
  geom_line(aes(y = (Ycr.pred$pred.lwr)^3, col = "pred"), linetype = "dashed") +
  geom_line(aes(y = (Ycr.pred$pred.upr)^3, col = "pred"), linetype = "dashed") +
  labs(title = "95%-Confidence and Prediction Intervals", x = "Air Pressure (hPa)", y = "Precipitation (mm)^(1/3)")
  
  
  
  
  # FINAL RESIDUAL PLOTS
  p1aYres/p1alogYres/p1aYcrres
  
  # FINAL ESTIMATED RELATIONSHIP PLOTS
  p1aY/p1alogY/p1aYcr
  
  # FINAL INVERSE TRANSFORMED CONFIDENCE AND PREDICTION INTERVAL
  p1alogYints/p1aYcrints
  
  
  
  
  
  
  
  
  
  
  
################## Part 2: #####################
(sum( weather[1] == "Lund"))
(sum( weather[1] == "Katterjåkk"))
(sum( weather[1] == "Uppsala"))
summary(Ycr.model) # p-value < 2.2e-16 so we reject H_0.

(weather$location <- as.factor(weather$location))

(dats <- data.frame(residuals = Ycr.pred$e, locations = weather$location ))
  
bp <- ggplot(data = dats, aes(x = locations, y = residuals, fill = locations)) +
  geom_boxplot() 
bp + labs(title = "Boxplot; Residuals for every location", x = "Region", y="Resiudals") + 
  theme_classic()


# 2b continued; Katterjåkk valt, minst outlliers.
# relevel med ref = "Katterjåkk".-> anova(Ycr.model, NEW MODEL: (med ref=Katterjåkk))
# Boxplotta detta med ny modell

Ycr.new_model<-lm( rain^(1/3) ~ pressure + relevel(location, ref = "Katterjåkk"), data = weather)
(summary(Ycr.new_model))
Ycr.pred2b <- predict(Ycr.new_model, interval = "prediction")
Ycr.conf2 <- predict(Ycr.new_model, interval = "confidence")

ggplot(data = weather, aes(x = pressure, y = rain^(1/3), col = factor(location))) + geom_point(size = 0) +
  geom_line(aes(y = Ycr.pred2b[1:1496,1]), linetype = "dashed") +
  geom_line(aes(y = Ycr.pred2b[1:1496,2]), linetype = "dashed") +
  geom_line(aes(y = Ycr.pred2b[1:1496,3]), linetype = "dashed") +
  geom_line(aes(y = Ycr.conf2[1:1496,2]), linetype = "dashed") +
  geom_line(aes(y = Ycr.conf2[1:1496,3]), linetype = "dashed") +
  labs(title = "Precipitation (mm)^(1/3) against Air Pressure (hPa)")

ggplot(data = weather, aes(x = pressure, y = rain^(1/3), col = factor(location))) + geom_point(size = 0) +
  geom_line(aes(y = Ycr.pred2b[1:1496,1]), linetype = "dashed") +
  geom_line(aes(y = Ycr.pred2b[1:1496,2]), linetype = "dashed") +
  geom_line(aes(y = Ycr.pred2b[1:1496,3]), linetype = "dashed") +
  geom_line(aes(y = Ycr.conf2[1:1496,2]), linetype = "dashed") +
  geom_line(aes(y = Ycr.conf2[1:1496,3]), linetype = "dashed") +
  facet_grid(location~. ) +
  labs(title = "Precipitation (mm)^(1/3) against Air Pressure (hPa)")


#anova(lm(log(Pb)~I(year-1975), data = Pb_all), Pb.model )
anova(Ycr.model, Ycr.new_model)

Ycr.pred$e2 <- Ycr.new_model$residuals
head(Ycr.pred)
(max.e2 <- max(abs(Ycr.pred$e2)))
(Ycr.elims2 <- c(-max.e2, max.e2))


(dats_new <- data.frame(residuals = Ycr.pred$e2, locations = weather$location ))

bp_new <- ggplot(data = dats_new, aes(x = locations, y = residuals, fill = locations)) +
  geom_boxplot() 
bp_new + labs(title = "Boxplot; Residuals for every location New Model", x = "Region", y="Resiudals") + 
  theme_classic()

bp + labs(title = "Former model") + bp_new + labs(title = "New model")
# DIFFERENCE; residuals are now centered at zero and have fewer outliers

# ########### 2c #################
p2c1 <- (ggplot(data = weather, aes(x = speed, y = rain^(1/3) )) + geom_point(size = 1))
p2c2 <- (ggplot(data = weather, aes(x = pressure, y = speed)) + geom_point(size=1))
(p2c1 + labs(title = "Transf. Precip. (mm)^(1/3) over Avg. Monthly Wind Speed (m/s)"))/(p2c2 + labs(title = "Ibid. over Air pressure (hPa)"))

bpnews <- ggplot(data = weather, aes(x = location, y = speed, fill = location)) +
  geom_boxplot()#+
bpnews + theme_classic() + labs(title = "Boxplots; Avg. Monthly Wind Speed over Location")

Ycr.new_new_model <- lm( rain^(1/3) ~ pressure + relevel(location, ref = "Katterjåkk") + speed, data = weather)
summary(Ycr.new_new_model)
confint(Ycr.new_new_model)
anova(Ycr.new_model, Ycr.new_new_model)

########## 2d ###########
ggplot(data = weather, aes(x = speed, y = Ycr.new_new_model$residuals, col = factor(location)))  + 
  geom_point(size = 1) + facet_grid(location~., scales = "free_x") + 
  geom_smooth() + labs(title = "Residuals for Model 2c) over speed", y = "Residuals")

Ycr.new_new_new_model<- lm( rain^(1/3)~pressure+speed*relevel(location, ref = "Katterjåkk"), data = weather)

summary(Ycr.new_new_new_model)
confint(Ycr.new_new_new_model)

ggplot(data = weather, aes(x = speed, y = Ycr.new_new_new_model$residuals, col = factor(location)))  + 
  geom_point(size = 1) + facet_grid(location~., scales = "free_x") + 
  geom_smooth() + labs(title = "Residuals for Model 2d) over speed", y = "Residuals")

############## 2e ##############
newtimetwo <- data.frame(pressure = c(1011), speed = c(2), location = factor(rep(c("Katterjåkk","Lund","Uppsala"))))
newtimefive <- data.frame(pressure = c(1011), speed = c(5), location = factor(rep(c("Katterjåkk","Lund","Uppsala"))))


#2b ####
YlocKatterjåkkTwo.pred <-
  cbind(newtimetwo,
        fit = predict(Ycr.new_model, newtimetwo)^3,
        conf = predict(Ycr.new_model, newtimetwo, interval = "confidence")^3,
        pred = predict(Ycr.new_model, newtimetwo, interval = "prediction")^3)

# get rid of the extra fits
YlocKatterjåkkTwo.pred$conf.fit <- YlocKatterjåkkTwo.pred$pred.fit <- NULL

YlocKatterjåkkFive.pred <-
  cbind(newtimefive,
        fit = predict(Ycr.new_model, newtimefive)^3,
        conf = predict(Ycr.new_model, newtimefive, interval = "confidence")^3,
        pred = predict(Ycr.new_model, newtimefive, interval = "prediction")^3)

# get rid of the extra fits
YlocKatterjåkkFive.pred$conf.fit <- YlocKatterjåkkFive.pred$pred.fit <- NULL



#2c ####
YcrNewNewTwo.pred <-
  cbind(newtimetwo,
        fit = predict(Ycr.new_new_model, newtimetwo)^3,
        conf = predict(Ycr.new_new_model, newtimetwo, interval = "confidence")^3,
        pred = predict(Ycr.new_new_model, newtimetwo, interval = "prediction")^3)

# get rid of the extra fits
YcrNewNewTwo.pred$conf.fit <- YcrNewNewTwo.pred$pred.fit <- NULL

YcrNewNewFive.pred <-
  cbind(newtimefive,
        fit = predict(Ycr.new_new_model, newtimefive)^3,
        conf = predict(Ycr.new_new_model, newtimefive, interval = "confidence")^3,
        pred = predict(Ycr.new_new_model, newtimefive, interval = "prediction")^3)

# get rid of the extra fits
YcrNewNewFive.pred$conf.fit <- YcrNewNewFive.pred$pred.fit <- NULL


#2d ####
YcrNewNewNewTwo.pred <-
  cbind(newtimetwo,
        fit = predict(Ycr.new_new_new_model, newtimetwo)^3,
        conf = predict(Ycr.new_new_new_model, newtimetwo, interval = "confidence")^3,
        pred = predict(Ycr.new_new_new_model, newtimetwo, interval = "prediction")^3)

# get rid of the extra fits
YcrNewNewNewTwo.pred$conf.fit <- YcrNewNewNewTwo.pred$pred.fit <- NULL

YcrNewNewNewFive.pred <-
  cbind(newtimefive,
        fit = predict(Ycr.new_new_new_model, newtimefive)^3,
        conf = predict(Ycr.new_new_new_model, newtimefive, interval = "confidence")^3,
        pred = predict(Ycr.new_new_new_model, newtimefive, interval = "prediction")^3)

# get rid of the extra fits
YcrNewNewNewFive.pred$conf.fit <- YcrNewNewNewFive.pred$pred.fit <- NULL


YlocKatterjåkkTwo.pred

YcrNewNewTwo.pred

YcrNewNewNewTwo.pred

YlocKatterjåkkFive.pred

YcrNewNewFive.pred

YcrNewNewNewFive.pred
######### 3 ########

lst <- sort(influence(Ycr.new_new_new_model)$hat, index.return=TRUE, decreasing=TRUE)
leverages <- lapply(lst, `[`, lst$x %in% head(unique(lst$x),7))

leverages$x
weather$location[leverages$ix]


#cooks.distance(Ycr.new_new_new_model)
max(cooks.distance(Ycr.new_new_new_model))
which.max(cooks.distance(Ycr.new_new_new_model))


ggplot(data = weather, aes(x = pressure, y = speed, col = factor(location))) + geom_point(size = 1) +
  geom_point(data = weather[leverages$ix,], aes(x = pressure, y = speed, col = ), size = 1,  colour = 'chocolate4') +
  geom_point(data = weather[1107,], aes(x = pressure, y = speed), size = 1, colour = 'black') +
  facet_wrap(~location, scales = "free_x") +
  #facet_grid(location~., scales = "free_x") +
  labs(title = "speed against Air Pressure (hPa)")

dfbetas(Ycr.new_new_new_model)[1107,]

ggplot(data = weather, aes(x = speed, y = rain^(1/3))) + geom_point(size = 1) +
  geom_point(data = weather[1107,], size = 1, colour = 'red') +
  #facet_wrap(~location) +
  labs(title = "Precipitation (mm) against Air speed")

#######

studentizedResiduals <- rstudent(Ycr.new_new_new_model)
fittedValues <- Ycr.new_new_new_model$fitted.values
studresvval <- data.frame(residuals = studentizedResiduals, fit = fittedValues)
ggplot(data = studresvval, aes(x = fit, y = residuals)) + geom_point(size = 1) +
  labs(title = "Studentized residuals against fitted values")

ggplot(data = studresvval, aes(x = fit, y = sqrt(abs(residuals)))) + geom_point(size = 1) +
  labs(title = "Root of absolute value of studentized residuals against fitted values")

#######
weather$monthnr <- as.factor(substr(weather$month, 6, 7))
null.model <- lm(rain^(1/3) ~ 1, data = weather)
full.model <- lm(rain^(1/3)~pressure*relevel(location, "Katterjåkk")*speed*temp*monthnr, data = weather)
BIC(null.model)
BIC(full.model)

step(Ycr.new_new_new_model, data = weather, scope = list(lower = null.model, upper = full.model), direction = "both", k =log(nrow(weather)))                

#step(lm(rain^(1/3)~pressure*location, data = weather), scope = list(lower = null.model, upper = lm(rain^(1/3)~pressure*location*speed*temp*monthnr, data = weather)), direction = "both")