Untitled
unknown
plain_text
2 years ago
6.0 kB
2
Indexable
#TRATAMENTO DE DADOS dados <- read.delim("Trabalho10_Dados.txt", header= FALSE, sep="") #escolha dos dados pedidos no enunciado dados <- dados[,2:13] names(dados)<- c("Age", "Height", "Sex", "Survival", "Shock.Type", "Systolic.Pressure", "Mean.Arterial.Pressure", "Heart.Rate", "Diastolic.Pressure", "Mean.Central.Venous.Pressure", "Body.Surface.Area", "Cardiac.Index") rows <- nrow(dados) even_rows <- seq_len(rows) %% 2 dados_inicial <- dados[even_rows == 1,] rownames(dados_inicial) <- NULL #renumerar linhas nrows <- nrow(dados_inicial) #transformar qualitativas qualitativas <- c("Sex", "Survival", "Shock.Type") for (i in 1:ncol(dados_inicial)){ if (names(dados_inicial[i]) %in% qualitativas){ dados_inicial[,i] <- factor(dados_inicial[,i]) } } #ANALISE DE DADOS summary(dados_inicial) #box.plot continuas dados_cont <- dados_inicial[ ,-c(match(qualitativas, names(dados_inicial)))] par(mfrow = c(3, ncol(dados_cont)/3), mar=c(2,2,2,2), cex=0.5) lapply(1:ncol(dados_cont), function(i) boxplot(dados_cont[,i], main=names(dados_cont)[i])) #variavel resposta y <- matrix(dados_inicial$Cardiac.Index) dados_inicial <- dados_inicial[,-ncol(dados_inicial)] #correlações library(corrplot) a <- cor(data.matrix(dados_cont)) dev.off() corrplot(a, method = 'color', addCoef.col = 'black', number.cex = 0.45, tl.cex = 0.5, tl.col="black") #eliminar Mean Arterial mrl.comp <-lm(y ~ .,data=dados_inicial) library(car) library(usdm) vif(mrl.comp) dados_inicial <- dados_inicial[, -which(colnames(dados_inicial) %in% c("Mean.Arterial.Pressure"))] mrl.comp <-lm(y ~ .,data=dados_inicial) vif(mrl.comp) #ANALISE PRELIMINAR DO MODELO COMPLETO #modelo completo com covariaveis eliminadas #plot(mrl.comp) summary(mrl.comp) extractAIC(mrl.comp) library(gvlma) gvlma(mrl.comp) #boxcox para por y normal library(MASS) b <- boxcox(lm(y ~ 1)) lambda <- b$x[which.max(b$y)] #lambda=0.30303... y2 <- (y^lambda-1)/lambda #teste de ajustamento do y shapiro.test(y) #p-value = 1.101e-05 shapiro.test(y2) #p-value = 0.7035 > 0.25 #modelo com y normal mrl.comp2 <-lm(y2 ~ .,data=dados_inicial) #plot(mrl.comp2) #mais proximo de normal #eliminar outliers - nao tem pelo box plot boxplot(y2) ##dados de treino e de teste set.seed(73) teste_ind<-sort(sample(nrows,0.2*nrows)) #Treino dados_treino <-dados_inicial[-teste_ind,] y_treino <- y2[-teste_ind] summary(dados_treino) shapiro.test(y_treino) #Teste dados_teste <- dados_inicial[teste_ind,] y_teste <- y2[teste_ind] shapiro.test(y_teste) #matrizes x X <- cbind(1,data.matrix(dados_treino)) n=length(y_treino) mrl.full <- lm(y_treino~.,data=dados_treino) summary(mrl.full) extractAIC(mrl.full) #stepforward #primeira int mrl.base <-lm(y_treino~1, data=dados_treino) mrl.stepforward <- step(mrl.base, scope = list(upper = formula(mrl.full), lower = formula(mrl.base)), direction = "forward", trace="FALSE") summary(mrl.stepforward) extractAIC(mrl.stepforward) formula(mrl.stepforward) #segunda int mrl.int2 <- lm(y_treino ~ (Shock.Type + Sex + Diastolic.Pressure + Systolic.Pressure + Age + Body.Surface.Area)^2, data = dados_treino) mrl.stepforward2 <- step(mrl.stepforward, scope = list(upper = formula(mrl.int2), lower = formula(mrl.stepforward)), direction = "forward", trace="FALSE") summary(mrl.stepforward2) extractAIC(mrl.stepforward2) formula(mrl.stepforward2) #terceira int mrl.int3 <- update(mrl.stepforward2, .~. + Sex:Body.Surface.Area:Diastolic.Pressure) mrl.stepforward3 <- step(mrl.stepforward2, scope = list(upper = formula(mrl.int3), lower = formula(mrl.stepforward2)), direction = "forward", trace="FALSE") summary(mrl.stepforward3) extractAIC(mrl.stepforward3) formula(mrl.stepforward3) library(ggplot2) predictions2 <- predict(mrl.stepforward, newdata = dados_teste) d2<-data.frame(predictions2, y=y_teste) ggplot(d2, aes(predictions2, y)) + geom_point(shape = 16, size = 3, show.legend = FALSE) ri <- y_teste - predictions2 sum(ri^2)/length(y_teste) #bestsubset com r^2 ajustado library(leaps) bestsubsets <- regsubsets(y_treino~., data = dados_treino, nvmax = ncol(dados_treino)) plot(bestsubsets, scale = "adjr2") bs_summary <- summary(bestsubsets) ind_r2 <- which.max(bs_summary$adjr2) best.r<- bestsubsets[ind_r2] coef.r <- best.r$thetab coef.r <- coef(bestsubsets, ind_r2) mrl.best.r<- lm(y_treino ~ ., data = dados_treino) summary(mrl.best.r) extractAIC(mrl.best.r) formula(mrl.best.r) plot(bs_summary$adjr2, xlab = "Number of Variables", ylab = "Adjusted RSq", type = "b") best_adj_r2 = which.max(bs_summary$adjr2) points(best_adj_r2, bs_summary$adjr2[best_adj_r2], col = "red", cex = 2, pch = 20) #bestsubset com aic bestsubsets2 <- best.subset(y_treino~., data = dados_treino, method="aic") ################# #stepbackward mrl.stepbackward <- step(mrl.full, direction = "backward", trace=FALSE) summary(mrl.stepbackward) extractAIC(mrl.stepbackward) formula(mrl.stepbackward) library(ggplot2) predictions <- predict(mrl.full, newdata = dados_treino) d<-data.frame(predictions, y=y_treino) ggplot(d, aes(predictions, y)) + geom_point(shape = 16, size = 3, show.legend = FALSE) library(emmeans) emmip(mrl.stepbackward,Sex ~ Age, cov.reduce=range) #não há interação emmip(mrl.stepbackward,Shock.Type ~ Age, cov.reduce=range) #não há interação emmip(mrl.full,Sex ~ Body.Surface.Area, cov.reduce=range) #não há interação emmip(mrl.full,Age ~ Height, cov.reduce=range) #não há interação
Editor is loading...