Untitled
unknown
plain_text
2 years ago
6.1 kB
6
Indexable
#TRATAMENTO DE DADOS dados <- read.delim("Trabalho10_Dados.txt", header= FALSE, sep="") #escolha dos dados pedidos no enunciado dados <- dados[,2:13] names(dados)<- c("Age", "Height", "Sex", "Survival", "Shock.Type", "Systolic.Pressure", "Mean.Arterial.Pressure", "Heart.Rate", "Diastolic.Pressure", "Mean.Central.Venous.Pressure", "Body.Surface.Area", "Cardiac.Index") rows <- nrow(dados) even_rows <- seq_len(rows) %% 2 dados_inicial <- dados[even_rows == 1,] rownames(dados_inicial) <- NULL #renumerar linhas nrows <- nrow(dados_inicial) #transformar qualitativas qualitativas <- c("Sex", "Survival", "Shock.Type") for (i in 1:ncol(dados_inicial)){ if (names(dados_inicial[i]) %in% qualitativas){ dados_inicial[,i] <- factor(dados_inicial[,i]) } } #ANALISE DE DADOS summary(dados_inicial) #box.plot continuas dados_cont <- dados_inicial[ ,-c(match(qualitativas, names(dados_inicial)))] par(mfrow = c(3, ncol(dados_cont)/3), mar=c(2,2,2,2), cex=0.5) lapply(1:ncol(dados_cont), function(i) boxplot(dados_cont[,i], main=names(dados_cont)[i])) #variavel resposta y <- matrix(dados_inicial$Cardiac.Index) dados_inicial <- dados_inicial[,-ncol(dados_inicial)] #correlações library(corrplot) a <- cor(data.matrix(dados_cont)) dev.off() corrplot(a, method = 'color', addCoef.col = 'black', number.cex = 0.45, tl.cex = 0.5, tl.col="black") #eliminar Mean Arterial mrl.comp <-lm(y ~ .,data=dados_inicial) library(car) library(usdm) vif(mrl.comp) dados_inicial <- dados_inicial[, -which(colnames(dados_inicial) %in% c("Mean.Arterial.Pressure"))] mrl.comp <-lm(y ~ .,data=dados_inicial) vif(mrl.comp) #ANALISE PRELIMINAR DO MODELO COMPLETO #modelo completo com covariaveis eliminadas #plot(mrl.comp) summary(mrl.comp) extractAIC(mrl.comp) library(gvlma) gvlma(mrl.comp) #boxcox para por y normal library(MASS) b <- boxcox(lm(y ~ 1)) lambda <- b$x[which.max(b$y)] #lambda=0.30303... y2 <- (y^lambda-1)/lambda #teste de ajustamento do y shapiro.test(y) #p-value = 1.101e-05 shapiro.test(y2) #p-value = 0.7035 > 0.25 #modelo com y normal mrl.comp2 <-lm(y2 ~ .,data=dados_inicial) #plot(mrl.comp2) #mais proximo de normal #eliminar outliers - nao tem pelo box plot boxplot(y2) ##dados de treino e de teste set.seed(73) teste_ind<-sort(sample(nrows,0.2*nrows)) #Treino dados_treino <-dados_inicial[-teste_ind,] y_treino <- y2[-teste_ind] summary(dados_treino) shapiro.test(y_treino) #Teste dados_teste <- dados_inicial[teste_ind,] y_teste <- y2[teste_ind] shapiro.test(y_teste) #matrizes x X <- cbind(1,data.matrix(dados_treino)) n=length(y_treino) mrl.full <- lm(y_treino~.,data=dados_treino) summary(mrl.full) extractAIC(mrl.full) #stepforward #primeira int mrl.base <-lm(y_treino~1, data=dados_treino) mrl.stepforward <- step(mrl.base, scope = list(upper = formula(mrl.full), lower = formula(mrl.base)), direction = "forward", trace="FALSE") summary(mrl.stepforward) extractAIC(mrl.stepforward) formula(mrl.stepforward) #segunda int mrl.int2 <- lm(y_treino ~ (Shock.Type + Sex + Diastolic.Pressure + Systolic.Pressure + Age + Body.Surface.Area)^2, data = dados_treino) mrl.stepforward2 <- step(mrl.stepforward, scope = list(upper = formula(mrl.int2), lower = formula(mrl.stepforward)), direction = "forward", trace="FALSE") summary(mrl.stepforward2) extractAIC(mrl.stepforward2) formula(mrl.stepforward2) #terceira int mrl.int3 <- update(mrl.stepforward2, .~. + Sex:Body.Surface.Area:Diastolic.Pressure) mrl.stepforward3 <- step(mrl.stepforward2, scope = list(upper = formula(mrl.int3), lower = formula(mrl.stepforward2)), direction = "forward", trace="FALSE") summary(mrl.stepforward3) extractAIC(mrl.stepforward3) formula(mrl.stepforward3) library(ggplot2) predictions2 <- predict(mrl.stepforward, newdata = dados_teste) d2<-data.frame(predictions2, y=y_teste) ggplot(d2, aes(predictions2, y)) + geom_point(shape = 16, size = 3, show.legend = FALSE) ri <- y_teste - predictions2 sum(ri^2)/length(y_teste) #bestsubset com r^2 ajustado library(leaps) bestsubsets <- regsubsets(y_treino~., data = dados_treino, nvmax = ncol(dados_treino)) plot(bestsubsets, scale = "adjr2") bs_summary <- summary(bestsubsets) ind_r2 <- which.max(bs_summary$adjr2) coef.r <- coef(bestsubsets, ind_r2) #Age, Sex, Shock.Type, Systolic.Pressure, Diastolic.Pressure, Body.Surface.Area mrl.best.r<- lm(y_treino ~ Age + Sex + Shock.Type + Systolic.Pressure + Diastolic.Pressure + Body.Surface.Area, data = dados_treino) summary(mrl.best.r) extractAIC(mrl.best.r) formula(mrl.best.r) #com interações 2 a 2 bestsubsets2 <- regsubsets(y_treino ~ (Age + Sex + Shock.Type + Systolic.Pressure + Diastolic.Pressure + Body.Surface.Area)^2, data = dados_treino, nvmax = 21, force.in = c(1,3,5,6,8,10)) plot(bestsubsets2, scale = "adjr2") bs_summary2 <- summary(bestsubsets2) ind_r22 <- which.max(bs_summary2$adjr2) coef.r2 <- coef(bestsubsets2, ind_r22) #Age, Sex, Shock.Type, Systolic.Pressure, Diastolic.Pressure, Body.Surface.Area, #Age:Shock.Type, Sex:Systolic.Pressure, Sex:Body.Surface.Area, Shock.Type:Systolic.Pressure, #Shock.Type:Diastolic.Pressure, Diastolic.Pressure:Body.Surface.Area mrl.best.r2<- update(mrl.best.r, .~. + Age:Shock.Type + Sex:Systolic.Pressure + Sex:Body.Surface.Area + Shock.Type:Systolic.Pressure + Shock.Type:Diastolic.Pressure + Diastolic.Pressure:Body.Surface.Area) summary(mrl.best.r2) extractAIC(mrl.best.r2) formula(mrl.best.r2) #não há interações 3 a 3
Editor is loading...