# Untitled
# Paste metadata: unknown, plain_text, 4 years ago, 5.7 kB, 4, Indexable
# 'dataset' holds the input data for this script. It is not created here and
# must already exist when the script runs; only its second column is used
# (it becomes the "LLegadas_Turistas" target column).
# NOTE(review): presumably injected by the host tool - confirm with the caller.
library(gtrendsR)
library(plyr) # rbind.fill(), used by cbind.fill()
library(randomForest)
library(matrixStats) # rowSds()

# ---- Google Trends download ----

# Search keyword per country code. The order is the column order of the
# combined table and must match the column names given to `data_final` below.
trend_queries <- c(
  CN = "chile",
  GB = "chile tourism",
  FR = "chile",
  DE = "chile",
  ES = "chile turismo",
  US = "chile tourism",
  CO = "chile turismo",
  BR = "chile",
  MX = "chile turismo",
  AR = "chile turismo",
  PE = "chile turismo",
  BO = "chile turismo",
  AU = "chile tourism",
  JP = "chile",
  CA = "chile tourism",
  IT = "chile",
  CH = "chile",
  KR = "chile",
  IL = "chile"
)
trend_window <- "2008-01-01 2021-08-01"

# Sleep for `x` seconds and return the proc.time() difference measured
# around the sleep.
testit <- function(x) {
  p1 <- proc.time()
  Sys.sleep(x)
  proc.time() - p1 # The cpu usage should be negligible
}

# Download the interest-over-time table for one keyword/country pair and fail
# with a clear message when nothing comes back.
fetch_interest <- function(keyword, geo, time = trend_window) {
  series <- gtrends(keyword, geo = geo, time = time)$interest_over_time
  if (is.null(series) || nrow(series) == 0L) {
    stop(
      "Google Trends returned no data for '", keyword, "' in ", geo,
      call. = FALSE
    )
  }
  series
}

interest <- vector("list", length(trend_queries))
names(interest) <- names(trend_queries)
for (i in seq_along(trend_queries)) {
  geo <- names(trend_queries)[i]
  interest[[geo]] <- fetch_interest(trend_queries[[i]], geo)
  # Same pacing as before: one 5 second pause after the 10th request.
  if (i %% 10L == 0L) {
    testit(5)
  }
}

# Keep every series reachable under its country code (CN, GB, ...), as the
# original script did, in case something downstream reads them by name.
invisible(list2env(interest, envir = environment()))

# All series must cover the same periods before they can sit side by side.
series_rows <- vapply(interest, nrow, integer(1))
if (length(unique(series_rows)) != 1L) {
  stop(
    "Google Trends series differ in length: ",
    paste0(names(series_rows), "=", series_rows, collapse = ", "),
    call. = FALSE
  )
}

# Column 1 of the first series plus column 2 of every series, exactly the
# positions the original code selected.
# NOTE(review): presumably column 1 is the date and column 2 the hits - confirm
# against the installed gtrendsR version.
google_trends <- cbind(
  interest[[1L]][, 1L, drop = FALSE],
  as.data.frame(
    lapply(interest, function(series) series[, 2L]),
    stringsAsFactors = FALSE
  )
)

# ---- Combine with the input data ----

# Column-bind inputs of different lengths, padding the shorter ones with NA.
# Each input is transposed, the pieces are stacked with plyr::rbind.fill()
# (which fills missing columns with NA) and the result is transposed back.
# Because of the round trip through t(), columns of the result are not
# guaranteed to keep their original type; callers convert them afterwards.
cbind.fill <- function(...) {
  transposed <- lapply(list(...), function(piece) as.data.frame(t(piece)))
  data.frame(t(rbind.fill(transposed)))
}

data_final <- cbind.fill(google_trends, dataset[, 2])
colnames(data_final) <- c(
  "Fecha", "China", "UK", "Francia", "Alemania", "España", "US", "Colombia",
  "Brazil", "Mexico", "Argentina", "Peru", "Bolivia", "Australia", "Japon",
  "Canada", "Italia", "Suiza", "Coreadelsur", "Israel", "LLegadas_Turistas"
)

# Min-max scaling. With the defaults the bounds come from `x` itself (the
# original behaviour); pass `lo`/`hi` to reuse bounds computed on other data.
normalize <- function(x, lo = min(x), hi = max(x)) {
  (x - lo) / (hi - lo)
}

# Convert every column to numeric; going through as.character() keeps factor
# columns from being turned into their level codes.
to_numeric <- function(df) {
  df[] <- lapply(df, function(column) as.numeric(as.character(column)))
  df
}

# ---- Train / test split ----

train_rows <- 1:90
test_rows <- 91:92
feature_idx <- 1:19 # after dropping "Fecha": 19 country series, then target

if (nrow(data_final) < max(test_rows)) {
  stop(
    "data_final has ", nrow(data_final), " rows; at least ", max(test_rows),
    " are required",
    call. = FALSE
  )
}

train <- to_numeric(data_final[train_rows, -1])
test <- to_numeric(data_final[test_rows, -1])

if (any(is.na(train[, feature_idx])) || any(is.na(test[, feature_idx]))) {
  stop(
    "Non-numeric or missing Google Trends values in the model features",
    call. = FALSE
  )
}

# Scale train and test with the SAME bounds, taken from the training features.
# Previously the two test rows were scaled with their own min/max, which put
# them on a different scale than the data the forest was fitted on.
feature_min <- min(train[, feature_idx])
feature_max <- max(train[, feature_idx])
train[, feature_idx] <- normalize(train[, feature_idx], feature_min, feature_max)
test[, feature_idx] <- normalize(test[, feature_idx], feature_min, feature_max)

# ---- Random forest ----

# Fit the forest several times and keep each run's rounded predictions so the
# spread between runs can be reported next to the mean.
n_runs <- 10L
run_predictions <- matrix(NA_real_, nrow = nrow(test), ncol = n_runs)
for (run in seq_len(n_runs)) {
  rf <- randomForest(
    LLegadas_Turistas ~ .,
    data = train, importance = TRUE, ntree = 50
  )
  run_predictions[, run] <- round(predict(rf, test[, -20]))
}

# Variable importance of the last fitted forest (first column of
# rf$importance), sorted from most to least important.
Importancia_variables <- as.data.frame(
  sort(rf$importance[, 1], decreasing = TRUE),
  optional = TRUE
)
Importancia_variables$paises <- rownames(Importancia_variables)
colnames(Importancia_variables) <- c("Porcentaje", "Paises")

# Forecast rows: the original columns of the test rows, with the target
# replaced by the mean prediction over all runs, plus the between-run
# standard deviation as an extra last column.
predicción <- data_final[test_rows, ]
predicción[["LLegadas_Turistas"]] <- rowMeans(run_predictions)
predicción[["Desviación estándar"]] <- rowSds(run_predictions, na.rm = TRUE)
# ---- Final output table ----

# Keep only the complete rows of the combined table.
data_final <- na.omit(data_final)

library(plyr) # ldply()

# Stack both tables into one data frame. The list is named so that ldply()
# can label which element each row came from. The variable is deliberately
# not called `list`, to avoid masking base::list.
tablas_resultado <- list(Llegadas = data_final, Predicción = predicción)
df3 <- ldply(tablas_resultado)

# Go through as.character() so a factor column is converted by its labels,
# not by its internal level codes.
df3$LLegadas_Turistas <- as.numeric(as.character(df3$LLegadas_Turistas))
# Editor is loading...