Untitled

 avatar
unknown
plain_text
4 years ago
5.7 kB
4
Indexable
# 'dataset' holds the input data for this script
library(gtrendsR)

# Google Trends interest over time, first batch of source markets.
# Every query uses the same 2008-01-01 to 2021-08-01 window; the keyword
# differs by market. Each result's `interest_over_time` table is kept under
# the market's geo code.

# China
res1 <- gtrends(
  keyword = "chile",
  geo = "CN",
  time = "2008-01-01 2021-08-01"
)
CN <- res1[["interest_over_time"]]

# United Kingdom
res2 <- gtrends(
  keyword = "chile tourism",
  geo = "GB",
  time = "2008-01-01 2021-08-01"
)
GB <- res2[["interest_over_time"]]

# France
res3 <- gtrends(
  keyword = "chile",
  geo = "FR",
  time = "2008-01-01 2021-08-01"
)
FR <- res3[["interest_over_time"]]

# Germany
res4 <- gtrends(
  keyword = "chile",
  geo = "DE",
  time = "2008-01-01 2021-08-01"
)
DE <- res4[["interest_over_time"]]

# Spain
res5 <- gtrends(
  keyword = "chile turismo",
  geo = "ES",
  time = "2008-01-01 2021-08-01"
)
ES <- res5[["interest_over_time"]]

# United States
res6 <- gtrends(
  keyword = "chile tourism",
  geo = "US",
  time = "2008-01-01 2021-08-01"
)
US <- res6[["interest_over_time"]]

# Colombia
res7 <- gtrends(
  keyword = "chile turismo",
  geo = "CO",
  time = "2008-01-01 2021-08-01"
)
CO <- res7[["interest_over_time"]]

# Brazil
res8 <- gtrends(
  keyword = "chile",
  geo = "BR",
  time = "2008-01-01 2021-08-01"
)
BR <- res8[["interest_over_time"]]

# Mexico
res9 <- gtrends(
  keyword = "chile turismo",
  geo = "MX",
  time = "2008-01-01 2021-08-01"
)
MX <- res9[["interest_over_time"]]

# Argentina
res10 <- gtrends(
  keyword = "chile turismo",
  geo = "AR",
  time = "2008-01-01 2021-08-01"
)
AR <- res10[["interest_over_time"]]


# Sleep for `x` seconds and return the proc.time() difference measured
# around the sleep.
testit <- function(x) {
  started <- proc.time()
  Sys.sleep(x)
  # CPU time in the difference should be negligible; only elapsed time grows.
  proc.time() - started
}
# Pause 5 seconds between the two batches of queries
# (presumably to avoid request throttling -- TODO confirm).
testit(5)

# Google Trends interest over time, second batch of source markets.
# Same 2008-01-01 to 2021-08-01 window as the first batch; each result's
# `interest_over_time` table is kept under the market's geo code.

# Peru
res11 <- gtrends(
  keyword = "chile turismo",
  geo = "PE",
  time = "2008-01-01 2021-08-01"
)
PE <- res11[["interest_over_time"]]

# Bolivia
res12 <- gtrends(
  keyword = "chile turismo",
  geo = "BO",
  time = "2008-01-01 2021-08-01"
)
BO <- res12[["interest_over_time"]]

# Australia
res13 <- gtrends(
  keyword = "chile tourism",
  geo = "AU",
  time = "2008-01-01 2021-08-01"
)
AU <- res13[["interest_over_time"]]

# Japan
res14 <- gtrends(
  keyword = "chile",
  geo = "JP",
  time = "2008-01-01 2021-08-01"
)
JP <- res14[["interest_over_time"]]

# Canada
res15 <- gtrends(
  keyword = "chile tourism",
  geo = "CA",
  time = "2008-01-01 2021-08-01"
)
CA <- res15[["interest_over_time"]]

# Italy
res16 <- gtrends(
  keyword = "chile",
  geo = "IT",
  time = "2008-01-01 2021-08-01"
)
IT <- res16[["interest_over_time"]]

# Switzerland
res17 <- gtrends(
  keyword = "chile",
  geo = "CH",
  time = "2008-01-01 2021-08-01"
)
CH <- res17[["interest_over_time"]]

# South Korea
res18 <- gtrends(
  keyword = "chile",
  geo = "KR",
  time = "2008-01-01 2021-08-01"
)
KR <- res18[["interest_over_time"]]

# Israel
res19 <- gtrends(
  keyword = "chile",
  geo = "IL",
  time = "2008-01-01 2021-08-01"
)
IL <- res19[["interest_over_time"]]


# Assemble one wide table: the first two columns of the China result
# followed by the second column of every other market, in a fixed order
# that the later column labels rely on.
google_trends <- cbind(
  CN[, c(1, 2)],
  GB[, 2],
  FR[, 2],
  DE[, 2],
  ES[, 2],
  US[, 2],
  CO[, 2],
  BR[, 2],
  MX[, 2],
  AR[, 2],
  PE[, 2],
  BO[, 2],
  AU[, 2],
  JP[, 2],
  CA[, 2],
  IT[, 2],
  CH[, 2],
  KR[, 2],
  IL[, 2]
)

library(plyr) # requires plyr for rbind.fill()
# Column-bind objects that may have different numbers of rows.
# Each argument is transposed and turned into a data frame, the pieces are
# stacked with rbind.fill() (plyr), and the stack is transposed back into a
# data frame. Because the data passes through t(), mixed-type inputs may come
# back as text rather than numbers.
cbind.fill <- function(...) {
  flipped <- lapply(list(...), function(piece) as.data.frame(t(piece)))
  data.frame(t(rbind.fill(flipped)))
}

# Append the second column of the input `dataset` to the trends table;
# cbind.fill() is used because the two may differ in row count.
data_final <- cbind.fill(google_trends, dataset[, 2])

# Label the columns: date, one column per market (same order as in
# google_trends), and finally the column taken from `dataset`.
colnames(data_final) <- c(
  "Fecha",
  "China",
  "UK",
  "Francia",
  "Alemania",
  "España",
  "US",
  "Colombia",
  "Brazil",
  "Mexico",
  "Argentina",
  "Peru",
  "Bolivia",
  "Australia",
  "Japon",
  "Canada",
  "Italia",
  "Suiza",
  "Coreadelsur",
  "Israel",
  "LLegadas_Turistas"
)


# Min-max rescale `x` using the minimum and maximum taken over all of `x`.
# When `x` is a data frame, a single global min/max is used for every column.
normalize <- function(x) {
  lowest <- min(x)
  span <- max(x) - lowest
  (x - lowest) / span
}



# Split the data: rows 1-90 are used for training and rows 91-92 are the rows
# to predict. The first column (Fecha) is dropped, leaving the 19 market
# columns followed by LLegadas_Turistas in column 20.
train <- data_final[1:90, -1]
test <- data_final[91:92, -1]

# Values may arrive as text or factors, so convert every column to numeric.
train[] <- lapply(train, function(x) as.numeric(as.character(x)))
test[] <- lapply(test, function(x) as.numeric(as.character(x)))

# Capture the training range BEFORE rescaling. The test rows must be scaled
# with the same min/max as the training rows; scaling the two test rows with
# their own min/max would put them on a different scale from the one the
# model is fitted on.
train_range <- range(as.matrix(train[, 1:19]))

train[, 1:19] <- normalize(train[, 1:19])
test[, 1:19] <- (test[, 1:19] - train_range[1]) /
  (train_range[2] - train_range[1])

# random forest
library(randomForest)

# Fit 10 forests of 50 trees each and collect every run's rounded predictions
# for rows 91-92 as one column of `table`.
table <- NULL
for (i in seq_len(10)) {
  rf <- randomForest(
    LLegadas_Turistas ~ .,
    data = train,
    importance = TRUE,
    ntree = 50
  )

  # First importance column, sorted from most to least important, with the
  # variable names copied into a second column.
  Importancia_variables <- as.data.frame(
    sort(rf$importance[, 1], decreasing = TRUE),
    optional = TRUE
  )
  Importancia_variables$paises <- rownames(Importancia_variables)
  colnames(Importancia_variables) <- c("Porcentaje", "Paises")

  # Predict from the 19 market columns only (column 20 is the response).
  pred <- as.data.frame(round(predict(rf, test[, -20])))
  colnames(pred) <- "LLegadas_Turistas"

  # Swap the original column 21 of the forecast rows for the predicted values.
  prediccion <- cbind(data_final[91:92, ], pred)[, -21]

  table <- cbind(table, c(prediccion = prediccion$LLegadas_Turistas))
}

library(matrixStats)

# Summarise the runs: per forecast row, the mean prediction and the standard
# deviation across runs. The original column 21 is replaced by the mean.
predicción <- cbind(
  data_final[91:92, ],
  data.frame(rowMeans(table)),
  data.frame(rowSds(as.matrix(table), na.rm = TRUE))
)
predicción <- predicción[, -21]
names(predicción)[21:22] <- c("LLegadas_Turistas", "Desviación estándar")




# Keep only complete rows of the combined data.
data_final <- na.omit(data_final)
#prediccion<-prediccion[,-21]

# Stack the complete rows and the forecast rows into a single data frame;
# ldply() adds an id column built from the list names.
list <- list(Llegadas = data_final, Predicción = predicción)
library(plyr)
df3 <- ldply(list)
df3[["LLegadas_Turistas"]] <- as.numeric(df3[["LLegadas_Turistas"]])
Editor is loading...