Untitled
unknown
plain_text
4 years ago
5.7 kB
8
Indexable
# 'dataset' contiene los datos de entrada para este script
library(gtrendsR)
# First batch of Google Trends queries. Every query covers the same window
# ("2008-01-01 2021-08-01"); the search term varies by market. The full
# response is kept as resN and its `interest_over_time` table is stored under
# the `geo` code that was queried.

# China
res1 <- gtrends(
  keyword = "chile", geo = "CN", time = "2008-01-01 2021-08-01"
)
CN <- res1[["interest_over_time"]]

# United Kingdom
res2 <- gtrends(
  keyword = "chile tourism", geo = "GB", time = "2008-01-01 2021-08-01"
)
GB <- res2[["interest_over_time"]]

# France
res3 <- gtrends(
  keyword = "chile", geo = "FR", time = "2008-01-01 2021-08-01"
)
FR <- res3[["interest_over_time"]]

# Germany
res4 <- gtrends(
  keyword = "chile", geo = "DE", time = "2008-01-01 2021-08-01"
)
DE <- res4[["interest_over_time"]]

# Spain
res5 <- gtrends(
  keyword = "chile turismo", geo = "ES", time = "2008-01-01 2021-08-01"
)
ES <- res5[["interest_over_time"]]

# United States
res6 <- gtrends(
  keyword = "chile tourism", geo = "US", time = "2008-01-01 2021-08-01"
)
US <- res6[["interest_over_time"]]

# Colombia
res7 <- gtrends(
  keyword = "chile turismo", geo = "CO", time = "2008-01-01 2021-08-01"
)
CO <- res7[["interest_over_time"]]

# Brazil
res8 <- gtrends(
  keyword = "chile", geo = "BR", time = "2008-01-01 2021-08-01"
)
BR <- res8[["interest_over_time"]]

# Mexico
res9 <- gtrends(
  keyword = "chile turismo", geo = "MX", time = "2008-01-01 2021-08-01"
)
MX <- res9[["interest_over_time"]]

# Argentina
res10 <- gtrends(
  keyword = "chile turismo", geo = "AR", time = "2008-01-01 2021-08-01"
)
AR <- res10[["interest_over_time"]]
# Pause for `x` seconds and report the time the pause took.
#
# x: number of seconds handed to Sys.sleep().
# Returns the difference between two proc.time() readings taken immediately
# before and immediately after the sleep.
testit <- function(x) {
  before <- proc.time()
  Sys.sleep(x)
  after <- proc.time()
  after - before # the CPU usage should be negligible
}
# Wait five seconds before sending the second batch of requests
# (presumably to stay clear of Google Trends rate limiting -- confirm).
testit(5)

# Second batch of Google Trends queries: same time window as the first batch,
# same convention (resN keeps the full response, the `interest_over_time`
# table is stored under the `geo` code that was queried).

# Peru
res11 <- gtrends(
  keyword = "chile turismo", geo = "PE", time = "2008-01-01 2021-08-01"
)
PE <- res11[["interest_over_time"]]

# Bolivia
res12 <- gtrends(
  keyword = "chile turismo", geo = "BO", time = "2008-01-01 2021-08-01"
)
BO <- res12[["interest_over_time"]]

# Australia
res13 <- gtrends(
  keyword = "chile tourism", geo = "AU", time = "2008-01-01 2021-08-01"
)
AU <- res13[["interest_over_time"]]

# Japan
res14 <- gtrends(
  keyword = "chile", geo = "JP", time = "2008-01-01 2021-08-01"
)
JP <- res14[["interest_over_time"]]

# Canada
res15 <- gtrends(
  keyword = "chile tourism", geo = "CA", time = "2008-01-01 2021-08-01"
)
CA <- res15[["interest_over_time"]]

# Italy
res16 <- gtrends(
  keyword = "chile", geo = "IT", time = "2008-01-01 2021-08-01"
)
IT <- res16[["interest_over_time"]]

# Switzerland
res17 <- gtrends(
  keyword = "chile", geo = "CH", time = "2008-01-01 2021-08-01"
)
CH <- res17[["interest_over_time"]]

# South Korea
res18 <- gtrends(
  keyword = "chile", geo = "KR", time = "2008-01-01 2021-08-01"
)
KR <- res18[["interest_over_time"]]

# Israel
res19 <- gtrends(
  keyword = "chile", geo = "IL", time = "2008-01-01 2021-08-01"
)
IL <- res19[["interest_over_time"]]
# One wide table of search interest: the first two columns of the China table
# (later labelled "Fecha" and "China") followed by the second column of every
# other country's table, in the order the final column labels expect.
# cbind() requires all tables to have a compatible number of rows.
google_trends <- cbind(
  CN[, 1:2],
  GB[, 2], FR[, 2], DE[, 2], ES[, 2], US[, 2], CO[, 2],
  BR[, 2], MX[, 2], AR[, 2], PE[, 2], BO[, 2], AU[, 2],
  JP[, 2], CA[, 2], IT[, 2], CH[, 2], KR[, 2], IL[, 2]
)
library(plyr) # requires plyr for rbind.fill()
# Column-bind objects of different lengths, padding the shorter ones with NA.
#
# ...: vectors, matrices or data frames to place side by side.
# Each input is transposed into a one-object data frame, the pieces are
# stacked with plyr's rbind.fill() (which fills missing columns with NA), and
# the stack is transposed back. Returns a data.frame; because the data passes
# through t(), mixed-type input ends up coerced to a common type.
cbind.fill <- function(...) {
  as_rows <- lapply(list(...), function(piece) as.data.frame(t(piece)))
  stacked <- rbind.fill(as_rows)
  data.frame(t(stacked))
}
# Attach the second column of the host-supplied `dataset` to the trends table
# (cbind.fill pads the shorter input with NA) and label the 21 resulting
# columns: the date, one column per country, and the tourist-arrivals target.
data_final <- setNames(
  cbind.fill(google_trends, dataset[, 2]),
  c(
    "Fecha", "China", "UK", "Francia", "Alemania", "España", "US",
    "Colombia", "Brazil", "Mexico", "Argentina", "Peru", "Bolivia",
    "Australia", "Japon", "Canada", "Italia", "Suiza", "Coreadelsur",
    "Israel", "LLegadas_Turistas"
  )
)
# Min-max rescale `x` onto [0, 1].
#
# x:     a numeric vector, or a data frame whose columns are all numeric.
#        For a data frame a single minimum and maximum taken over all cells
#        is used, i.e. the columns are NOT rescaled independently.
# na.rm: drop NA values when computing the minimum and maximum. The default
#        (FALSE) means any NA in `x` makes the whole result NA.
#
# Returns an object of the same shape as `x`. When every value in `x` is
# identical the range is zero; zeros are returned in that case instead of the
# NaN that dividing 0 by 0 would produce.
normalize <- function(x, na.rm = FALSE) {
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  # isTRUE() keeps an NA span (NA input with na.rm = FALSE) out of this branch.
  if (isTRUE(span == 0)) {
    return(x - lo)
  }
  (x - lo) / span
}
# Split data_final into the model inputs. The first column ("Fecha") is
# dropped, leaving 19 country columns followed by LLegadas_Turistas.
#   train: rows 1-90, used to fit the model
#   test:  rows 91-92, the rows to be predicted
# Every column is converted via as.character() so that factor columns are
# read by label rather than by level code. Entries that do not parse as
# numbers become NA (with a warning).
# NOTE(review): the split positions are hard-coded; confirm they still match
# the number of rows supplied in `dataset`.
train <- data_final[1:90, -1]
train[] <- lapply(train, function(x) as.numeric(as.character(x)))
test <- data_final[91:92, -1]
test[] <- lapply(test, function(x) as.numeric(as.character(x)))

# Scale the 19 predictors. The minimum and range are measured on the training
# rows only and then reused for the test rows, so both sets share one scale.
# (Scaling the two test rows with their own minimum and maximum would put
# them on a different scale from the data the model is fitted on.)
train_min <- min(train[, 1:19])
train_span <- max(train[, 1:19]) - train_min
train[, 1:19] <- normalize(train[, 1:19])
test[, 1:19] <- (test[, 1:19] - train_min) / train_span
# random forest
library(randomForest)
# Fit the random forest several times and keep each run's rounded prediction
# for the two test rows. No seed is set in this script, so the forests (and
# therefore the predictions) differ from run to run; the spread is summarised
# further down.
n_runs <- 10

# One column per run, one row per test row. The matrix is preallocated rather
# than grown with cbind(); the row names match what the incremental
# cbind(table, c(prediccion = ...)) construction produced. The name `table`
# shadows base::table but is kept because later code reads it.
table <- matrix(
  NA_real_,
  nrow = 2, ncol = n_runs,
  dimnames = list(c("prediccion1", "prediccion2"), NULL)
)

for (i in seq_len(n_runs)) {
  rf <- randomForest(
    LLegadas_Turistas ~ .,
    data = train, importance = TRUE, ntree = 50
  )
  # Column 20 of `test` is the (unknown) target, so it is left out.
  pred <- as.data.frame(round(predict(rf, test[, -20])))
  colnames(pred) <- c("LLegadas_Turistas")
  # Append the prediction to the two original rows, then drop the original
  # target column (21) so LLegadas_Turistas refers to the prediction.
  prediccion <- cbind(data_final[91:92, ], pred)
  prediccion <- prediccion[, -21]
  table[, i] <- prediccion$LLegadas_Turistas
}

# Variable importance, taken from the last fitted forest only (the loop used
# to rebuild this on every pass and keep just the final one). The first
# column of rf$importance is used, sorted from most to least important.
Importancia_variables <- as.data.frame(
  sort(rf$importance[, 1], decreasing = TRUE),
  optional = TRUE
)
Importancia_variables$paises <- rownames(Importancia_variables)
colnames(Importancia_variables) <- c("Porcentaje", "Paises")
library(matrixStats)
# Summarise the per-run predictions held in `table`: the row mean becomes the
# forecast and the row standard deviation its spread. Both are appended to
# the two rows being predicted, and the original (empty) target column, in
# position 21, is dropped so the mean takes its place.
predicción <- cbind(
  data_final[91:92, ],
  data.frame(rowMeans(table)),
  data.frame(rowSds(as.matrix(table), na.rm = TRUE))
)
predicción <- predicción[, -21]
names(predicción)[21] <- "LLegadas_Turistas"
names(predicción)[22] <- "Desviación estándar"

# Keep only the fully observed rows of the history. This must happen after
# the forecast rows were read from data_final above, because those rows have
# no observed target and are removed here.
data_final <- na.omit(data_final)

# Stack history and forecast into one table; ldply() adds an id column
# holding the list names ("Llegadas" / "Predicción").
resultados <- list(Llegadas = data_final, Predicción = predicción)
library(plyr) # already attached earlier for rbind.fill(); repeated for ldply()
df3 <- ldply(resultados)

# Make the target numeric. A factor is converted through its labels, since
# as.numeric() on a factor would return the level codes instead.
llegadas <- df3$LLegadas_Turistas
if (is.factor(llegadas)) {
  llegadas <- as.character(llegadas)
}
df3$LLegadas_Turistas <- as.numeric(llegadas)