# Paste-site metadata (not part of the script):
# Untitled
# unknown
# plain_text
# 3 years ago
# 1.5 kB
# 11
# Indexable
# Hadoop and Java locations, exported as environment variables before the
# Hadoop-related packages are loaded.
Sys.setenv(
  HADOOP_CMD = "/usr/local/hadoop/bin/hadoop",
  HADOOP_STREAMING = "/usr/local/hadoop/share/hadoop/tools/lib/hadoop-streaming-3.3.4.jar",
  HADOOP_HOME = "/usr/local/hadoop",
  JAVA_HOME = "/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.362.b08-1.el7_9.x86_64"
)
library(rmr2)
library(rhdfs)
library(lubridate)
library(stringr)
library(rJava)
# Call hdfs.init() (from rhdfs) before any HDFS access further down.
hdfs.init()
# Global placeholders, both starting out as NULL.
DocMese <- Qnt <- NULL
# Mapper: emit one (year-month + document type, quantity) pair for every
# field equal to "FATTURA" found in the incoming CSV lines.
#
# Args:
#   .:     Input key supplied by the framework; not used.
#   lines: Character vector of comma-separated records. Field 3 is parsed as a
#          date with ymd() and field 4 is converted to a numeric quantity.
#
# Returns:
#   keyval() of a one-column character matrix of keys ("YYYY-MM FATTURA") and
#   a one-column numeric matrix of quantities, one row per matching field.
#   When nothing matches (or `lines` is empty) both key and value are NULL.
map <- function(., lines) {
  # Per-line results are collected in preallocated lists and flattened once at
  # the end, instead of growing matrices with rbind() on every match.
  keys <- vector("list", length(lines))
  quantities <- vector("list", length(lines))

  for (i in seq_along(lines)) {
    fields <- unlist(strsplit(lines[i], ","))
    # A blank line splits into zero fields; skip it rather than indexing into
    # an empty vector.
    if (length(fields) == 0L) {
      next
    }

    AnnoMese <- as.character(str_sub(ymd(fields[3]), 1, 7))
    Quantita <- as.numeric(fields[4])
    # Debug trace of the parsed year-month for each record.
    print(AnnoMese)

    # Every field equal to "FATTURA" contributes one output row, so a record
    # with several such fields is emitted several times.
    n_match <- sum(fields == "FATTURA", na.rm = TRUE)
    keys[[i]] <- rep(paste0(AnnoMese, " ", "FATTURA"), n_match)
    quantities[[i]] <- rep(Quantita, n_match)
  }

  keys <- unlist(keys)
  quantities <- unlist(quantities)
  if (length(keys) == 0L) {
    return(keyval(NULL, NULL))
  }

  # Keep keys and values as one-column matrices (one row per emitted pair).
  keyval(matrix(keys, ncol = 1L), matrix(quantities, ncol = 1L))
}
# Reducer: for one key, round every value to two decimals and emit the mean
# of the rounded values under that same key.
#
# Args:
#   key: Key shared by all values in this group.
#   val: Numeric values collected for the key.
#
# Returns:
#   keyval() pairing `key` with the mean of the rounded values.
reduce <- function(key, val) {
  rounded <- round(val, digits = 2)
  keyval(key, mean(rounded))
}
# Run the MapReduce job that applies map() and reduce() to a text input.
#
# Args:
#   input:  Input path handed to mapreduce().
#   output: Output path handed to mapreduce(); NULL by default.
#
# Returns:
#   Whatever mapreduce() returns for the finished job.
job1 <- function(input, output = NULL) {
  mapreduce(
    input = input,
    output = output,
    input.format = "text",
    map = map,
    reduce = reduce,
    # No combiner: reduce() computes a mean, and reusing it as a combiner
    # would average partial averages, which is wrong whenever the partial
    # groups differ in size.
    combine = NULL
  )
}
# Driver: clear any previous output, run the job on the orders file, pull the
# results back from the distributed file system and print them.
hdfs.root <- "/appoLuca"
hdfs.data <- file.path(hdfs.root, "ordini.csv")
hdfs.out <- file.path(hdfs.root, "out")

# Remove output left over from an earlier run. The path is taken from hdfs.out
# so the cleanup and the job always agree on it; -f keeps the command quiet
# when the directory does not exist yet.
system(paste("/usr/local/hadoop/bin/hdfs dfs -rm -r -f", shQuote(hdfs.out)))

out <- job1(hdfs.data, hdfs.out)
results <- from.dfs(out)
results.df <- as.data.frame(results, stringsAsFactors = FALSE)

# cat() interprets the trailing newline; print() would show it as a literal \n.
cat("printing output...\n")
print(colnames(results.df))
print(results.df)
# Editor is loading...