Untitled

 avatar
unknown
plain_text
2 years ago
2.1 kB
5
Indexable
# Hadoop / Java locations, exported as environment variables before the
# Hadoop-related libraries are loaded.
Sys.setenv(
  HADOOP_CMD = "/usr/local/hadoop/bin/hadoop",
  HADOOP_STREAMING = "/usr/local/hadoop/share/hadoop/tools/lib/hadoop-streaming-3.3.4.jar",
  HADOOP_HOME = "/usr/local/hadoop",
  JAVA_HOME = "/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.362.b08-1.el7_9.x86_64"
)

library(rmr2)
library(rhdfs)
library(lubridate)
library(stringr)
library(rJava)

# Local path of the CSV file. Only the commented-out readLines() call below
# refers to it in the visible code; the MapReduce job reads its input from HDFS.
file_path <- "/home/hadoop/Luca/ordini.csv"


# Read the file contents
# file_content <- readLines(file_path)

# Map step: emit one (key, price) pair for every invoice line of the split.
#
# With input.format = "text", rmr2 hands the map function the text lines of
# the current input split as its value argument, so `file` already contains
# the CSV rows and must not be passed to readLines().
#
# @param . Key supplied by rmr2 (unused).
# @param file Character vector of comma-separated lines; the first field is
#   copied into the key, the second is a year-month-day date and the third is
#   the price.
# @return keyval() whose keys are "<YYYY-MM> <first field>" and whose values
#   are numeric prices, or NULL when the split holds no usable "FATTURA" row.
map <- function(., file) {
  rows <- as.character(file)
  rows <- rows[grepl("FATTURA", rows, fixed = TRUE)]
  if (length(rows) == 0L) {
    return(NULL)
  }

  fields <- strsplit(rows, ",", fixed = TRUE)
  # Missing positions become NA instead of raising an error on short rows.
  field_at <- function(i) {
    vapply(fields, function(f) f[i], character(1))
  }

  first_field <- field_at(1L)
  year_month <- format(ymd(field_at(2L), quiet = TRUE), "%Y-%m")
  # Non-numeric prices turn into NA and are filtered out just below.
  price <- suppressWarnings(as.numeric(field_at(3L)))

  # Rows whose date or price cannot be parsed would only poison the mean.
  usable <- !is.na(year_month) & !is.na(price)
  if (!any(usable)) {
    return(NULL)
  }

  keyval(paste0(year_month[usable], " ", first_field[usable]), price[usable])
}

# Reduce step: average all prices that share one key.
#
# @param key The grouping key received from the map step.
# @param prices Prices emitted for `key`; coerced to numeric because they may
#   arrive as character data parsed from CSV text.
# @return keyval() pairing `key` with the mean price (NA values are ignored).
reduce <- function(key, prices) {
  # Return the pair (instead of printing it) so rmr2 writes it to the output,
  # and use the key of this group rather than a global variable.
  keyval(key, mean(as.numeric(prices), na.rm = TRUE))
}

# Run the MapReduce job over a text input using the file's `map` and `reduce`.
#
# @param input HDFS path of the text input.
# @param output HDFS path for the result; NULL is passed through to
#   mapreduce() unchanged.
# @return Whatever mapreduce() returns.
job1 <- function(input, output = NULL) {
  mapreduce(
    input = input,
    output = output,
    input.format = "text",
    map = map,
    reduce = reduce,
    # No combiner: the reducer computes a mean, and averaging partial means
    # gives a wrong result whenever the partial groups differ in size.
    combine = FALSE
  )
}

# Global accumulators, initially empty.
year_month_doc <- NULL
prices <- NULL

# HDFS locations of the input file and of the job output.
hdfs.root <- "/appoLuca"
hdfs.data <- file.path(hdfs.root, "ordini.csv")
hdfs.out <- file.path(hdfs.root, "out")

# Remove the output directory left by a previous run.
system(paste("/usr/local/hadoop/bin/hdfs dfs -rm -r", hdfs.out))

# The parentheses matter: a bare `hdfs.init` only evaluates the function
# object and never initialises rhdfs.
hdfs.init()

out <- job1(hdfs.data, hdfs.out)



# DocMese = NULL
# Qnt = NULL
# 
# head(str(lines))
# for(i in 1:length(lines)){
#   print(ines[i])
#   fields = strsplit(lines[i], ',')
#   AnnoMese = as.character(str_sub(ymd(fields[3]),1,7))
#   print(AnnoMese)
#   Quantita = as.numeric(fields[4])
#   for(k in 1:length(fields)){
#     if(fields[k] == "FATTURA"){
#       RigaAnnoMese = paste0(AnnoMese," ", fields[k])
#       DocMese = rbind(DocMese, RigaAnnoMese)
#       Qnt = rbind(Qnt, Quantita)
#     }
#   }
# }
# 
# close(conn)
Editor is loading...