Untitled
unknown
plain_text
5 months ago
2.0 kB
3
Indexable
// ---- WCDriver.java ----
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Configures and submits the "Word Count" MapReduce job, wiring
 * {@link WCMapper} and {@link WCReducer} together.
 */
public class WCDriver {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path; any further arguments are ignored
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WCDriver <input path> <output path>");
            System.exit(2);
        }

        // Job.getInstance() replaces the deprecated no-arg Job constructor.
        Job job = Job.getInstance();
        job.setJarByClass(WCDriver.class);
        job.setJobName("Word Count");

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(WCMapper.class);
        job.setReducerClass(WCReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Exit status mirrors the job outcome: 0 on success, 1 otherwise.
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}

// ---- WCMapper.java ----
import java.io.IOException;
import java.util.regex.Pattern;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Splits each input value on runs of non-word characters and emits
 * {@code (token, 1)} for every non-empty token.
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Token delimiter, compiled once instead of on every record. */
    private static final Pattern NON_WORD = Pattern.compile("\\W+");

    /** Constant count emitted with every token. */
    private static final IntWritable ONE = new IntWritable(1);

    // Reused output key, following the standard Hadoop WordCount example;
    // avoids allocating a new Text per token.
    private final Text word = new Text();

    /**
     * Emits one {@code (token, 1)} pair per non-empty token of {@code value}.
     *
     * @param key     input key (not used by this mapper)
     * @param value   the text to tokenize
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String token : NON_WORD.split(value.toString())) {
            // split() yields a leading empty string when the line starts with a delimiter.
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
}

// ---- WCReducer.java ----
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Sums the integer values received for a key and emits {@code (key, sum)}.
 */
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused output value, following the standard Hadoop WordCount example.
    private final IntWritable total = new IntWritable();

    /**
     * Adds up all {@code values} for {@code key} and writes the total.
     *
     * @param key     the key being reduced
     * @param values  the counts associated with {@code key}
     * @param context sink for the emitted pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int wordCount = 0;
        for (IntWritable value : values) {
            wordCount += value.get();
        }
        total.set(wordCount);
        context.write(key, total);
    }
}
Editor is loading...
Leave a Comment