在编程篇中看到排序的时候,代码如下
- package cn.base.mapreduce;
-
- import java.io.IOException;
- import java.util.Iterator;
-
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.LongWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.Mapper;
- import org.apache.hadoop.mapreduce.Reducer;
- import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
- import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
- import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
- import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
- import org.apache.hadoop.util.GenericOptionsParser;
-
- public class Sort {
-
- public static class Map extends Mapper<Object, Text, IntWritable, IntWritable>{
-
- private static IntWritable iw = new IntWritable();
-
-
- protected void map(Object key, Text value,Context context)
- throws IOException, InterruptedException {
- // TODO Auto-generated method stub
- String line = value.toString();
- iw.set(Integer.parseInt(line));
- context.write(iw, new IntWritable(1));
- }
-
- }
-
- public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable>{
-
- private static IntWritable iw = new IntWritable(1);
-
-
- protected void reduce(IntWritable key, Iterable<IntWritable> values,Context context)
- throws IOException, InterruptedException {
- // TODO Auto-generated method stub
-
- for(IntWritable val : values){
- context.write(iw, key);
- iw = new IntWritable(iw.get() + 1);
- }
- }
-
- }
-
- public static void main(String[] args) throws Exception {
-
- Configuration conf = new Configuration();
-
- String[] ioArgs = {"dedup_in","output"};
-
- String[] otherArgs = new GenericOptionsParser(ioArgs).getRemainingArgs();
-
- if(otherArgs.length != 2){
- System.err.println("Usage: Data Deduplication <in> <out>");
-
- System.exit(2);
- }
-
- Job job = new Job(conf,"Sort");
-
- job.setJarByClass(Sort.class);
-
- job.setMapperClass(Map.class);
- //job.setCombinerClass(Reduce.class);
- job.setReducerClass(Reduce.class);
-
-
- job.setInputFormatClass(TextInputFormat.class);
- job.setOutputKeyClass(IntWritable.class);
- job.setOutputValueClass(IntWritable.class);
- job.setOutputFormatClass(TextOutputFormat.class);
-
- FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
- FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
-
- System.exit(job.waitForCompletion(true) ? 0 : 1);
- }
-
- }
复制代码
假设我启用了 job.setCombinerClass(Reduce.class);（即去掉那行注释），那么就达不到想要的效果，而且数据都错了。
请问 Combiner 应该在哪些情况下使用？我也看了 job.setCombinerClass(Reduce.class); 的介绍，但并不是非常明白。
谁可以帮助我解释下吗?
|