MapReduce中的setCombinerClass疑问

在编程篇中看到排序的时候，代码如下

package cn.base.mapreduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Sort {
        
        public static class Map extends Mapper<Object, Text, IntWritable, IntWritable>{
                
                private static IntWritable iw = new IntWritable();
                
                
                protected void map(Object key, Text value,Context context)
                                throws IOException, InterruptedException {
                        // TODO Auto-generated method stub
                        String line = value.toString();
                        iw.set(Integer.parseInt(line));
                        context.write(iw, new IntWritable(1));
                }
                
        }
        
        public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable>{
                
                private static IntWritable iw = new IntWritable(1);
                
                
                protected void reduce(IntWritable key, Iterable<IntWritable> values,Context context)
                                throws IOException, InterruptedException {
                        // TODO Auto-generated method stub
                        
                        for(IntWritable val : values){
                                context.write(iw, key);
                                iw = new IntWritable(iw.get() + 1);
                        }
                }
                
        }
        
        public static void main(String[] args) throws Exception {
                
                Configuration conf = new Configuration();
                
                String[] ioArgs = {"dedup_in","output"};
                
                String[] otherArgs = new GenericOptionsParser(ioArgs).getRemainingArgs();
                
                if(otherArgs.length != 2){
                        System.err.println("Usage: Data Deduplication <in> <out>");

                     System.exit(2);
                }
                
                Job job = new Job(conf,"Sort");
                
                job.setJarByClass(Sort.class);
                
                job.setMapperClass(Map.class);
                //job.setCombinerClass(Reduce.class);
                job.setReducerClass(Reduce.class);
                
                
                job.setInputFormatClass(TextInputFormat.class);
                job.setOutputKeyClass(IntWritable.class);
                job.setOutputValueClass(IntWritable.class);
                job.setOutputFormatClass(TextOutputFormat.class);
                
                FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
                FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
                
                System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

}
复制代码

如果我把 //job.setCombinerClass(Reduce.class); 这一行的注释去掉（即启用combiner），那么就达不到想要的效果，而且数据都错了
请问这个在哪些时候加入，也看了这个job.setCombinerClass(Reduce.class);介绍，但并不是非常明白
谁可以帮助我解释下吗?

howtodown · 发表于 2014-10-20 12:44:12

本帖最后由 howtodown 于 2014-10-20 13:26 编辑
Reduce在map阶段执行称之为combine，而且map的个数一般比reduce个数多得多。

之所以混乱，是因为你在reduce里面不止做了排序，还执行了其它操作，如果只是单独的排序，map的combine能提高效率

Joker · 发表于 2014-10-20 13:25:34

howtodown 发表于 2014-10-20 12:44
Reduce在map阶段执行称之为combine，而且map的个数一般比reduce个数多的多。

之所以混乱，是因为你在red ...

多谢版主，写了job.setCombinerClass(Reduce.class);可以提高我的效率

howtodown · 发表于 2014-10-20 13:27:22

Joker 发表于 2014-10-20 13:25
多谢版主，写了job.setCombinerClass(Reduce.class);可以提高我的效率

恩，是的，提高整个程序的运行效率

evababy · 发表于 2014-11-28 10:46:54

map的value作为输出，又把reduce的value作为输出，如果中间执行了combiner，会造成reduce输入的KEY已经是被合并过的数据，相当于执行了两次reduce，你说结果能一样么？

图文精华

MapReduce中的setCombinerClass疑问

已有(4)人评论

最佳新人

热心会员

活跃会员

推广达人

宣传达人

突出贡献

优秀版主

论坛元老

推荐 /2