前天在这里下载了《Hadoop视频[共44集适合入门 MP4格式]》的视频回去看了,然后按照第15集的代码,写代码在自己搭建的hadoop下测试,一直报错。具体的内容如下:
我的hadoop环境是hadoop1.0.3
mapreduce的代码如下:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.mapreduce.lib.input.*;
//import org.apache.hadoop.mapred.TextInputFormat;
//import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool{
enum Counter
{
LINESKIP,//出错的行
}
public static class MapClass
extends Mapper<LongWritable, Text, Text, Text>{
public void Map(LongWritable key, Text value,Context context) throws IOException,InterruptedException
{
String line = value.toString();
try
{
String [] lineSplit = line.split(" ");
String anum = lineSplit[0];
String bnum = lineSplit[1];
context.write(new Text(bnum), new Text(anum));
}
catch(java.lang.ArrayIndexOutOfBoundsException e)
{
context.getCounter(Counter.LINESKIP).increment(1);
return;
}
}
}
public static class ReduceClass
extends Reducer<Text,Text,Text,Text> {
@Override
public void reduce(Text key, Iterable<Text> values,Context context)
throws IOException,InterruptedException
{
String valueString;
String out = "";
for(Text value : values)
{
valueString = value.toString();
out+=valueString+"|";
}
context.write(key, new Text(out));
}
}
public int run(String[] args)throws Exception,IOException, ClassNotFoundException, InterruptedException
{
Configuration conf = getConf();
Job job = new Job(conf,"WordCount");//任务名
job.setJarByClass(WordCount.class);// 指定Class
FileInputFormat.addInputPath(job, new Path(args[0]));//输入路径
FileOutputFormat.setOutputPath(job, new Path(args[1]));//输出路径
job.setMapperClass(MapClass.class);//调用上面MapClass类
job.setReducerClass(ReduceClass.class);//调用上面ReduceClass类
//job.setInputFormatClass(TextInputFormat.class);
//job.setOutputFormatClass(TextOutputFormat.class);
//job.setMapOutputKeyClass(Text.class);
//job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.waitForCompletion(true);
return job.isSuccessful()?0:1;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new WordCount(), args);
System.exit(res);
}
}
运行时的报错如下:
15/06/26 07:00:06 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 15/06/26 07:00:07 INFO input.FileInputFormat: Total input paths to process : 1 15/06/26 07:00:07 WARN snappy.LoadSnappy: Snappy native library not loaded 15/06/26 07:00:08 INFO mapred.JobClient: Running job: job_local_0001 15/06/26 07:00:09 INFO mapred.Task: Using ResourceCalculatorPlugin : null 15/06/26 07:00:09 INFO mapred.MapTask: io.sort.mb = 100 15/06/26 07:00:09 INFO mapred.MapTask: data buffer = 79691776/99614720 15/06/26 07:00:09 INFO mapred.MapTask: record buffer = 262144/327680 15/06/26 07:00:09 WARN mapred.LocalJobRunner: job_local_0001 java.io.IOException : Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1014) at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:691) at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80) at org.apache.hadoop.mapreduce.Mapper.map( Mapper.java:124) at org.apache.hadoop.mapreduce.Mapper.run( Mapper.java:144) at org.apache.hadoop.mapred.MapTask.runNewMapper( MapTask.java:764) at org.apache.hadoop.mapred.MapTask.run( MapTask.java:370) at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212) 15/06/26 07:00:09 INFO mapred.JobClient: map 0% reduce 0% 15/06/26 07:00:09 INFO mapred.JobClient: Job complete: job_local_0001 15/06/26 07:00:09 INFO mapred.JobClient: Counters: 0
请各位大神帮忙看看!
|