这是一个非常简单的程序:指定一条样本数据,从文件中读取样本数据送入 mapper,计算指定样本与 split 中每一条样本的距离,然后在 reducer 中归约,找到距离最小的样本信息并输出。在 mapper 中,读取一条 Text,转换成字符串,并用自己写的 Sample 对象封装样本数据。但是在运行的时候报空指针异常,如下图:
空指针异常
程序代码如下
封装样本的对象
[mw_shl_code=java,true]package com.qyz.smote;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * One record parsed from a text line whose fields are separated by spaces
 * and/or tabs, in this order: id, date ({@code yyyy-MM-dd}), area id,
 * average watch time, number of watchers.
 *
 * <p>Leading and trailing whitespace is ignored and any run of spaces/tabs
 * counts as a single separator. The original line is kept unmodified and is
 * what {@link #toString()} returns.
 */
public class Sample {
    /** Number of leading fields that must be present on a line. */
    private static final int FIELD_COUNT = 5;

    private final int id;
    private final Date date;
    private final String areaId;
    private final double avgWatchTime;
    private final int numWatcher;
    // The raw input line, exactly as it was passed to the constructor.
    private final String dataLine;

    /**
     * Parses one record line.
     *
     * @param line the raw record text
     * @throws NullPointerException if {@code line} is null
     * @throws IllegalArgumentException if the line has fewer than five fields
     * @throws NumberFormatException if the id, average watch time or watcher
     *         count is not a valid number
     */
    public Sample(String line) {
        if (line == null) {
            throw new NullPointerException("line must not be null");
        }
        // Trim first: a leading separator would otherwise produce an empty
        // first token and shift every field by one.
        String[] strs = line.trim().split("[ \\t]+");
        if (strs.length < FIELD_COUNT) {
            throw new IllegalArgumentException(
                    "expected at least " + FIELD_COUNT + " fields but found "
                            + strs.length + " in line: \"" + line + "\"");
        }
        this.id = Integer.parseInt(strs[0]);
        this.date = parseDate(strs[1]);
        this.areaId = strs[2];
        this.avgWatchTime = Double.parseDouble(strs[3]);
        this.numWatcher = Integer.parseInt(strs[4]);
        this.dataLine = line;
    }

    /**
     * Parses a {@code yyyy-MM-dd} date.
     *
     * @param string the date text
     * @return the parsed date, or {@code null} if the text cannot be parsed
     *         (the stack trace is printed and parsing of the record continues)
     */
    private Date parseDate(String string) {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
        try {
            return df.parse(string);
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Returns the numeric attributes of this sample.
     *
     * @return a new two-element array: {@code [avgWatchTime, numWatcher]}
     */
    public double[] getAttr() {
        return new double[] {this.avgWatchTime, this.numWatcher};
    }

    /** @return the id parsed from the first field */
    public int getId() {
        return id;
    }

    /** @return the date parsed from the second field; may be {@code null} if it was unparseable */
    public Date getDate() {
        return date;
    }

    /** @return the third field, kept as text */
    public String getAreaId() {
        return areaId;
    }

    /** @return the value parsed from the fourth field */
    public double getAvgWatchTime() {
        return avgWatchTime;
    }

    /** @return the value parsed from the fifth field */
    public int getNumWatcher() {
        return numWatcher;
    }

    /** @return the unmodified line this sample was built from */
    public String getDataLine() {
        return dataLine;
    }

    @Override
    public String toString() {
        return this.getDataLine();
    }

    //==============test=========
    public static void main(String[] args) {
        Sample s = new Sample("5 2016-05-31 0537 90.1025161743164 70925 ");
        System.out.println(s.getAttr()[0] + ":" + s.getAttr()[1]);
    }
}
[/mw_shl_code]
Mapper
[mw_shl_code=java,true]package com.qyz.smote;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Emits, for every input record, the record text followed by a tab and its
 * Euclidean distance to a fixed reference sample. The output key is the id of
 * the reference sample, so all records are sent to the same reduce group.
 */
public class SmoteMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
    // TODO: make the reference sample configurable instead of hard-coding it.
    private static final String TEST_SAMPLE_LINE = "5 2016-05-31 0537 90.1025161743164 70925 ";

    // Parsed once per mapper instance rather than once per input record.
    private final Sample testSample = new Sample(TEST_SAMPLE_LINE);

    /**
     * Computes the distance between the reference sample and the sample on
     * the current input line and writes {@code (referenceId, line + "\t" + distance)}.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of input text holding a sample record
     * @param context used to emit the output pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // The sample read from the split.
        String smpStr = value.toString();
        if (smpStr.trim().isEmpty()) {
            // A blank line carries no sample; skip it instead of failing the task.
            return;
        }
        Sample trainSample = new Sample(smpStr);

        // Attribute vectors of both samples.
        double[] testAttr = testSample.getAttr();
        double[] trainAttr = trainSample.getAttr();

        double distance = calDistance(testAttr, trainAttr);
        context.write(new IntWritable(testSample.getId()), new Text(smpStr + "\t" + distance));
    }

    /**
     * Computes the Euclidean distance between two vectors.
     *
     * @param a first vector
     * @param b second vector
     * @return the square root of the sum of squared component differences
     * @throws IllegalArgumentException if the vectors differ in length
     */
    private double calDistance(double[] a, double[] b) {
        if (a.length != b.length) {
            // Throw rather than System.exit: exiting would kill the JVM running the task.
            throw new IllegalArgumentException("错误:存在属性值个数不一样的样本!!!");
        }
        double dis = 0;
        for (int i = 0; i < a.length; i++) {
            // Component-wise difference; subtracting the arrays themselves does not compile.
            double diff = a[i] - b[i];
            dis += diff * diff;
        }
        return Math.sqrt(dis);
    }
}
[/mw_shl_code]
Reducer
[mw_shl_code=java,true]package com.qyz.smote;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * For each key, writes the single value whose last field (the distance) is
 * the smallest among all values of that key.
 */
public class SmoteReducer extends Reducer<IntWritable, Text, IntWritable, Text> {
    /**
     * Scans all values of a key and emits the one with the minimum distance.
     * Each value is expected to end with a numeric distance as its last
     * space/tab-separated field. Nothing is written if there are no values.
     *
     * @param key     the group key, written unchanged with the result
     * @param samples the values of this group
     * @param context used to emit the output pair
     * @throws NumberFormatException if the last field of a value is not a number
     */
    @Override
    protected void reduce(IntWritable key, Iterable<Text> samples, Context context)
            throws IOException, InterruptedException {
        // Nearest sample found so far.
        Text result = null;
        double min = Double.MAX_VALUE;

        for (Text s : samples) {
            String[] dataStrs = s.toString().split(" +|\\t");
            double distance = Double.parseDouble(dataStrs[dataStrs.length - 1]);
            if (result == null || distance < min) {
                min = distance;
                // Copy the value: the framework reuses the object handed out by
                // the iterator, so a kept reference would end up holding the
                // last value instead of the nearest one.
                result = new Text(s);
            }
        }

        if (result != null) {
            context.write(key, result);
        }
    }
}
[/mw_shl_code]
主函数
[mw_shl_code=java,true]package com.qyz.smote;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import com.sun.jersey.core.impl.provider.entity.XMLJAXBElementProvider.Text;
public class Smote {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
//新建一个job配置对象
JobConf conf = new JobConf();
//job对象
@SuppressWarnings("deprecation")
Job job = new Job(conf);
// //设置job的输入对象
// job.setInputFormatClass(NLineInputFormat.class);
// //设置map的输入value为每10行
// job.getConfiguration().set(NLineInputFormat.LINES_PER_MAP, "10");
job.setJobName("========邱玉章的smote!!!==========");
job.setJarByClass(Smote.class);
//设置输入输出流
FileInputFormat.setInputPaths(job, new Path("E:\\qyzfile\\大数据工作室\\smoteData"));
FileOutputFormat.setOutputPath(job, new Path("E:\\qyzfile\\大数据工作室\\smoteOutput"));
//设置mapper,reducer的类
job.setMapperClass(SmoteMapper.class);
job.setReducerClass(SmoteReducer.class);
//设置输出格式
job.setMapOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
//系统退出
System.exit(job.waitForCompletion(true) ? 0 : 1);
// //==============test===============
// String lpm = job.getConfiguration().get(NLineInputFormat.LINES_PER_MAP);
// System.out.println(lpm);
//
}
}
[/mw_shl_code]
烦请各位前辈解答!!不胜感激
|
|