使用DistributedCache缓存jar完全没有效果？

我想使用MR连接mysql读取数据，但是不想在每个节点上都添加lib包，所以就使用了缓存，但是根本就没有缓存下来？
代码如下

public static class Map extends Mapper<LongWritable, StudentRecord, LongWritable, Text>{

                @Override
                protected void map(LongWritable key, StudentRecord value,Context context)
                                throws IOException, InterruptedException {
                        context.write(new LongWritable(value.id),
                    new Text(value.toString()));
                }
                
        }
        
        public static class StudentRecord implements Writable,DBWritable{

                public int id;
                public String name;
                public String sex;
                public int age;
                
                
                public void readFields(ResultSet result) throws SQLException {
                           this.id = result.getInt(1);

                   this.name = result.getString(2);

                   this.sex = result.getString(3);

                   this.age = result.getInt(4);
                        
                }
                public void write(PreparedStatement stmt) throws SQLException {
                        
                       stmt.setInt(1, this.id);

                   stmt.setString(2, this.name);

                   stmt.setString(3, this.sex);

                   stmt.setInt(4, this.age);
                }
                
                public void readFields(DataInput in) throws IOException {
                        
                         this.id = in.readInt();

                 this.name = Text.readString(in);

                 this.sex = Text.readString(in);

                 this.age = in.readInt();
                        
                }
                public void write(DataOutput out) throws IOException {
                        out.writeInt(this.id);

                Text.writeString(out, this.name);

                Text.writeString(out, this.sex);

                out.writeInt(this.age);
                }
                @Override
                public String toString() {
                         return new String("学号：" + this.id + "_姓名：" + this.name

                         + "_性别："+ this.sex + "_年龄：" + this.age);
                }
                
                
                
        }
        
        
        public static void main(String[] args) throws Exception {
                
                JobConf conf = new JobConf();
                // 这句话很关键
                conf.set("mapred.job.tracker", "10.0.1.201:9001");
                
                Job job = Job.getInstance(conf, "aaa");
                
                job.setJarByClass(Fuck.class);
                
                // 非常重要，值得关注
                DistributedCache.addFileToClassPath(new Path("hdfs://xx.xx.xx.x:9000/lib/mysql-connector-java-5.1.28.jar"), job.getConfiguration());
        
                
                // 设置输入类型
                //job.setInputFormatClass(DBInputFormat.class);
                
                // 设置输出类型
                job.setOutputKeyClass(LongWritable.class);
                job.setOutputValueClass(Text.class);
                
        // 设置Map和Reduce类
                job.setMapperClass(Map.class);
                
                FileInputFormat.setInputPaths(job, new Path(args[0]));
                
        // 设置输出目录
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
                
        // 建立数据库连接
        
        DBConfiguration.configureDB(job.getConfiguration(), "com.mysql.jdbc.Driver",
                        "jdbc:mysql://localhost:3306/school", "root", "root");
        
        // 读取"student"表中的数据
        String[] fields = { "id", "name", "sex", "age" };
        
        DBInputFormat.setInput(job, StudentRecord.class, "student", null, "id", fields);
        
        System.out.println(job.waitForCompletion(true)?1:0);
        }
        
复制代码

异常是，找不到com.mysql.jdbc.Driver

Joker · 发表于 2015-1-7 14:52:01

log4j:WARN No appenders could be found for logger (org.apache.hadoop.conf.Configuration.deprecation).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Exception in thread "main" java.lang.RuntimeException: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
        at org.apache.hadoop.mapreduce.lib.db.DBInputFormat.setConf(DBInputFormat.java:168)
        at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:73)
        at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:133)
        at org.apache.hadoop.mapreduce.JobSubmitter.writeNewSplits(JobSubmitter.java:488)
        at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:508)
        at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:392)
        at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268)
        at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Unknown Source)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
        at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265)
        at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286)
        at cn.base.jdbc.Fuck.main(Fuck.java:144)
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
        at org.apache.hadoop.mapreduce.lib.db.DBInputFormat.getConnection(DBInputFormat.java:194)
        at org.apache.hadoop.mapreduce.lib.db.DBInputFormat.setConf(DBInputFormat.java:162)
        ... 13 more
Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
        at java.net.URLClassLoader$1.run(Unknown Source)
        at java.net.URLClassLoader$1.run(Unknown Source)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(Unknown Source)
        at java.lang.ClassLoader.loadClass(Unknown Source)
        at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
        at java.lang.ClassLoader.loadClass(Unknown Source)
        at java.lang.Class.forName0(Native Method)
        at java.lang.Class.forName(Unknown Source)
        at org.apache.hadoop.mapreduce.lib.db.DBConfiguration.getConnection(DBConfiguration.java:148)
        at org.apache.hadoop.mapreduce.lib.db.DBInputFormat.getConnection(DBInputFormat.java:188)
        ... 14 more
复制代码

muyannian · 发表于 2015-1-7 17:08:11

Joker 发表于 2015-1-7 14:52

不明白楼主是什么意思？
楼主是想缓存环境？？
一些jar包，让DistributedCache来完成。这个jar包是什么jar包？
如果说是运行环境所必须的，还是建议手动完成。有专门的集群管理工具，或者用shell分发。

Joker · 发表于 2015-1-7 17:10:18

本帖最后由 Joker 于 2015-1-7 17:15 编辑

muyannian 发表于 2015-1-7 17:08
不明白楼主是什么意思？
楼主是想缓存环境？？
一些jar包，让DistributedCache来完成。这个jar包是什么j ...

我想做的是缓存Jar，然后再提交Job，job执行之前DistributedCache会进行分发这个Jar加入到ClassPath中，然后可以运行，不需要在每个节点上进行手动的分发
我缓存的Jar包是mysql-connector-java-5.1.28.jar
一个数据库的JAR

Joker · 发表于 2015-1-7 17:10:52

muyannian 发表于 2015-1-7 17:08
不明白楼主是什么意思？
楼主是想缓存环境？？
一些jar包，让DistributedCache来完成。这个jar包是什么j ...

需求就是要使用DistributedCache来进行缓存

muyannian · 发表于 2015-1-7 18:32:28

Joker 发表于 2015-1-7 17:10
需求就是要使用DistributedCache来进行缓存

感觉楼主的程序有些问题，而且缓存还需要修改配置

package com.netqin.examples;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Word-count job that also demonstrates reading a distributed-cache file
 * through its symbolic link.
 *
 * <p>{@link #main} registers {@code /tmp/test/mail.py} as a cache file with the
 * URI fragment {@code #mail.py}; each mapper then prints that file from its
 * {@code setup} method by opening the link name.
 */
public class CacheDemo {

    /** Name of the symbolic link; must match the fragment after '#' in the cache URI. */
    private static final String SYMLINK_NAME = "mail.py";

    /**
     * Prints the cached file line by line to standard output.
     *
     * <p>The file is opened by its link name, not by an {@code hdfs://} URI:
     * {@code java.io.FileReader} only understands local file names.
     *
     * @throws Exception if the link cannot be opened or read
     */
    public static void UseDistributedCacheBySymbolicLink() throws Exception {
        BufferedReader br = new BufferedReader(new FileReader(SYMLINK_NAME));
        try {
            String s;
            while ((s = br.readLine()) != null) {
                System.out.println(s);
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader,
            // and now happens even when readLine() throws.
            br.close();
        }
    }

    /** Emits (token, 1) for every whitespace-separated token of each input line. */
    public static class TokenizerMapper extends
            Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        /**
         * Dumps the cached file once per task. Failures are only logged so
         * that the word count itself still runs (best effort, as before).
         */
        protected void setup(Context context) throws IOException,
                InterruptedException {
            System.out.println("Now, use the distributed cache and syslink");
            try {
                UseDistributedCacheBySymbolicLink();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    /** Sums the counts of each key; also used as the combiner. */
    public static class IntSumReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Parses generic Hadoop options, registers the cache file and runs the job.
     *
     * @param args generic options followed by {@code <in> <out>}
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }

        DistributedCache.createSymlink(conf);
        String path = "/tmp/test/mail.py";
        Path filePath = new Path(path);
        // Remember the '#': the fragment after it is the symbolic link name.
        String uriWithLink = filePath.toUri().toString() + "#" + SYMLINK_NAME;
        DistributedCache.addCacheFile(new URI(uriWithLink), conf);

        // Alternative kept for reference: putting a jar on the classpath.
        // Path p = new Path("/tmp/hadoop-0.20.2-capacity-scheduler.jar#hadoop-0.20.2-capacity-scheduler.jar");
        // DistributedCache.addArchiveToClassPath(p, conf);

        Job job = new Job(conf, "CacheDemo");
        job.setJarByClass(CacheDemo.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

<property>
  <name>mapred.local.dir</name>
  <value>${hadoop.tmp.dir}/mapred/local</value>
  <description>The local directory where MapReduce stores intermediate
  data files.  May be a comma-separated list of
  directories on different devices in order to spread disk i/o.
  Directories that do not exist are ignored.
  </description>
</property>
 
<property>
  <name>local.cache.size</name>
  <value>10737418240</value> <!-- 默认大小：10GB -->
  <description>The limit on the size of cache you want to keep, set by default
  to 10GB. This will act as a soft limit on the cache directory for out of band data.
  </description>
</property>
 
复制代码

注意符号链接
每个存储在HDFS中的文件被放到缓存中后都可以通过一个符号链接使用。
URI hdfs://namenode/test/input/file1#myfile 你可以在程序中直接使用myfile来访问 file1这个文件。 myfile是一个符号链接文件。

howtodown · 发表于 2015-1-7 18:48:06

本帖最后由 howtodown 于 2015-1-7 18:51 编辑

Joker 发表于 2015-1-7 17:10
需求就是要使用DistributedCache来进行缓存

你的代码顺序不对，把

// 非常重要，值得关注
DistributedCache.addFileToClassPath(new Path("hdfs://xx.xx.xx.x:9000/lib/mysql-connector-java-5.1.28.jar"), job.getConfiguration());

放到

   JobConf conf = new JobConf();
复制代码

前面。
更多内容，可以参考@muyannian的程序

Joker · 发表于 2015-1-7 20:43:06

howtodown 发表于 2015-1-7 18:48
本帖最后由 howtodown 于 2015-1-7 18:51 编辑

你的代码顺序不对，把

博主你好，我放在Job初始化前面，可是还是找不到类

    
        Configuration conf = new Configuration();
        
        Path jarPath = new Path("/lib/mysql-connector-java-5.1.13-bin.jar");
        DistributedCache.addArchiveToClassPath(jarPath, conf);
        
        // 这句话很关键
        conf.set("mapred.job.tracker", "10.0.1.201:9001");
        // 非常重要，值得关注
       
        Job job = new Job(conf,"xxoo");
        
       
        
        job.setJarByClass(Test.class);
        
       
        
        // 设置输入类型
        //job.setInputFormatClass(DBInputFormat.class);
        
        // 设置输出类型
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        
// 设置Map和Reduce类
        job.setMapperClass(Map.class);
        
//        FileInputFormat.setInputPaths(job, new Path(args[0]));
        
// 设置输出目录
FileOutputFormat.setOutputPath(job, new Path("db_out"));
        
// 建立数据库连接

DBConfiguration.configureDB(job.getConfiguration(), "com.mysql.jdbc.Driver",
                "jdbc:mysql://10.0.1.201:3306/school", "root", "root");

// 读取"student"表中的数据
String[] fields = { "id", "name", "sex", "age" };

DBInputFormat.setInput(job, StudentRecord.class, "student", null, "id", fields);

System.out.println(job.waitForCompletion(true)?0:1);
复制代码