本帖最后由 lzw 于 2013-12-30 22:40 编辑
最近练习了一下hadoop mapreduce如何通过configuration传递参数到mapreduce context上下文中。设置configuration连接hbase参数，将mapreduce统计结果直接存入hbase，参数以后可以通过properties文件读取，下面是配置configuration:
// Driver body: configures the "OutputHbase" MapReduce job.
// Expects two command-line arguments: args[0] = input path, args[1] = output path.
if (args.length < 2) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
    System.err.println("Usage: OutputHbase <input path> <output path>");
    System.exit(2);
}

Configuration conf = new Configuration();
// Custom key carrying the HBase ZooKeeper host; tasks read it back from their
// context configuration, so it must be set BEFORE the Job is created from conf.
conf.set("hbase_host", "192.168.0.101");

Job job = new Job(conf,"OutputHbase");
//TableMapReduceUtil.addDependencyJars(job);
job.setJarByClass(OutputHbase.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);

// Both the map output and the job output use Text keys and Text values.
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true)?0:1);
复制代码
下面是mapreduce从context上下文中获取hbase连接IP:
public static class Map extends Mapper<Object,Text,Text,Text>{
private Text outKey = new Text();
private Text outVal = new Text();
/**
 * Splits one comma-separated input line into brand, model and size and emits
 * the brand as key with "model,size" as value.
 * Lines that do not split into exactly three fields are skipped silently.
 *
 * @param key     input key supplied by the framework (not used)
 * @param value   one line of input text
 * @param context task context that receives the (brand, "model,size") pair
 */
public void map(Object key,Text value,Context context) throws IOException,InterruptedException{
    final String[] fields = value.toString().split(",");
    if (fields.length != 3) {
        return; // malformed record: nothing to emit
    }
    // fields[0] = brand, fields[1] = model, fields[2] = size
    outKey.set(fields[0]);
    outVal.set(fields[1] + "," + fields[2]);
    context.write(outKey, outVal);
}
}
/**
 * Reducer that stores every (brand, model, size) record in the HBase table
 * "TestCars" instead of writing it to the job output.
 *
 * Each record becomes one row keyed by "brand,model" with the columns
 * Car:brand, Car:model and Car:size. The HBase ZooKeeper quorum is taken from
 * the job configuration key "hbase_host".
 */
public static class Reduce extends Reducer<Text,Text,Text,Text>{
    private HTablePool pool = null;
    private HTableInterface testHTable = null;

    /**
     * Opens the HBase table once per reduce task.
     *
     * @param context task context; its configuration must contain "hbase_host"
     */
    @Override
    public void setup(Context context){
        // logger.info("hbase_host----" + context.getConfiguration().get("hbase_host"));
        // logger.info("maxSize----" + context.getConfiguration().get("maxSize"));
        Configuration jobConf = context.getConfiguration();
        Configuration conf = HBaseConfiguration.create();
        // Read the HBase connection address from the job context.
        conf.set("hbase.zookeeper.quorum", jobConf.get("hbase_host"));
        pool = new HTablePool(conf, 10);
        testHTable = pool.getTable("TestCars");
    }

    /**
     * Writes all "model,size" values of one brand to HBase.
     * Values that do not split into exactly two fields are skipped.
     *
     * @param key     the brand
     * @param values  "model,size" strings emitted by the mapper for this brand
     * @param context task context (nothing is written to it)
     */
    @Override
    public void reduce(Text key,Iterable<Text> values,Context context)throws IOException,InterruptedException{
        String brand = key.toString();
        byte[] family = Bytes.toBytes("Car");

        // The batch is local to this call. Keeping it in an instance field without
        // clearing it would re-send the Puts of every earlier key on each call and
        // grow without bound for the lifetime of the task.
        List<Put> puts = new ArrayList<Put>();

        for(Text tx : values){
            String[] valueSplitted = tx.toString().split(",");
            if(valueSplitted.length != 2){
                continue;
            }
            String model = valueSplitted[0];
            String size = valueSplitted[1];

            Put put = new Put(Bytes.toBytes(brand+","+model));
            put.add(family, Bytes.toBytes("brand"), Bytes.toBytes(brand));
            put.add(family, Bytes.toBytes("model"), Bytes.toBytes(model));
            put.add(family, Bytes.toBytes("size"), Bytes.toBytes(size));
            puts.add(put);
        }// End for

        if(!puts.isEmpty()){
            testHTable.put(puts);
            testHTable.flushCommits();
        }
    }

    /**
     * Releases the table and the pool at the end of the task.
     *
     * @param context task context (not used)
     * @throws IOException if closing the table or the pool fails
     */
    @Override
    public void cleanup(Context context)throws IOException{
        try {
            if(null != testHTable){
                testHTable.close();
            }
        } finally {
            // Close the pool even when closing the table threw.
            if(null != pool){
                pool.close();
            }
        }
    }
}
复制代码
下面是连接hbase查询代码:
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Small command-line check that scans the HBase table "TestCars" and prints
 * the row key of every row to standard output.
 */
public class ConnectionHbase {
    /**
     * Program entry point.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        ConnectionHbase hbase = new ConnectionHbase();
        hbase.run();
    }

    /**
     * Connects to HBase through the ZooKeeper quorum 192.168.0.101, scans the
     * whole "TestCars" table and prints each row key.
     * An IOException is reported with its stack trace and is not rethrown.
     */
    public void run() {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.0.101");

        // The pool is local and closed below, so no connection is left open.
        HTablePool pool = new HTablePool(conf, 10);
        try {
            HTableInterface testHTable = pool.getTable("TestCars");
            try {
                ResultScanner res = testHTable.getScanner(new Scan());
                try {
                    for(Result rs : res){
                        System.out.println(Bytes.toString(rs.getRow()));
                    }
                } finally {
                    // Close the scanner even if iteration failed.
                    res.close();
                }
            } finally {
                testHTable.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                pool.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
复制代码
下面为查询出结果:
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:host.name=robinliu-PC
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:java.version=1.6.0_32-ea
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:java.vendor=Sun Microsystems Inc.
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:java.home=C:\Program Files\Java\jdk1.6.0_32\jre
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:java.class.path=D:\job\developing\duplicate\target\classes;D:\soft\dev\maven\repos\org\apache\hbase\hbase\0.94.6\hbase-0.94.6.jar;D:\soft\dev\maven\repos\com\yammer\metrics\metrics-core\2.1.2\metrics-core-2.1.2.jar;D:\soft\dev\maven\repos\com\google\guava\guava\11.0.2\guava-11.0.2.jar;D:\soft\dev\maven\repos\com\google\code\findbugs\jsr305\1.3.9\jsr305-1.3.9.jar;D:\soft\dev\maven\repos\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;D:\soft\dev\maven\repos\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;D:\soft\dev\maven\repos\commons-collections\commons-collections\3.2.1\commons-collections-3.2.1.jar;D:\soft\dev\maven\repos\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;D:\soft\dev\maven\repos\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar;D:\soft\dev\maven\repos\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutils-core-1.8.0.jar;D:\soft\dev\maven\repos\com\github\stephenc\high-scale-lib\high-scale-lib\1.1.1\high-scale-lib-1.1.1.jar;D:\soft\dev\maven\repos\commons-codec\commons-codec\1.4\commons-codec-1.4.jar;D:\soft\dev\maven\repos\commons-httpclient\commons-httpclient\3.1\commons-httpclient-3.1.jar;D:\soft\dev\maven\repos\commons-io\commons-io\2.1\commons-io-2.1.jar;D:\soft\dev\maven\repos\commons-lang\commons-lang\2.5\commons-lang-2.5.jar;D:\soft\dev\maven\repos\commons-logging\commons-logging\1.1.1\commons-logging-1.1.1.jar;D:\soft\dev\maven\repos\log4j\log4j\1.2.16\log4j-1.2.16.jar;D:\soft\dev\maven\repos\org\apache\avro\avro\1.5.3\avro-1.5.3.jar;D:\soft\dev\maven\repos\org\xerial\snappy\snappy-java\1.0.3.2\snappy-java-1.0.3.2.jar;D:\soft\dev\maven\repos\org\apache\avro\avro-ipc\1.5.3\avro-ipc-1.5.3.jar;D:\soft\dev\maven\repos\org\jboss\netty\netty\3.2.4.Final\netty-3.2.4.Final.jar;D:\soft\dev\maven\repos\org\apache\velocity\velocity\1.7\velocity-1.7.jar;D:\soft\dev\maven\repos\org\apache\zookeeper\zookeeper\3.4.5\zook
eeper-3.4.5.jar;D:\soft\dev\maven\repos\org\apache\thrift\libthrift\0.8.0\libthrift-0.8.0.jar;D:\soft\dev\maven\repos\org\jruby\jruby-complete\1.6.5\jruby-complete-1.6.5.jar;D:\soft\dev\maven\repos\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar;D:\soft\dev\maven\repos\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;D:\soft\dev\maven\repos\org\mortbay\jetty\jsp-2.1\6.1.14\jsp-2.1-6.1.14.jar;D:\soft\dev\maven\repos\org\mortbay\jetty\jsp-api-2.1\6.1.14\jsp-api-2.1-6.1.14.jar;D:\soft\dev\maven\repos\org\mortbay\jetty\servlet-api-2.5\6.1.14\servlet-api-2.5-6.1.14.jar;D:\soft\dev\maven\repos\org\codehaus\jackson\jackson-core-asl\1.8.8\jackson-core-asl-1.8.8.jar;D:\soft\dev\maven\repos\org\codehaus\jackson\jackson-mapper-asl\1.8.8\jackson-mapper-asl-1.8.8.jar;D:\soft\dev\maven\repos\org\codehaus\jackson\jackson-jaxrs\1.8.8\jackson-jaxrs-1.8.8.jar;D:\soft\dev\maven\repos\org\codehaus\jackson\jackson-xc\1.8.8\jackson-xc-1.8.8.jar;D:\soft\dev\maven\repos\org\slf4j\slf4j-api\1.4.3\slf4j-api-1.4.3.jar;D:\soft\dev\maven\repos\org\slf4j\slf4j-log4j12\1.4.3\slf4j-log4j12-1.4.3.jar;D:\soft\dev\maven\repos\tomcat\jasper-compiler\5.5.23\jasper-compiler-5.5.23.jar;D:\soft\dev\maven\repos\tomcat\jasper-runtime\5.5.23\jasper-runtime-5.5.23.jar;D:\soft\dev\maven\repos\org\jamon\jamon-runtime\2.3.1\jamon-runtime-2.3.1.jar;D:\soft\dev\maven\repos\com\google\protobuf\protobuf-java\2.4.0a\protobuf-java-2.4.0a.jar;D:\soft\dev\maven\repos\com\sun\jersey\jersey-core\1.8\jersey-core-1.8.jar;D:\soft\dev\maven\repos\com\sun\jersey\jersey-json\1.8\jersey-json-1.8.jar;D:\soft\dev\maven\repos\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;D:\soft\dev\maven\repos\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar;D:\soft\dev\maven\repos\com\sun\jersey\jersey-server\1.8\jersey-server-1.8.jar;D:\soft\dev\maven\repos\asm\asm\3.1\asm-3.1.jar;D:\soft\dev\maven\repos\javax\xml\bind\jaxb-api\2.1\jaxb-api-2.1.jar;D:\soft\dev\maven\repos\javax\activation\activation\1.1\activation-1.1.jar;
D:\soft\dev\maven\repos\stax\stax-api\1.0.1\stax-api-1.0.1.jar;D:\soft\dev\maven\repos\org\apache\hadoop\hadoop-core\1.0.4\hadoop-core-1.0.4.jar;D:\soft\dev\maven\repos\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;D:\soft\dev\maven\repos\org\apache\commons\commons-math\2.1\commons-math-2.1.jar;D:\soft\dev\maven\repos\commons-net\commons-net\1.4.1\commons-net-1.4.1.jar;D:\soft\dev\maven\repos\commons-el\commons-el\1.0\commons-el-1.0.jar;D:\soft\dev\maven\repos\net\java\dev\jets3t\jets3t\0.7.1\jets3t-0.7.1.jar;D:\soft\dev\maven\repos\net\sf\kosmosfs\kfs\0.3\kfs-0.3.jar;D:\soft\dev\maven\repos\hsqldb\hsqldb\1.8.0.10\hsqldb-1.8.0.10.jar;D:\soft\dev\maven\repos\oro\oro\2.0.8\oro-2.0.8.jar;D:\soft\dev\maven\repos\org\eclipse\jdt\core\3.1.1\core-3.1.1.jar
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:java.library.path=C:\Program Files\Java\jdk1.6.0_32\bin;C:\Windows\Sun\Java\bin;C:\Windows\system32;C:\Windows;C:\Perl64\site\bin;C:\Perl64\bin;C:\Program Files\Common Files\Microsoft Shared\Windows Live;C:\Program Files (x86)\Common Files\Microsoft Shared\Windows Live;C:\Program Files (x86)\AMD APP\bin\x86_64;C:\Program Files (x86)\AMD APP\bin\x86;C:\Program Files (x86)\Common Files\NetSarang;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;D:\Program Files\TortoiseSVN\bin;C:\Program Files (x86)\Windows Live\Shared;C:\Program Files (x86)\Common Files\Acronis\SnapAPI\;C:\Program Files\Java\jdk1.6.0_32\bin;C:\apache-maven-3.1.0\bin;.
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:java.io.tmpdir=C:\Users\robinliu\AppData\Local\Temp\
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:os.name=Windows 7
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:os.arch=amd64
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:os.version=6.1
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:user.name=robinliu
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:user.home=C:\Users\robinliu
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Client environment:user.dir=D:\job\developing\duplicate
13/12/30 22:12:30 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=192.168.0.101:2181 sessionTimeout=180000 watcher=hconnection
13/12/30 22:12:30 INFO zookeeper.RecoverableZooKeeper: The identifier of this process is 7068@robinliu-PC
13/12/30 22:12:30 INFO zookeeper.ClientCnxn: Opening socket connection to server hadoop.master/192.168.0.101:2181. Will not attempt to authenticate using SASL (无法定位登录配置)
13/12/30 22:12:30 INFO zookeeper.ClientCnxn: Socket connection established to hadoop.master/192.168.0.101:2181, initiating session
13/12/30 22:12:30 INFO zookeeper.ClientCnxn: Session establishment complete on server hadoop.master/192.168.0.101:2181, sessionid = 0x14343c460920014, negotiated timeout = 180000
Acura,Integra
Acura,Legend
Audi,100
Audi,90
BMW,535i
Buick,Century
Buick,LeSabre
Buick,Riviera
Buick,Roadmaster
Cadillac,DeVille
Cadillac,Seville
复制代码
程序中仅仅查询出来rowkey值
来自群组: hadoop技术组