问题导读:
1、单线程下HBase的插入性能如何?
2、如何在多线程下了解HBase的性能?
一、引言:
关于HBase插入性能优化涉及到的五个参数,此前已从参数配置的角度给大家提供了一个性能测试环境的实验代码。根据网友的反馈,基于单线程模式实现的数据插入,性能毕竟有限。通过个人实测,在我的虚拟机环境下,单线程插入数据的速率约为4w/s(约4万条/秒)。集群指标是:CPU双核1.83,虚拟机512M内存,集群部署单点模式。本文给出了基于多线程并发模式的测试代码案例和实测结果,希望能给大家一些启示:
二、源程序:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
- public class HBaseImportEx {
- static Configuration hbaseConfig = null;
- public static HTablePool pool = null;
- public static String tableName = "T_TEST_1";
- static{
- //conf = HBaseConfiguration.create();
- Configuration HBASE_CONFIG = new Configuration();
- HBASE_CONFIG.set("hbase.master", "192.168.230.133:60000");
- HBASE_CONFIG.set("hbase.zookeeper.quorum", "192.168.230.133");
- HBASE_CONFIG.set("hbase.zookeeper.property.clientPort", "2181");
- hbaseConfig = HBaseConfiguration.create(HBASE_CONFIG);
-
- pool = new HTablePool(hbaseConfig, 1000);
- }
- /*
- * Insert Test single thread
- * */
- public static void SingleThreadInsert()throws IOException
- {
- System.out.println("---------开始SingleThreadInsert测试----------");
- long start = System.currentTimeMillis();
- //HTableInterface table = null;
- HTable table = null;
- table = (HTable)pool.getTable(tableName);
- table.setAutoFlush(false);
- table.setWriteBufferSize(24*1024*1024);
- //构造测试数据
- List<Put> list = new ArrayList<Put>();
- int count = 10000;
- byte[] buffer = new byte[350];
- Random rand = new Random();
- for(int i=0;i<count;i++)
- {
- Put put = new Put(String.format("row %d",i).getBytes());
- rand.nextBytes(buffer);
- put.add("f1".getBytes(), null, buffer);
- //wal=false
- put.setWriteToWAL(false);
- list.add(put);
- if(i%10000 == 0)
- {
- table.put(list);
- list.clear();
- table.flushCommits();
- }
- }
- long stop = System.currentTimeMillis();
- //System.out.println("WAL="+wal+",autoFlush="+autoFlush+",buffer="+writeBuffer+",count="+count);
-
- System.out.println("插入数据:"+count+"共耗时:"+ (stop - start)*1.0/1000+"s");
-
- System.out.println("---------结束SingleThreadInsert测试----------");
- }
- /*
- * 多线程环境下线程插入函数
- *
- * */
- public static void InsertProcess()throws IOException
- {
- long start = System.currentTimeMillis();
- //HTableInterface table = null;
- HTable table = null;
- table = (HTable)pool.getTable(tableName);
- table.setAutoFlush(false);
- table.setWriteBufferSize(24*1024*1024);
- //构造测试数据
- List<Put> list = new ArrayList<Put>();
- int count = 10000;
- byte[] buffer = new byte[256];
- Random rand = new Random();
- for(int i=0;i<count;i++)
- {
- Put put = new Put(String.format("row %d",i).getBytes());
- rand.nextBytes(buffer);
- put.add("f1".getBytes(), null, buffer);
- //wal=false
- put.setWriteToWAL(false);
- list.add(put);
- if(i%10000 == 0)
- {
- table.put(list);
- list.clear();
- table.flushCommits();
- }
- }
- long stop = System.currentTimeMillis();
- //System.out.println("WAL="+wal+",autoFlush="+autoFlush+",buffer="+writeBuffer+",count="+count);
-
- System.out.println("线程:"+Thread.currentThread().getId()+"插入数据:"+count+"共耗时:"+ (stop - start)*1.0/1000+"s");
- }
-
-
- /*
- * Mutil thread insert test
- * */
- public static void MultThreadInsert() throws InterruptedException
- {
- System.out.println("---------开始MultThreadInsert测试----------");
- long start = System.currentTimeMillis();
- int threadNumber = 10;
- Thread[] threads=new Thread[threadNumber];
- for(int i=0;i<threads.length;i++)
- {
- threads[i]= new ImportThread();
- threads[i].start();
- }
- for(int j=0;j< threads.length;j++)
- {
- (threads[j]).join();
- }
- long stop = System.currentTimeMillis();
-
- System.out.println("MultThreadInsert:"+threadNumber*10000+"共耗时:"+ (stop - start)*1.0/1000+"s");
- System.out.println("---------结束MultThreadInsert测试----------");
- }
- /**
- * @param args
- */
- public static void main(String[] args) throws Exception{
- // TODO Auto-generated method stub
- //SingleThreadInsert();
- MultThreadInsert();
-
-
- }
-
- public static class ImportThread extends Thread{
- public void HandleThread()
- {
- //this.TableName = "T_TEST_1";
-
-
- }
- //
- public void run(){
- try{
- InsertProcess();
- }
- catch(IOException e){
- e.printStackTrace();
- }finally{
- System.gc();
- }
- }
- }
- }
复制代码
三、说明
1.线程数设置需要根据本集群硬件参数,通过实际测试得出。否则线程过多的情况下,性能反而会下降(总耗时反而上升)。
2.单笔提交数对性能的影响非常明显,需要在自己的环境下找到最理想的数值,这个数值与单条记录的字节数相关。
四、测试结果
---------开始MultThreadInsert测试----------
线程:8插入数据:10000共耗时:1.328s
线程:16插入数据:10000共耗时:1.562s
线程:11插入数据:10000共耗时:1.562s
线程:10插入数据:10000共耗时:1.812s
线程:13插入数据:10000共耗时:2.0s
线程:17插入数据:10000共耗时:2.14s
线程:14插入数据:10000共耗时:2.265s
线程:9插入数据:10000共耗时:2.468s
线程:15插入数据:10000共耗时:2.562s
线程:12插入数据:10000共耗时:2.671s
MultThreadInsert:100000共耗时:2.703s
---------结束MultThreadInsert测试----------
最后,感谢原作者的无私分享:51cto
|