本帖最后由 pig2 于 2017-3-3 17:00 编辑
问题导读:
1.如何根据rowkey模糊查询?
2.如何使用Comparator过滤rowkey?
3.如何查询rowkey中包含有某字符串的数据?
RowFilter用于过滤row key Operator | Description | LESS | 小于 | LESS_OR_EQUAL | 小于等于 | EQUAL | 等于 | NOT_EQUAL | 不等于 | GREATER_OR_EQUAL | 大于等于 | GREATER | 大于 | NO_OP | 排除所有 |
Comparator | Description | BinaryComparator | 使用Bytes.compareTo()比较 | BinaryPrefixComparator | 和BinaryComparator差不多,从前面开始比较 | NullComparator | Does not compare against an actual value but whether a given one is null, or not null. | BitComparator | Performs a bitwise comparison, providing a BitwiseOp class with OR, and XOR operators. | RegexStringComparator | 正则表达式 | SubstringComparator | 把数据当成字符串,用contains()来判断 |
- import java.io.IOException;
-
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.hbase.HBaseConfiguration;
- import org.apache.hadoop.hbase.HColumnDescriptor;
- import org.apache.hadoop.hbase.HTableDescriptor;
- import org.apache.hadoop.hbase.client.HBaseAdmin;
- import org.apache.hadoop.hbase.client.HTable;
- import org.apache.hadoop.hbase.client.Put;
- import org.apache.hadoop.hbase.client.Result;
- import org.apache.hadoop.hbase.client.ResultScanner;
- import org.apache.hadoop.hbase.client.Scan;
- import org.apache.hadoop.hbase.filter.BinaryComparator;
- import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
- import org.apache.hadoop.hbase.filter.CompareFilter;
- import org.apache.hadoop.hbase.filter.Filter;
- import org.apache.hadoop.hbase.filter.RegexStringComparator;
- import org.apache.hadoop.hbase.filter.RowFilter;
- import org.apache.hadoop.hbase.filter.SubstringComparator;
-
- /**
-  * Demonstrates filtering HBase rows by row key with {@link RowFilter} and
-  * several comparators (binary, regular expression, substring, binary prefix).
-  */
- public class TestHbaseRowFilter {
-     String tableName = "test_row_filter";
-     Configuration config = HBaseConfiguration.create();
-
-     /**
-      * Runs four row-key scans against the test table and prints every
-      * matching row to standard output, each group preceded by a heading.
-      *
-      * Parts of this code are adapted from "HBase: The Definitive Guide".
-      *
-      * @throws IOException if the table cannot be opened or a scan fails
-      */
-     public void testRowFilter() throws IOException {
-         HTable table = new HTable(config, tableName);
-         try {
-             printMatchingRows(table, "小于等于row010的行",
-                     new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
-                             new BinaryComparator("row010".getBytes())));
-
-             // The pattern is anchored with '$' so that only keys whose last
-             // character is 5 match; an unanchored ".*5" would presumably also
-             // accept keys such as row050 (verify against the comparator's
-             // matching semantics).
-             printMatchingRows(table, "正则获取结尾为5的行",
-                     new RowFilter(CompareFilter.CompareOp.EQUAL,
-                             new RegexStringComparator(".*5$")));
-
-             printMatchingRows(table, "包含有5的行",
-                     new RowFilter(CompareFilter.CompareOp.EQUAL,
-                             new SubstringComparator("5")));
-
-             printMatchingRows(table, "开头是row01的",
-                     new RowFilter(CompareFilter.CompareOp.EQUAL,
-                             new BinaryPrefixComparator("row01".getBytes())));
-         } finally {
-             // Release the table even when one of the scans throws.
-             table.close();
-         }
-     }
-
-     /**
-      * Prints a heading, then scans the table with the given filter and
-      * prints each returned row. The scanner is always closed.
-      *
-      * @param table   table to scan
-      * @param heading line printed before the results
-      * @param filter  filter applied to the scan
-      * @throws IOException if the scanner cannot be opened
-      */
-     private void printMatchingRows(HTable table, String heading, Filter filter)
-             throws IOException {
-         System.out.println(heading);
-         Scan scan = new Scan();
-         scan.setFilter(filter);
-         ResultScanner scanner = table.getScanner(scan);
-         try {
-             for (Result res : scanner) {
-                 System.out.println(res);
-             }
-         } finally {
-             scanner.close();
-         }
-     }
-
-     /**
-      * Creates the test table (column families "data" and "url") if it does
-      * not exist yet and writes rows row001 .. row050 into it. Each row gets
-      * one cell per family, with qualifier col(i % 10) and values dataNNN /
-      * urlNNN. An IOException is reported on standard error and not rethrown.
-      */
-     public void init() {
-         try {
-             HBaseAdmin admin = new HBaseAdmin(config);
-             try {
-                 if (!admin.tableExists(tableName)) {
-                     HTableDescriptor htd = new HTableDescriptor(tableName);
-                     htd.addFamily(new HColumnDescriptor("data"));
-                     htd.addFamily(new HColumnDescriptor("url"));
-                     admin.createTable(htd);
-                 }
-             } finally {
-                 admin.close();
-             }
-
-             HTable table = new HTable(config, tableName);
-             try {
-                 // Auto-flush is turned off for the bulk load; pending puts are
-                 // expected to be flushed by close() (see HTable documentation).
-                 table.setAutoFlush(false);
-                 int count = 50;
-                 for (int i = 1; i <= count; ++i) {
-                     byte[] qualifier = String.format("col%01d", i % 10).getBytes();
-                     Put p = new Put(String.format("row%03d", i).getBytes());
-                     p.add("data".getBytes(), qualifier,
-                             String.format("data%03d", i).getBytes());
-                     p.add("url".getBytes(), qualifier,
-                             String.format("url%03d", i).getBytes());
-                     table.put(p);
-                 }
-             } finally {
-                 table.close();
-             }
-         } catch (IOException e) {
-             e.printStackTrace();
-         }
-     }
-
-     /**
-      * Entry point: loads the sample data, then runs the filter scans.
-      *
-      * @param args unused
-      * @throws IOException if a scan fails
-      */
-     public static void main(String[] args) throws IOException {
-         TestHbaseRowFilter test = new TestHbaseRowFilter();
-         test.init();
-         test.testRowFilter();
-     }
-
- }
复制代码
输出结果 - 小于等于row010的行
- keyvalues={row001/data:col1/1364133382268/Put/vlen=7, row001/url:col1/1364133382268/Put/vlen=6}
- keyvalues={row002/data:col2/1364133382268/Put/vlen=7, row002/url:col2/1364133382268/Put/vlen=6}
- keyvalues={row003/data:col3/1364133382268/Put/vlen=7, row003/url:col3/1364133382268/Put/vlen=6}
- keyvalues={row004/data:col4/1364133382268/Put/vlen=7, row004/url:col4/1364133382268/Put/vlen=6}
- keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row006/data:col6/1364133382268/Put/vlen=7, row006/url:col6/1364133382268/Put/vlen=6}
- keyvalues={row007/data:col7/1364133382268/Put/vlen=7, row007/url:col7/1364133382268/Put/vlen=6}
- keyvalues={row008/data:col8/1364133382268/Put/vlen=7, row008/url:col8/1364133382268/Put/vlen=6}
- keyvalues={row009/data:col9/1364133382268/Put/vlen=7, row009/url:col9/1364133382268/Put/vlen=6}
- keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}
- 正则获取结尾为5的行
- keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}
- 包含有5的行
- keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row050/data:col0/1364133382268/Put/vlen=7, row050/url:col0/1364133382268/Put/vlen=6}
- 开头是row01的
- keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}
- keyvalues={row011/data:col1/1364133382268/Put/vlen=7, row011/url:col1/1364133382268/Put/vlen=6}
- keyvalues={row012/data:col2/1364133382268/Put/vlen=7, row012/url:col2/1364133382268/Put/vlen=6}
- keyvalues={row013/data:col3/1364133382268/Put/vlen=7, row013/url:col3/1364133382268/Put/vlen=6}
- keyvalues={row014/data:col4/1364133382268/Put/vlen=7, row014/url:col4/1364133382268/Put/vlen=6}
- keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
- keyvalues={row016/data:col6/1364133382268/Put/vlen=7, row016/url:col6/1364133382268/Put/vlen=6}
- keyvalues={row017/data:col7/1364133382268/Put/vlen=7, row017/url:col7/1364133382268/Put/vlen=6}
- keyvalues={row018/data:col8/1364133382268/Put/vlen=7, row018/url:col8/1364133382268/Put/vlen=6}
- keyvalues={row019/data:col9/1364133382268/Put/vlen=7, row019/url:col9/1364133382268/Put/vlen=6}
复制代码
|