From a473b8847ae2abbde8eea24316f6ab55dd33d0da Mon Sep 17 00:00:00 2001 From: miyuan-ljr Date: Fri, 18 Oct 2024 16:22:25 +0800 Subject: [PATCH] fuzzy --- .../hbase/filter/HBaseFilterUtils.java | 32 ++ .../oceanbase/hbase/HTableTestBase.java | 288 ++++++++++++++++++ .../hbase/filter/HBaseFilterUtilsTest.java | 12 + 3 files changed, 332 insertions(+) diff --git a/src/main/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtils.java b/src/main/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtils.java index 522f2ab2..2eebd870 100644 --- a/src/main/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtils.java +++ b/src/main/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtils.java @@ -20,6 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.filter.*; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import java.lang.reflect.Field; @@ -66,6 +67,8 @@ private static void toParseableByteArray(ByteArrayOutputStream byteStream, Filte toParseableByteArray(byteStream, (FirstKeyOnlyFilter) filter); } else if (filter instanceof KeyOnlyFilter) { toParseableByteArray(byteStream, (KeyOnlyFilter) filter); + } else if (filter instanceof FuzzyRowFilter) { + toParseableByteArray(byteStream, (FuzzyRowFilter) filter); } else if (filter instanceof TimestampsFilter) { toParseableByteArray(byteStream, (TimestampsFilter) filter); } else if (filter instanceof MultiRowRangeFilter) { @@ -224,6 +227,35 @@ private static void toParseableByteArray(ByteArrayOutputStream byteStream, KeyOn byteStream.write(')'); } + // FuzzyRowFilter('abc','101','ddd','010'); + private static void toParseableByteArray(ByteArrayOutputStream byteStream, FuzzyRowFilter filter) throws IOException { + byteStream.write(filter.getClass().getSimpleName().getBytes()); + byteStream.write('('); + + List> fuzzyKeysData; + try { + Field field = filter.getClass().getDeclaredField("fuzzyKeysData"); + field.setAccessible(true); + fuzzyKeysData = (List>)field.get(filter); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + for (int i = 0; i < fuzzyKeysData.size(); i ++) { + Pair data = fuzzyKeysData.get(i); + byteStream.write("'".getBytes()); + byteStream.write(data.getFirst()); + byteStream.write("'".getBytes()); + byteStream.write(','); + byteStream.write("'".getBytes()); + byteStream.write(data.getSecond()); + byteStream.write("'".getBytes()); + if (i < fuzzyKeysData.size() - 1) { + byteStream.write(','); + } + } + byteStream.write(')'); + } + private static void toParseableByteArray(ByteArrayOutputStream byteStream, TimestampsFilter filter) throws IOException { byteStream.write(filter.getClass().getSimpleName().getBytes()); byteStream.write('('); diff --git a/src/test/java/com/alipay/oceanbase/hbase/HTableTestBase.java b/src/test/java/com/alipay/oceanbase/hbase/HTableTestBase.java index b8fe6c8c..ed663321 100644 --- a/src/test/java/com/alipay/oceanbase/hbase/HTableTestBase.java +++ b/src/test/java/com/alipay/oceanbase/hbase/HTableTestBase.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.filter.*; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import org.junit.Assert; import org.junit.Ignore; import org.junit.Rule; @@ -1671,6 +1672,293 @@ public void testFilter2() throws Exception { scanner.close(); } + @Test + public void testFuzzyRowFilter() throws Exception { + String key1 = "abab"; + String key2 = "abcc"; + String column1 = "c1"; + String column2 = "c2"; + String column3 = "c3"; + String column4 = "c4"; + String column5 = "c5"; + String value1 = "value1"; + String value2 = "value2"; + String value3 = "value3"; + String family = "family1"; + Delete deleteKey1Family = new Delete(toBytes(key1)); + deleteKey1Family.deleteFamily(toBytes(family)); + + Delete deleteKey2Family = new Delete(toBytes(key2)); + deleteKey2Family.deleteFamily(toBytes(family)); + + hTable.delete(deleteKey1Family); + hTable.delete(deleteKey2Family); + + Put putKey1Column1Value1 = new Put(toBytes(key1)); + putKey1Column1Value1.add(toBytes(family), toBytes(column1), toBytes(value1)); + + Put putKey1Column1Value2 = new Put(toBytes(key1)); + putKey1Column1Value2.add(toBytes(family), toBytes(column1), toBytes(value2)); + + Put putKey1Column2Value2 = new Put(toBytes(key1)); + putKey1Column2Value2.add(toBytes(family), toBytes(column2), toBytes(value2)); + + Put putKey1Column2Value1 = new Put(toBytes(key1)); + putKey1Column2Value1.add(toBytes(family), toBytes(column2), toBytes(value1)); + + Put putKey1Column3Value1 = new Put(toBytes(key1)); + putKey1Column3Value1.add(toBytes(family), toBytes(column3), toBytes(value1)); + + Put putKey1Column4Value1 = new Put(toBytes(key1)); + putKey1Column4Value1.add(toBytes(family), toBytes(column4), toBytes(value1)); + + Put putKey1Column5Value1 = new Put(toBytes(key1)); + putKey1Column5Value1.add(toBytes(family), toBytes(column5), toBytes(value1)); + + Put putKey2Column1Value1 = new Put(toBytes(key2)); + putKey2Column1Value1.add(toBytes(family), toBytes(column1), toBytes(value1)); + + Put putKey2Column1Value2 = new Put(toBytes(key2)); + putKey2Column1Value2.add(toBytes(family), toBytes(column1), toBytes(value2)); + + Put putKey2Column2Value2 = new Put(toBytes(key2)); + putKey2Column2Value2.add(toBytes(family), toBytes(column2), toBytes(value2)); + + Put putKey2Column2Value1 = new Put(toBytes(key2)); + putKey2Column2Value1.add(toBytes(family), toBytes(column2), toBytes(value1)); + + hTable.delete(deleteKey1Family); + hTable.delete(deleteKey2Family); + tryPut(hTable, putKey1Column1Value1); + tryPut(hTable, putKey1Column1Value2); + tryPut(hTable, putKey1Column1Value1); + tryPut(hTable, putKey1Column2Value1); + tryPut(hTable, putKey1Column2Value2); + tryPut(hTable, putKey1Column2Value1); + tryPut(hTable, putKey1Column2Value2); + tryPut(hTable, putKey1Column3Value1); + tryPut(hTable, putKey1Column4Value1); + tryPut(hTable, putKey1Column5Value1); + tryPut(hTable, putKey2Column2Value1); + tryPut(hTable, putKey2Column2Value2); + + Scan scan; + scan = new Scan(); + scan.addFamily(family.getBytes()); + scan.setMaxVersions(10); + List> fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes("abab"), Bytes.toBytes("0000"))); + fuzzyKey.add(new Pair(Bytes.toBytes("dddd"), Bytes.toBytes("0000"))); + FuzzyRowFilter filter = new FuzzyRowFilter(fuzzyKey); + scan.setFilter(filter); + ResultScanner scanner = hTable.getScanner(scan); + + int res_count = 0; + for (Result result : scanner) { + for (KeyValue keyValue : result.raw()) { + System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n", + Bytes.toString(result.getRow()), + Bytes.toString(keyValue.getFamily()), + Bytes.toString(keyValue.getQualifier()), + keyValue.getTimestamp(), + Bytes.toString(keyValue.getValue()) + ); + res_count += 1; + } + } + Assert.assertEquals(res_count, 10); + scanner.close(); + + scan = new Scan(); + scan.addFamily(family.getBytes()); + scan.setMaxVersions(10); + scan.setReversed(true); + fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes("dddd"), Bytes.toBytes("0000"))); + fuzzyKey.add(new Pair(Bytes.toBytes("abcc"), Bytes.toBytes("0000"))); + filter = new FuzzyRowFilter(fuzzyKey); + scan.setFilter(filter); + scanner = hTable.getScanner(scan); + + res_count = 0; + for (Result result : scanner) { + for (KeyValue keyValue : result.raw()) { + System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n", + Bytes.toString(result.getRow()), + Bytes.toString(keyValue.getFamily()), + Bytes.toString(keyValue.getQualifier()), + keyValue.getTimestamp(), + Bytes.toString(keyValue.getValue()) + ); + res_count += 1; + } + } + Assert.assertEquals(res_count, 2); + scanner.close(); + + scan = new Scan(); + scan.addFamily(family.getBytes()); + scan.setMaxVersions(10); + scan.setReversed(true); + fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes("ccab"), Bytes.toBytes("1100"))); + fuzzyKey.add(new Pair(Bytes.toBytes("dddd"), Bytes.toBytes("0000"))); + filter = new FuzzyRowFilter(fuzzyKey); + scan.setFilter(filter); + scanner = hTable.getScanner(scan); + + res_count = 0; + for (Result result : scanner) { + for (KeyValue keyValue : result.raw()) { + System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n", + Bytes.toString(result.getRow()), + Bytes.toString(keyValue.getFamily()), + Bytes.toString(keyValue.getQualifier()), + keyValue.getTimestamp(), + Bytes.toString(keyValue.getValue()) + ); + res_count += 1; + } + } + Assert.assertEquals(res_count, 10); + scanner.close(); + + scan = new Scan(); + scan.addFamily(family.getBytes()); + scan.setMaxVersions(10); + scan.setReversed(true); + fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes("cccc"), Bytes.toBytes("1100"))); + fuzzyKey.add(new Pair(Bytes.toBytes("dddd"), Bytes.toBytes("0000"))); + filter = new FuzzyRowFilter(fuzzyKey); + scan.setFilter(filter); + scanner = hTable.getScanner(scan); + + res_count = 0; + for (Result result : scanner) { + for (KeyValue keyValue : result.raw()) { + System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n", + Bytes.toString(result.getRow()), + Bytes.toString(keyValue.getFamily()), + Bytes.toString(keyValue.getQualifier()), + keyValue.getTimestamp(), + Bytes.toString(keyValue.getValue()) + ); + res_count += 1; + } + } + Assert.assertEquals(res_count, 2); + scanner.close(); + + scan = new Scan(); + scan.addFamily(family.getBytes()); + scan.setMaxVersions(10); + scan.setReversed(true); + fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes("ab##"), Bytes.toBytes("0011"))); + fuzzyKey.add(new Pair(Bytes.toBytes("dddd"), Bytes.toBytes("0000"))); + filter = new FuzzyRowFilter(fuzzyKey); + scan.setFilter(filter); + scanner = hTable.getScanner(scan); + + res_count = 0; + for (Result result : scanner) { + for (KeyValue keyValue : result.raw()) { + System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n", + Bytes.toString(result.getRow()), + Bytes.toString(keyValue.getFamily()), + Bytes.toString(keyValue.getQualifier()), + keyValue.getTimestamp(), + Bytes.toString(keyValue.getValue()) + ); + res_count += 1; + } + } + Assert.assertEquals(res_count, 12); + scanner.close(); + + scan = new Scan(); + scan.addFamily(family.getBytes()); + scan.setMaxVersions(10); + scan.setReversed(true); + fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes("azc"), Bytes.toBytes("010"))); + fuzzyKey.add(new Pair(Bytes.toBytes("dddd"), Bytes.toBytes("0000"))); + filter = new FuzzyRowFilter(fuzzyKey); + scan.setFilter(filter); + scanner = hTable.getScanner(scan); + + res_count = 0; + for (Result result : scanner) { + for (KeyValue keyValue : result.raw()) { + System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n", + Bytes.toString(result.getRow()), + Bytes.toString(keyValue.getFamily()), + Bytes.toString(keyValue.getQualifier()), + keyValue.getTimestamp(), + Bytes.toString(keyValue.getValue()) + ); + res_count += 1; + } + } + Assert.assertEquals(res_count, 2); + scanner.close(); + + scan = new Scan(); + scan.addFamily(family.getBytes()); + scan.setMaxVersions(10); + scan.setReversed(true); + fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes("azccd"), Bytes.toBytes("01001"))); + fuzzyKey.add(new Pair(Bytes.toBytes("dddd"), Bytes.toBytes("0000"))); + filter = new FuzzyRowFilter(fuzzyKey); + scan.setFilter(filter); + scanner = hTable.getScanner(scan); + + res_count = 0; + for (Result result : scanner) { + for (KeyValue keyValue : result.raw()) { + System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n", + Bytes.toString(result.getRow()), + Bytes.toString(keyValue.getFamily()), + Bytes.toString(keyValue.getQualifier()), + keyValue.getTimestamp(), + Bytes.toString(keyValue.getValue()) + ); + res_count += 1; + } + } + Assert.assertEquals(res_count, 2); + scanner.close(); + + scan = new Scan(); + scan.addFamily(family.getBytes()); + scan.setMaxVersions(10); + scan.setReversed(true); + fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes(""), Bytes.toBytes(""))); + fuzzyKey.add(new Pair(Bytes.toBytes("dddd"), Bytes.toBytes("0000"))); + filter = new FuzzyRowFilter(fuzzyKey); + scan.setFilter(filter); + scanner = hTable.getScanner(scan); + + res_count = 0; + for (Result result : scanner) { + for (KeyValue keyValue : result.raw()) { + System.out.printf("Rowkey: %s, Column Family: %s, Column Qualifier: %s, Timestamp: %d, Value: %s%n", + Bytes.toString(result.getRow()), + Bytes.toString(keyValue.getFamily()), + Bytes.toString(keyValue.getQualifier()), + keyValue.getTimestamp(), + Bytes.toString(keyValue.getValue()) + ); + res_count += 1; + } + } + Assert.assertEquals(res_count, 12); + scanner.close(); + } + @Test public void testFirstKeyValueMatchingQualifiersFilter() throws Exception { String key1 = "getKey1"; diff --git a/src/test/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtilsTest.java b/src/test/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtilsTest.java index fe7a7082..f0e66604 100644 --- a/src/test/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtilsTest.java +++ b/src/test/java/com/alipay/oceanbase/hbase/filter/HBaseFilterUtilsTest.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hbase.filter.*; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -162,6 +163,17 @@ public void testColumnPrefixFilter() throws IOException { HBaseFilterUtils.toParseableByteArray(filter)); } + @Test + public void testFuzzyRowFilter() throws IOException { + List> fuzzyKey = new ArrayList<>(); + fuzzyKey.add(new Pair(Bytes.toBytes("abc"), Bytes.toBytes("101"))); + fuzzyKey.add(new Pair(Bytes.toBytes("ddd"), Bytes.toBytes("010"))); + + FuzzyRowFilter filter = new FuzzyRowFilter(fuzzyKey); + System.out.println(Bytes.toString(HBaseFilterUtils.toParseableByteArray(filter))); + Assert.assertArrayEquals("FuzzyRowFilter('abc','101','ddd','010')".getBytes(), HBaseFilterUtils.toParseableByteArray(filter)); + } + @Test public void testMultiRowRangeFilter() throws IOException { List ranges = new ArrayList<>();