From 679f2660c2774ab124d6ab7f6e3c3a12ce89f3b0 Mon Sep 17 00:00:00 2001 From: Solomon Duskis Date: Mon, 22 Jan 2018 17:44:07 -0500 Subject: [PATCH 1/5] Adding a wrapper for RowFilter Filters is a new mechanism for creating `RowFilter` objects. Most of the credit goes to @igorbernstein. @igorbernstein, I had to include open/closed as first class citizens of Column and Value range filters. I had to remove the notion of a Range. Also, I replaced Truth with Assert, and used Long instead of long for timestamp range. --- .../bigtable/data/v2/internal/RegexUtil.java | 79 +++ .../bigtable/data/v2/wrappers/Filters.java | 494 ++++++++++++++++++ .../data/v2/wrappers/FiltersTest.java | 361 +++++++++++++ 3 files changed, 934 insertions(+) create mode 100644 bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/internal/RegexUtil.java create mode 100644 bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java create mode 100644 bigtable-client-core-parent/bigtable-client-core/src/test/java/com/google/cloud/bigtable/data/v2/wrappers/FiltersTest.java diff --git a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/internal/RegexUtil.java b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/internal/RegexUtil.java new file mode 100644 index 0000000000..0c9b147d6c --- /dev/null +++ b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/internal/RegexUtil.java @@ -0,0 +1,79 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.data.v2.internal; + +import com.google.api.core.InternalApi; +import com.google.protobuf.ByteString; +import com.google.protobuf.ByteString.ByteIterator; +import com.google.protobuf.ByteString.Output; +import java.io.IOException; +import java.io.OutputStream; + +@InternalApi +public final class RegexUtil { + private static final byte[] NULL_BYTES = "\\x00".getBytes(); + + private RegexUtil() {} + + public static String literalRegex(final String value) { + return literalRegex(ByteString.copyFromUtf8(value)).toStringUtf8(); + } + /** Converts the value to a quoted regular expression. */ + public static ByteString literalRegex(ByteString value) { + Output output = ByteString.newOutput(value.size() * 2); + + ByteIterator it = value.iterator(); + try { + writeLiteralRegex(it, output); + } catch (IOException e) { + throw new RuntimeException("Unexpected io error converting regex", e); + } + + return output.toByteString(); + } + + // Extracted from: re2 QuoteMeta: + // https://github.com/google/re2/blob/70f66454c255080a54a8da806c52d1f618707f8a/re2/re2.cc#L456 + private static void writeLiteralRegex(ByteIterator input, OutputStream output) + throws IOException { + while (input.hasNext()) { + byte unquoted = input.nextByte(); + + if ((unquoted < 'a' || unquoted > 'z') + && (unquoted < 'A' || unquoted > 'Z') + && (unquoted < '0' || unquoted > '9') + && unquoted != '_' + && + // If this is the part of a UTF8 or Latin1 character, we need + // to copy this byte without escaping. 
Experimentally this is + // what works correctly with the regexp library. + (unquoted & 128) == 0) { + + if (unquoted == '\0') { // Special handling for null chars. + // Note that this special handling is not strictly required for RE2, + // but this quoting is required for other regexp libraries such as + // PCRE. + // Can't use "\\0" since the next character might be a digit. + output.write(NULL_BYTES, 0, NULL_BYTES.length); + continue; + } + + output.write('\\'); + } + output.write(unquoted); + } + } +} diff --git a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java new file mode 100644 index 0000000000..cc742ff4fa --- /dev/null +++ b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java @@ -0,0 +1,494 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.bigtable.data.v2.wrappers; + +import com.google.api.core.InternalApi; +import com.google.bigtable.v2.ColumnRange; +import com.google.bigtable.v2.RowFilter; +import com.google.bigtable.v2.TimestampRange; +import com.google.bigtable.v2.ValueRange; +import com.google.cloud.bigtable.data.v2.internal.RegexUtil; +import com.google.protobuf.ByteString; + +/** + * A Fluent DSL to create a hierarchy of filters for the CheckAndMutateRow RPCs and ReadRows Query. + * + *

Intended usage is to statically import, or in case of conflict assign the static variable F + * and use its fluent API to build filters. + * + *

Sample code: + * + *

{@code
+ * import static com.google.cloud.bigtable.data.v2.wrappers.Filters.F;
+ *
+ * void main() {
+ *   // Build the filter expression
+ *   RowFilter filter = F.chain()
+ *     .filter(F.qualifier().regex("prefix.*"))
+ *     .filter(F.limit().cellsPerRow(10));
+ * }
+ *
+ * }
+ */ +public final class Filters { + /** Entry point into the DSL. */ + public static final Filters F = new Filters(); + + private Filters() {} + + /** + * Creates an empty chain filter list. Filters can be added to the chain by invoking {@link + * ChainFilter#filter(Filter)}. + * + *

The elements of "filters" are chained together to process the input row: + * + *

{@code in row -> filter0 -> intermediate row -> filter1 -> ... -> filterN -> out row}
+   * 
+ * + * The full chain is executed atomically. + */ + public ChainFilter chain() { + return new ChainFilter(); + } + + /** + * Creates an empty interleave filter list. Filters can be added to the chain by invoking {@link + * InterleaveFilter#filter(Filter)}. + * + *

The elements of "filters" all process a copy of the input row, and the results are pooled, + * sorted, and combined into a single output row. If multiple cells are produced with the same + * column and timestamp, they will all appear in the output row in an unspecified mutual order. + * The full chain is executed atomically. + */ + public InterleaveFilter interleave() { + return new InterleaveFilter(); + } + + /** + * Creates an empty condition filter. The filter results of the predicate can be configured by + * invoking {@link ConditionFilter#then(Filter)} and {@link ConditionFilter#otherwise(Filter)}. + * + *

A RowFilter which evaluates one of two possible RowFilters, depending on whether or not a + * predicate RowFilter outputs any cells from the input row. + * + *

IMPORTANT NOTE: The predicate filter does not execute atomically with the {@link + * ConditionFilter#then(Filter)} and {@link ConditionFilter#otherwise(Filter)} filters, + * which may lead to inconsistent or unexpected results. Additionally, {@link ConditionFilter} may + * have poor performance, especially when filters are set for the {@link + * ConditionFilter#otherwise(Filter)}. + */ + public ConditionFilter condition(Filter predicate) { + return new ConditionFilter(predicate); + } + + /** Returns the builder for row key related filters. */ + public KeyFilter key() { + return new KeyFilter(); + } + + /** Returns the builder for column family related filters. */ + public FamilyFilter family() { + return new FamilyFilter(); + } + + /** Returns the builder for column qualifier related filters. */ + public QualifierFilter qualifier() { + return new QualifierFilter(); + } + + /** Returns the builder for timestamp related filters. */ + public TimestampFilter timestamp() { + return new TimestampFilter(); + } + + /** Returns the builder for value related filters. */ + public ValueFilter value() { + return new ValueFilter(); + } + + /** Returns the builder for offset related filters. */ + public OffsetFilter offset() { + return new OffsetFilter(); + } + + /** Returns the builder for limit related filters. */ + public LimitFilter limit() { + return new LimitFilter(); + } + + // Miscellaneous filters without a clear target. + /** Matches all cells, regardless of input. Functionally equivalent to having no filter. */ + public Filter pass() { + return new SimpleFilter(RowFilter.newBuilder().setPassAllFilter(true).build()); + } + + /** + * Does not match any cells, regardless of input. Useful for temporarily disabling just part of a + * filter. + */ + public Filter block() { + return new SimpleFilter(RowFilter.newBuilder().setBlockAllFilter(true).build()); + } + + /** + * Outputs all cells directly to the output of the read rather than to any parent filter. 
For + * advanced usage, see comments in + * https://github.com/googleapis/googleapis/blob/master/google/bigtable/v2/data.proto for more + * details. + */ + public Filter sink() { + return new SimpleFilter(RowFilter.newBuilder().setSink(true).build()); + } + + /** + * Applies the given label to all cells in the output row. This allows the caller to determine + * which results were produced from which part of the filter. + * + *

Due to a technical limitation, it is not currently possible to apply multiple labels to a + * cell. As a result, a {@link ChainFilter} may have no more than one sub-filter which contains a + * label. It is okay for an {@link InterleaveFilter} to contain multiple labels, as they will be + * applied to separate copies of the input. This may be relaxed in the future. + */ + public Filter label(String label) { + return new SimpleFilter(RowFilter.newBuilder().setApplyLabelTransformer(label).build()); + } + + // Implementations of target specific filters. + /** DSL for adding filters to a chain. */ + public static class ChainFilter extends Filter { + + RowFilter.Chain.Builder builder = RowFilter.Chain.newBuilder(); + + /** Add a filter to chain. */ + public ChainFilter filter(Filter filter) { + builder.addFilters(filter.toProto()); + return this; + } + + @InternalApi + @Override + public RowFilter toProto() { + return RowFilter.newBuilder().setChain(builder.build()).build(); + } + } + + /** DSL for adding filters to the interleave list. */ + public static class InterleaveFilter extends Filter { + RowFilter.Interleave.Builder builder = RowFilter.Interleave.newBuilder(); + + public InterleaveFilter filter(Filter filter) { + builder.addFilters(filter.toProto()); + return this; + } + + @InternalApi + @Override + public RowFilter toProto() { + return RowFilter.newBuilder().setInterleave(builder.build()).build(); + } + } + + /** DSL for configuring a conditional filter. */ + public static class ConditionFilter extends Filter { + RowFilter.Condition.Builder builder = RowFilter.Condition.newBuilder(); + + ConditionFilter(Filter predicate) { + builder.setPredicateFilter(predicate.toProto()); + } + + /** Sets (replaces) the filter to apply when the predicate is true. */ + public ConditionFilter then(Filter filter) { + builder.setTrueFilter(filter.toProto()); + return this; + } + + /** Sets (replaces) the filter to apply when the predicate is false. 
*/ + public ConditionFilter otherwise(Filter filter) { + builder.setFalseFilter(filter.toProto()); + return this; + } + + @InternalApi + @Override + public RowFilter toProto() { + return RowFilter.newBuilder().setCondition(builder.build()).build(); + } + } + + public static class KeyFilter { + /** + * Matches only cells from rows whose keys satisfy the given RE2 regex. For technical reasons, the + * regex must not contain the `:` character, even if it is not being used as a literal. Note + * that, since column families cannot contain the new line character `\n`, it is sufficient to + * use `.` as a full wildcard when matching column family names. + */ + public Filter regex(String regex) { + return new SimpleFilter(RowFilter.newBuilder().setFamilyNameRegexFilter(regex).build()); + } + + /** Matches only cells from columns whose families match the value. */ + public Filter exactMatch(String value) { + return new SimpleFilter( + RowFilter.newBuilder().setFamilyNameRegexFilter(RegexUtil.literalRegex(value)).build()); + } + } + + public static class QualifierFilter { + + /** + * Matches only cells from columns whose qualifiers satisfy the given RE2 regex. Note that, since cell values + * can contain arbitrary bytes, the `\C` escape sequence must be used if a true wildcard is + * desired. The `.` character will not match the new line character `\n`, which may be present + * in a binary value. + * + * @param regex + * @return + */ + public Filter regex(String regex) { + return new SimpleFilter( + RowFilter.newBuilder().setValueRegexFilter(ByteString.copyFromUtf8(regex)).build()); + } + + /** Matches only cells with values that match the given value. */ + public Filter exactMatch(ByteString value) { + return new SimpleFilter( + RowFilter.newBuilder().setValueRegexFilter(RegexUtil.literalRegex(value)).build()); + } + + /** Matches only cells with values that fall within the given value range. 
*/ + public static class RangeBuilder { + private ValueRange.Builder range = ValueRange.newBuilder(); + + public RangeBuilder startClosed(ByteString value) { + range.setStartValueClosed(value); + return this; + } + + public RangeBuilder startOpen(ByteString value) { + range.setStartValueOpen(value); + return this; + } + + public RangeBuilder endClosed(ByteString value) { + range.setEndValueClosed(value); + return this; + } + + public RangeBuilder endOpen(ByteString value) { + range.setEndValueOpen(value); + return this; + } + + public Filter build() { + return new SimpleFilter( + RowFilter.newBuilder().setValueRangeFilter(range.build()).build()); + } + } + + /** + * Construct a {@link RangeBuilder} that can create a {@link ValueRange} oriented + * {@link Filter}. + * @return a new {@link RangeBuilder} + */ + public RangeBuilder range() { + return new RangeBuilder(); + } + + /** Replaces each cell's value with the empty string. */ + public Filter strip() { + return new SimpleFilter(RowFilter.newBuilder().setStripValueTransformer(true).build()); + } + } + + public static class OffsetFilter { + + /** + * Skips the first N cells of each row, matching all subsequent cells. If duplicate cells are + * present, as is possible when using an {@link InterleaveFilter}, each copy of the cell is + * counted separately. + */ + public Filter cellsPerRow(int count) { + return new SimpleFilter(RowFilter.newBuilder().setCellsPerRowOffsetFilter(count).build()); + } + } + + public static class LimitFilter { + + /** + * Matches only the first N cells of each row. If duplicate cells are present, as is possible + * when using an Interleave, each copy of the cell is counted separately. + */ + public Filter cellsPerRow(int count) { + return new SimpleFilter(RowFilter.newBuilder().setCellsPerRowLimitFilter(count).build()); + } + + /** + * Matches only the most recent `count` cells within each column. 
For example, if count=2, this + * filter would match column `foo:bar` at timestamps 10 and 9 skip all earlier cells in + * `foo:bar`, and then begin matching again in column `foo:bar2`. If duplicate cells are + * present, as is possible when using an {@link InterleaveFilter}, each copy of the cell is + * counted separately. + */ + public Filter cellsPerColumn(int count) { + return new SimpleFilter(RowFilter.newBuilder().setCellsPerColumnLimitFilter(count).build()); + } + } + + private static final class SimpleFilter extends Filter { + + private final RowFilter proto; + + private SimpleFilter(RowFilter proto) { + this.proto = proto; + } + + @InternalApi + @Override + public RowFilter toProto() { + return proto; + } + } + + public abstract static class Filter { + + final RowFilter.Builder builder = RowFilter.newBuilder(); + + Filter() {} + + @InternalApi + public abstract RowFilter toProto(); + } +} diff --git a/bigtable-client-core-parent/bigtable-client-core/src/test/java/com/google/cloud/bigtable/data/v2/wrappers/FiltersTest.java b/bigtable-client-core-parent/bigtable-client-core/src/test/java/com/google/cloud/bigtable/data/v2/wrappers/FiltersTest.java new file mode 100644 index 0000000000..4406673210 --- /dev/null +++ b/bigtable-client-core-parent/bigtable-client-core/src/test/java/com/google/cloud/bigtable/data/v2/wrappers/FiltersTest.java @@ -0,0 +1,361 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.bigtable.data.v2.wrappers; + +import static com.google.cloud.bigtable.data.v2.wrappers.Filters.F; + +import com.google.bigtable.v2.ColumnRange; +import com.google.bigtable.v2.RowFilter; +import com.google.bigtable.v2.RowFilter.Chain; +import com.google.bigtable.v2.RowFilter.Condition; +import com.google.bigtable.v2.RowFilter.Interleave; +import com.google.bigtable.v2.ValueRange; +import com.google.protobuf.ByteString; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class FiltersTest { + @Test + public void chainTest() { + RowFilter actualProto = + F.chain() + .filter(F.key().regex(".*")) + .filter(F.key().sample(0.5)) + .filter(F.chain().filter(F.family().regex("hi$"))) + .toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder() + .setChain( + Chain.newBuilder() + .addFilters( + RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8(".*"))) + .addFilters(RowFilter.newBuilder().setRowSampleFilter(0.5)) + .addFilters( + RowFilter.newBuilder() + .setChain( + Chain.newBuilder() + .addFilters( + RowFilter.newBuilder().setFamilyNameRegexFilter("hi$"))))) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void interleaveTest() { + RowFilter actualProto = + F.interleave() + .filter(F.key().regex(".*")) + .filter(F.key().sample(0.5)) + .filter(F.interleave().filter(F.family().regex("hi$"))) + .toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder() + .setInterleave( + Interleave.newBuilder() + .addFilters( + RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8(".*"))) + .addFilters(RowFilter.newBuilder().setRowSampleFilter(0.5)) + .addFilters( + RowFilter.newBuilder() + .setInterleave( + Interleave.newBuilder() + .addFilters( + RowFilter.newBuilder().setFamilyNameRegexFilter("hi$"))))) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } 
+ + @Test + public void conditionTest() { + RowFilter actualProto = + F.condition(F.key().regex(".*")) + .then(F.label("true")) + .otherwise(F.label("false")) + .toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder() + .setCondition( + Condition.newBuilder() + .setPredicateFilter( + RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8(".*"))) + .setTrueFilter(RowFilter.newBuilder().setApplyLabelTransformer("true")) + .setFalseFilter(RowFilter.newBuilder().setApplyLabelTransformer("false"))) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void passTest() { + RowFilter actualProto = F.pass().toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setPassAllFilter(true).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void blockTest() { + RowFilter actualProto = F.block().toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setBlockAllFilter(true).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void sinkTest() { + RowFilter actualProto = F.sink().toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setSink(true).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void labelTest() { + RowFilter actualProto = F.label("my-label").toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setApplyLabelTransformer("my-label").build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void keyRegexTest() { + RowFilter actualProto = F.key().regex(".*").toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8(".*")).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void keyExactMatchTest() { + RowFilter actualProto = F.key().exactMatch(ByteString.copyFromUtf8(".*")).toProto(); + + RowFilter expectedFilter = + 
RowFilter.newBuilder().setRowKeyRegexFilter(ByteString.copyFromUtf8("\\.\\*")).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void keySampleTest() { + RowFilter actualProto = F.key().sample(0.3).toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setRowSampleFilter(0.3).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void familyRegexTest() { + RowFilter actualProto = F.family().regex("^hi").toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setFamilyNameRegexFilter("^hi").build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void familyExactMatchTest() { + RowFilter actualProto = F.family().exactMatch("^hi").toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setFamilyNameRegexFilter("\\^hi").build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void qualifierRegexTest() { + RowFilter actualProto = F.qualifier().regex("^hi").toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder() + .setColumnQualifierRegexFilter(ByteString.copyFromUtf8("^hi")) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void qualifierExactMatchTest() { + RowFilter actualProto = F.qualifier().exactMatch(ByteString.copyFromUtf8("^hi")).toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder() + .setColumnQualifierRegexFilter(ByteString.copyFromUtf8("\\^hi")) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void qualifierRangeOpenClosed() { + ByteString begin = ByteString.copyFromUtf8("begin"); + ByteString end = ByteString.copyFromUtf8("end"); + RowFilter actualProto = F.qualifier().range() + .startOpen(begin) + .endClosed(end) + .build() + .toProto(); + RowFilter expectedFilter = + RowFilter.newBuilder() + .setColumnRangeFilter( + ColumnRange.newBuilder() + .setStartQualifierOpen(begin) + .setEndQualifierClosed(end)) + 
.build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void qualifierRangeClosedOpen() { + ByteString begin = ByteString.copyFromUtf8("begin"); + ByteString end = ByteString.copyFromUtf8("end"); + RowFilter actualProto = F.qualifier().range() + .startClosed(begin) + .endOpen(end) + .build() + .toProto(); + RowFilter expectedFilter = + RowFilter.newBuilder() + .setColumnRangeFilter( + ColumnRange.newBuilder() + .setStartQualifierClosed(begin) + .setEndQualifierOpen(end)) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void valueRegex() { + RowFilter actualProto = F.value().regex("some[0-9]regex").toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder() + .setValueRegexFilter(ByteString.copyFromUtf8("some[0-9]regex")) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void valueExactMatch() { + RowFilter actualProto = + F.value().exactMatch(ByteString.copyFromUtf8("some[0-9]regex")).toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder() + .setValueRegexFilter(ByteString.copyFromUtf8("some\\[0\\-9\\]regex")) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void valueRangeClosedOpen() { + ByteString begin = ByteString.copyFromUtf8("begin"); + ByteString end = ByteString.copyFromUtf8("end"); + + RowFilter actualProto = F.value().range() + .startOpen(begin) + .endClosed(end) + .build() + .toProto(); + + RowFilter expectedFilter = + RowFilter.newBuilder() + .setValueRangeFilter( + ValueRange.newBuilder() + .setStartValueOpen(begin) + .setEndValueClosed(end)) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void valueRangeOpenClosed() { + ByteString begin = ByteString.copyFromUtf8("begin"); + ByteString end = ByteString.copyFromUtf8("end"); + + RowFilter actualProto = F.value().range() + .startClosed(begin) + .endOpen(end) + .build() + .toProto(); + + RowFilter 
expectedFilter = + RowFilter.newBuilder() + .setValueRangeFilter( + ValueRange.newBuilder() + .setStartValueClosed(begin) + .setEndValueOpen(end)) + .build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void valueStripTest() { + RowFilter actualProto = F.value().strip().toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setStripValueTransformer(true).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void offsetCellsPerRowTest() { + RowFilter actualProto = F.offset().cellsPerRow(10).toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setCellsPerRowOffsetFilter(10).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void limitCellsPerRowTest() { + RowFilter actualProto = F.limit().cellsPerRow(10).toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setCellsPerRowLimitFilter(10).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } + + @Test + public void limitCellsPerColumnTest() { + RowFilter actualProto = F.limit().cellsPerColumn(10).toProto(); + + RowFilter expectedFilter = RowFilter.newBuilder().setCellsPerColumnLimitFilter(10).build(); + + Assert.assertEquals(expectedFilter, actualProto); + } +} From fde35471ecb95dc824c96dfcfbb9ff4b55c0dd3f Mon Sep 17 00:00:00 2001 From: Solomon Duskis Date: Mon, 22 Jan 2018 19:59:51 -0500 Subject: [PATCH 2/5] ColumnRange needs a family. 
--- .../com/google/cloud/bigtable/data/v2/wrappers/Filters.java | 5 +++++ .../google/cloud/bigtable/data/v2/wrappers/FiltersTest.java | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java index cc742ff4fa..e704015598 100644 --- a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java +++ b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java @@ -310,6 +310,11 @@ public Filter exactMatch(ByteString value) { public static class RangeBuilder { private ColumnRange.Builder range = ColumnRange.newBuilder(); + public RangeBuilder family(String family) { + range.setFamilyName(family); + return this; + } + public RangeBuilder startClosed(ByteString value) { range.setStartQualifierClosed(value); return this; diff --git a/bigtable-client-core-parent/bigtable-client-core/src/test/java/com/google/cloud/bigtable/data/v2/wrappers/FiltersTest.java b/bigtable-client-core-parent/bigtable-client-core/src/test/java/com/google/cloud/bigtable/data/v2/wrappers/FiltersTest.java index 4406673210..a0585945d9 100644 --- a/bigtable-client-core-parent/bigtable-client-core/src/test/java/com/google/cloud/bigtable/data/v2/wrappers/FiltersTest.java +++ b/bigtable-client-core-parent/bigtable-client-core/src/test/java/com/google/cloud/bigtable/data/v2/wrappers/FiltersTest.java @@ -216,9 +216,11 @@ public void qualifierExactMatchTest() { @Test public void qualifierRangeOpenClosed() { + String family = "family"; ByteString begin = ByteString.copyFromUtf8("begin"); ByteString end = ByteString.copyFromUtf8("end"); RowFilter actualProto = F.qualifier().range() + .family(family) .startOpen(begin) .endClosed(end) .build() @@ -227,6 +229,7 
@@ public void qualifierRangeOpenClosed() { RowFilter.newBuilder() .setColumnRangeFilter( ColumnRange.newBuilder() + .setFamilyName(family) .setStartQualifierOpen(begin) .setEndQualifierClosed(end)) .build(); @@ -236,9 +239,11 @@ public void qualifierRangeOpenClosed() { @Test public void qualifierRangeClosedOpen() { + String family = "family"; ByteString begin = ByteString.copyFromUtf8("begin"); ByteString end = ByteString.copyFromUtf8("end"); RowFilter actualProto = F.qualifier().range() + .family(family) .startClosed(begin) .endOpen(end) .build() @@ -247,6 +252,7 @@ public void qualifierRangeClosedOpen() { RowFilter.newBuilder() .setColumnRangeFilter( ColumnRange.newBuilder() + .setFamilyName(family) .setStartQualifierClosed(begin) .setEndQualifierOpen(end)) .build(); From c611e92620c6c90c7f87d438dd9264d21a2e69d9 Mon Sep 17 00:00:00 2001 From: Solomon Duskis Date: Tue, 23 Jan 2018 11:24:35 -0500 Subject: [PATCH 3/5] Addressing comments - Moved up QualifierRangeFilter and ValueRangeFilter - Added family to QualifierRangeFilter's constructor. - Added ByteString regex methods. - Made a TimestampRangeFilter. 
--- .../bigtable/data/v2/wrappers/Filters.java | 280 ++++++++++++------ .../data/v2/wrappers/FiltersTest.java | 68 ++++- 2 files changed, 240 insertions(+), 108 deletions(-) diff --git a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java index e704015598..0b1975f991 100644 --- a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java +++ b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java @@ -15,6 +15,7 @@ */ package com.google.cloud.bigtable.data.v2.wrappers; +import com.google.api.client.repackaged.com.google.common.base.Preconditions; import com.google.api.core.InternalApi; import com.google.bigtable.v2.ColumnRange; import com.google.bigtable.v2.RowFilter; @@ -47,10 +48,19 @@ public final class Filters { /** Entry point into the DSL. */ public static final Filters F = new Filters(); + private static final SimpleFilter PASS = + new SimpleFilter(RowFilter.newBuilder().setPassAllFilter(true).build()); + private static final SimpleFilter BLOCK = + new SimpleFilter(RowFilter.newBuilder().setBlockAllFilter(true).build()); + private static final SimpleFilter SINK = + new SimpleFilter(RowFilter.newBuilder().setSink(true).build()); + private static final SimpleFilter STRIP_VALUE = + new SimpleFilter(RowFilter.newBuilder().setStripValueTransformer(true).build()); + private Filters() {} /** - * Creates an empty chain filter list. Filters can be added to the chain by invoking {@link + * Creates an empty chain filter lidst. Filters can be added to the chain by invoking {@link * ChainFilter#filter(Filter)}. * *

The elements of "filters" are chained together to process the input row: @@ -65,8 +75,8 @@ public ChainFilter chain() { } /** - * Creates an empty interleave filter list. Filters can be added to the chain by invoking {@link - * InterleaveFilter#filter(Filter)}. + * Creates an empty interleave filter list. Filters can be added to the interleave by invoking + * {@link InterleaveFilter#filter(Filter)}. * *

The elements of "filters" all process a copy of the input row, and the results are pooled, * sorted, and combined into a single output row. If multiple cells are produced with the same @@ -132,7 +142,7 @@ public LimitFilter limit() { // Miscellaneous filters without a clear target. /** Matches all cells, regardless of input. Functionally equivalent to having no filter. */ public Filter pass() { - return new SimpleFilter(RowFilter.newBuilder().setPassAllFilter(true).build()); + return PASS; } /** @@ -140,7 +150,7 @@ public Filter pass() { * filter. */ public Filter block() { - return new SimpleFilter(RowFilter.newBuilder().setBlockAllFilter(true).build()); + return BLOCK; } /** @@ -150,7 +160,7 @@ public Filter block() { * details. */ public Filter sink() { - return new SimpleFilter(RowFilter.newBuilder().setSink(true).build()); + return SINK; } /** @@ -170,7 +180,15 @@ public Filter label(String label) { /** DSL for adding filters to a chain. */ public static class ChainFilter extends Filter { - RowFilter.Chain.Builder builder = RowFilter.Chain.newBuilder(); + final RowFilter.Chain.Builder builder; + + public ChainFilter() { + this(RowFilter.Chain.newBuilder()); + } + + ChainFilter(RowFilter.Chain.Builder builder) { + this.builder = builder; + } /** Add a filter to chain. 
*/ public ChainFilter filter(Filter filter) { @@ -181,7 +199,16 @@ public ChainFilter filter(Filter filter) { @InternalApi @Override public RowFilter toProto() { - return RowFilter.newBuilder().setChain(builder.build()).build(); + if (builder.getFiltersCount() == 1) { + return builder.getFilters(0); + } else { + return RowFilter.newBuilder().setChain(builder.build()).build(); + } + } + + @Override + public ChainFilter clone() { + return new ChainFilter(builder.build().toBuilder()); } } @@ -197,7 +224,11 @@ public InterleaveFilter filter(Filter filter) { @InternalApi @Override public RowFilter toProto() { - return RowFilter.newBuilder().setInterleave(builder.build()).build(); + if (builder.getFiltersCount() == 1) { + return builder.getFilters(0); + } else { + return RowFilter.newBuilder().setInterleave(builder.build()).build(); + } } } @@ -238,8 +269,20 @@ public static class KeyFilter { * be present in a binary key. */ public Filter regex(String regex) { + return regex(ByteString.copyFromUtf8(regex)); + } + + /** + * Matches only cells from rows whose keys satisfy the given RE2 regex. Note that, since column + * qualifiers can contain arbitrary bytes, the `\C` escape sequence must be used if a true + * wildcard is desired. The `.` character will not match the new line character `\n`, which may + * be present in a binary qualifier. + */ + public Filter regex(ByteString regex) { return new SimpleFilter( RowFilter.newBuilder() - .setColumnQualifierRegexFilter(ByteString.copyFromUtf8(regex)) + .setColumnQualifierRegexFilter(regex) .build()); } @@ -305,69 +395,84 @@ public Filter exactMatch(ByteString value) { } /** - * Matches only cells from columns within the given range. + * Construct a {@link QualifierRangeFilter} that can create a {@link ColumnRange} oriented + * {@link Filter}. 
+ * @return a new {@link QualifierRangeFilter} */ - public static class RangeBuilder { - private ColumnRange.Builder range = ColumnRange.newBuilder(); - - public RangeBuilder family(String family) { - range.setFamilyName(family); - return this; - } + public QualifierRangeFilter range(String family) { + return new QualifierRangeFilter(family); + } + } - public RangeBuilder startClosed(ByteString value) { - range.setStartQualifierClosed(value); - return this; - } + /** + * Matches only cells with microsecond timestamps within the given range. Start is inclusive + * and end is exclusive. + */ - public RangeBuilder startOpen(ByteString value) { - range.setStartQualifierOpen(value); - return this; - } + public static class TimestampRangeFilter extends Filter { + private final TimestampRange.Builder range = TimestampRange.newBuilder(); - public RangeBuilder endClosed(ByteString value) { - range.setEndQualifierClosed(value); - return this; - } - - public RangeBuilder endOpen(ByteString value) { - range.setEndQualifierOpen(value); - return this; - } + public TimestampRangeFilter startClosed(long startMicros) { + range.setStartTimestampMicros(startMicros); + return this; + } - public Filter build() { - return new SimpleFilter( - RowFilter.newBuilder().setColumnRangeFilter(range.build()).build()); - } + public TimestampRangeFilter endOpen(long endMicros) { + range.setEndTimestampMicros(endMicros); + return this; } - /** - * Construct a {@link RangeBuilder} that can create a {@link ColumnRange} oriented - * {@link Filter}. - * @return a new {@link RangeBuilder} - */ - public RangeBuilder range() { - return new RangeBuilder(); + @Override + public RowFilter toProto() { + return + RowFilter.newBuilder().setTimestampRangeFilter(range.build()).build(); } } public static class TimestampFilter { + + public TimestampRangeFilter range() { + return new TimestampRangeFilter(); + } + /** * Matches only cells with timestamps within the given range. 
* - * @param start Inclusive start of the range. - * @param end Exclusive end of the range. + * @param startMicros Inclusive start of the range in microseconds. + * @param endMicros Exclusive end of the range in microseconds. */ - public Filter range(Long start, Long end) { - TimestampRange.Builder range = TimestampRange.newBuilder(); - if (start != null) { - range.setStartTimestampMicros(start); - } - if (end != null) { - range.setEndTimestampMicros(end); - } - return new SimpleFilter( - RowFilter.newBuilder().setTimestampRangeFilter(range.build()).build()); + public Filter range(long startMicros, long endMicros) { + return range().startClosed(startMicros).endOpen(endMicros); + } + } + + /** Matches only cells with values that fall within the given value range. */ + public static class ValueRangeFilter extends Filter{ + private ValueRange.Builder range = ValueRange.newBuilder(); + + public ValueRangeFilter startClosed(ByteString value) { + range.setStartValueClosed(value); + return this; + } + + public ValueRangeFilter startOpen(ByteString value) { + range.setStartValueOpen(value); + return this; + } + + public ValueRangeFilter endClosed(ByteString value) { + range.setEndValueClosed(value); + return this; + } + + public ValueRangeFilter endOpen(ByteString value) { + range.setEndValueOpen(value); + return this; + } + + @Override + public RowFilter toProto() { + return RowFilter.newBuilder().setValueRangeFilter(range.build()).build(); } } @@ -383,8 +488,21 @@ public static class ValueFilter { * @return */ public Filter regex(String regex) { - return new SimpleFilter( - RowFilter.newBuilder().setValueRegexFilter(ByteString.copyFromUtf8(regex)).build()); + return regex(ByteString.copyFromUtf8(regex)); + } + + /** + * Matches only cells with values that satisfy the given Date: Tue, 23 Jan 2018 16:44:17 -0500 Subject: [PATCH 4/5] Filter is now an interface, and addressing comments - Filter had a RowFilter.Builder that was never used. 
Removing it left just the toProto() method, which indicates that Filter should be an interface - All constructors should now be private - All classes should be final - All public methods other than toProto should have a comment. --- .../bigtable/data/v2/wrappers/Filters.java | 110 +++++++++++++----- 1 file changed, 82 insertions(+), 28 deletions(-) diff --git a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java index 0b1975f991..799c2f90a5 100644 --- a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java +++ b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java @@ -60,7 +60,7 @@ public final class Filters { private Filters() {} /** - * Creates an empty chain filter lidst. Filters can be added to the chain by invoking {@link + * Creates an empty chain filter list. Filters can be added to the chain by invoking {@link * ChainFilter#filter(Filter)}. * *

The elements of "filters" are chained together to process the input row: @@ -178,15 +178,15 @@ public Filter label(String label) { // Implementations of target specific filters. /** DSL for adding filters to a chain. */ - public static class ChainFilter extends Filter { + public static final class ChainFilter implements Filter, Cloneable { - final RowFilter.Chain.Builder builder; + private final RowFilter.Chain.Builder builder; - public ChainFilter() { + private ChainFilter() { this(RowFilter.Chain.newBuilder()); } - ChainFilter(RowFilter.Chain.Builder builder) { + private ChainFilter(RowFilter.Chain.Builder builder) { this.builder = builder; } @@ -206,6 +206,9 @@ public RowFilter toProto() { } } + /** + * Makes a deep copy of the Chain. + */ @Override public ChainFilter clone() { return new ChainFilter(builder.build().toBuilder()); @@ -213,9 +216,15 @@ public ChainFilter clone() { } /** DSL for adding filters to the interleave list. */ - public static class InterleaveFilter extends Filter { + public static final class InterleaveFilter implements Filter { RowFilter.Interleave.Builder builder = RowFilter.Interleave.newBuilder(); + private InterleaveFilter() { + } + + /** + * Adds a {@link Filter} to the interleave list. + */ public InterleaveFilter filter(Filter filter) { builder.addFilters(filter.toProto()); return this; @@ -233,10 +242,10 @@ public RowFilter toProto() { } /** DSL for configuring a conditional filter. 
*/ - public static class ConditionFilter extends Filter { + public static final class ConditionFilter implements Filter { RowFilter.Condition.Builder builder = RowFilter.Condition.newBuilder(); - ConditionFilter(Filter predicate) { + private ConditionFilter(Filter predicate) { builder.setPredicateFilter(predicate.toProto()); } @@ -259,7 +268,11 @@ public RowFilter toProto() { } } - public static class KeyFilter { + public static final class KeyFilter { + + private KeyFilter() { + } + /** * Matches only cells from rows whose keys satisfy the given RE2 regex. Note that, since column @@ -408,20 +436,33 @@ public QualifierRangeFilter range(String family) { * Matches only cells with microsecond timestamps within the given range. Start is inclusive * and end is exclusive. */ - - public static class TimestampRangeFilter extends Filter { + public static final class TimestampRangeFilter implements Filter { private final TimestampRange.Builder range = TimestampRange.newBuilder(); + private TimestampRangeFilter() { + } + + /** + * Inclusive lower bound. If left empty, interpreted as 0. + * + * @param startMicros inclusive timestamp in microseconds. + */ public TimestampRangeFilter startClosed(long startMicros) { range.setStartTimestampMicros(startMicros); return this; } + /** + * Exclusive upper bound. If left empty, interpreted as infinity. + * + * @param endMicros exclusive timestamp in microseconds. + */ public TimestampRangeFilter endOpen(long endMicros) { range.setEndTimestampMicros(endMicros); return this; } + @InternalApi @Override public RowFilter toProto() { return @@ -429,8 +470,13 @@ public RowFilter toProto() { } } - public static class TimestampFilter { + public static final class TimestampFilter { + /** + * Matches only cells with timestamps within the given range. + * + * @return a {@link TimestampRangeFilter} on which start / end timestamps can be specified. 
+ */ public TimestampRangeFilter range() { return new TimestampRangeFilter(); } @@ -447,36 +493,49 @@ public Filter range(long startMicros, long endMicros) { } /** Matches only cells with values that fall within the given value range. */ - public static class ValueRangeFilter extends Filter{ + public static final class ValueRangeFilter implements Filter{ private ValueRange.Builder range = ValueRange.newBuilder(); + /** + * Used when giving an inclusive lower bound for the range. + */ public ValueRangeFilter startClosed(ByteString value) { range.setStartValueClosed(value); return this; } + /** + * Used when giving an exclusive lower bound for the range. + */ public ValueRangeFilter startOpen(ByteString value) { range.setStartValueOpen(value); return this; } + /** + * Used when giving an inclusive upper bound for the range. + */ public ValueRangeFilter endClosed(ByteString value) { range.setEndValueClosed(value); return this; } public ValueRangeFilter endOpen(ByteString value) { + /** + * Used when giving an exclusive upper bound for the range. + */ range.setEndValueOpen(value); return this; } + @InternalApi @Override public RowFilter toProto() { return RowFilter.newBuilder().setValueRangeFilter(range.build()).build(); } } - public static class ValueFilter { + public static final class ValueFilter { /** * Matches only cells with values that satisfy the given RE2 regex. Note that, since column @@ -416,10 +423,7 @@ public Filter regex(ByteString regex) { /** Matches only cells from columns whose qualifiers equal the value. 
*/ public Filter exactMatch(ByteString value) { - return new SimpleFilter( - RowFilter.newBuilder() - .setColumnQualifierRegexFilter(RegexUtil.literalRegex(value)) - .build()); + return regex(RegexUtil.literalRegex(value)); } /** @@ -465,13 +469,15 @@ public TimestampRangeFilter endOpen(long endMicros) { @InternalApi @Override public RowFilter toProto() { - return - RowFilter.newBuilder().setTimestampRangeFilter(range.build()).build(); + return RowFilter.newBuilder().setTimestampRangeFilter(range.build()).build(); } } public static final class TimestampFilter { + private TimestampFilter() { + } + /** * Matches only cells with timestamps within the given range. * @@ -496,6 +502,9 @@ public Filter range(long startMicros, long endMicros) { public static final class ValueRangeFilter implements Filter{ private ValueRange.Builder range = ValueRange.newBuilder(); + private ValueRangeFilter() { + } + /** * Used when giving an inclusive lower bound for the range. */ @@ -536,6 +545,10 @@ public RowFilter toProto() { } public static final class ValueFilter { + + private ValueFilter() { + } + /** * Matches only cells with values that satisfy the given RE2 regex. Note that, since cell values @@ -564,12 +582,6 @@ public Filter regex(ByteString regex) { return new SimpleFilter(RowFilter.newBuilder().setValueRegexFilter(regex).build()); } - /** Matches only cells with values that match the given value. */ - public Filter exactMatch(ByteString value) { - return new SimpleFilter( - RowFilter.newBuilder().setValueRegexFilter(RegexUtil.literalRegex(value)).build()); - } - /** * Construct a {@link ValueRangeBuilder} that can create a {@link ValueRange} oriented * {@link Filter}. @@ -587,6 +599,9 @@ public Filter strip() { public static final class OffsetFilter { + private OffsetFilter() { + } + /** * Skips the first N cells of each row, matching all subsequent cells. 
If duplicate cells are * present, as is possible when using an {@link InterleaveFilter}, each copy of the cell is @@ -599,6 +614,9 @@ public Filter cellsPerRow(int count) { public static final class LimitFilter { + private LimitFilter() { + } + /** * Matches only the first N cells of each row. If duplicate cells are present, as is possible * when using an Interleave, each copy of the cell is counted separately.