diff --git a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/internal/RegexUtil.java b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/internal/RegexUtil.java new file mode 100644 index 0000000000..0c9b147d6c --- /dev/null +++ b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/internal/RegexUtil.java @@ -0,0 +1,79 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.data.v2.internal; + +import com.google.api.core.InternalApi; +import com.google.protobuf.ByteString; +import com.google.protobuf.ByteString.ByteIterator; +import com.google.protobuf.ByteString.Output; +import java.io.IOException; +import java.io.OutputStream; + +@InternalApi +public final class RegexUtil { + private static final byte[] NULL_BYTES = "\\x00".getBytes(); + + private RegexUtil() {} + + public static String literalRegex(final String value) { + return literalRegex(ByteString.copyFromUtf8(value)).toStringUtf8(); + } + /** Converts the value to a quoted regular expression. 
*/ + public static ByteString literalRegex(ByteString value) { + Output output = ByteString.newOutput(value.size() * 2); + + ByteIterator it = value.iterator(); + try { + writeLiteralRegex(it, output); + } catch (IOException e) { + throw new RuntimeException("Unexpected io error converting regex", e); + } + + return output.toByteString(); + } + + // Extracted from: re2 QuoteMeta: + // https://github.com/google/re2/blob/70f66454c255080a54a8da806c52d1f618707f8a/re2/re2.cc#L456 + private static void writeLiteralRegex(ByteIterator input, OutputStream output) + throws IOException { + while (input.hasNext()) { + byte unquoted = input.nextByte(); + + if ((unquoted < 'a' || unquoted > 'z') + && (unquoted < 'A' || unquoted > 'Z') + && (unquoted < '0' || unquoted > '9') + && unquoted != '_' + && + // If this is the part of a UTF8 or Latin1 character, we need + // to copy this byte without escaping. Experimentally this is + // what works correctly with the regexp library. + (unquoted & 128) == 0) { + + if (unquoted == '\0') { // Special handling for null chars. + // Note that this special handling is not strictly required for RE2, + // but this quoting is required for other regexp libraries such as + // PCRE. + // Can't use "\\0" since the next character might be a digit. 
+ output.write(NULL_BYTES, 0, NULL_BYTES.length); + continue; + } + + output.write('\\'); + } + output.write(unquoted); + } + } +} diff --git a/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java new file mode 100644 index 0000000000..9793b74e0b --- /dev/null +++ b/bigtable-client-core-parent/bigtable-client-core/src/main/java/com/google/cloud/bigtable/data/v2/wrappers/Filters.java @@ -0,0 +1,659 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.data.v2.wrappers; + +import com.google.api.client.repackaged.com.google.common.base.Preconditions; +import com.google.api.core.InternalApi; +import com.google.bigtable.v2.ColumnRange; +import com.google.bigtable.v2.RowFilter; +import com.google.bigtable.v2.TimestampRange; +import com.google.bigtable.v2.ValueRange; +import com.google.cloud.bigtable.data.v2.internal.RegexUtil; +import com.google.protobuf.ByteString; + +/** + * A Fluent DSL to create a hierarchy of filters for the CheckAndMutateRow RPCs and ReadRows Query. + * + *
+ * <p>Intended usage is to statically import, or in case of conflict assign the static variable F
+ * and use its fluent API to build filters.
+ *
+ * <p>Sample code:
+ *
+ * <pre>{@code
+ * import static com.google.cloud.bigtable.data.v2.wrappers.Filters.F;
+ *
+ * void main() {
+ *   // Build the filter expression
+ *   Filter filter = F.chain()
+ *       .filter(F.qualifier().regex("prefix.*"))
+ *       .filter(F.limit().cellsPerRow(10));
+ * }
+ *
+ * }</pre>
+ */
+public final class Filters {
+ /** Entry point into the DSL. */
+ public static final Filters F = new Filters();
+
+ private static final SimpleFilter PASS = // pass-all filter; handed out by pass()
+ new SimpleFilter(RowFilter.newBuilder().setPassAllFilter(true).build());
+ private static final SimpleFilter BLOCK = // block-all filter; handed out by block()
+ new SimpleFilter(RowFilter.newBuilder().setBlockAllFilter(true).build());
+ private static final SimpleFilter SINK = // sink filter; handed out by sink()
+ new SimpleFilter(RowFilter.newBuilder().setSink(true).build());
+ private static final SimpleFilter STRIP_VALUE = // filter with the strip-value transformer set
+ new SimpleFilter(RowFilter.newBuilder().setStripValueTransformer(true).build());
+
+ private Filters() {} // private: the DSL is reached through the shared instance F
+
+ /**
+ * Creates an empty chain filter list. Filters can be added to the chain by invoking {@link
+ * ChainFilter#filter(Filter)}.
+ *
+ * <p>The elements of "filters" are chained together to process the input row:
+ *
+ * <pre>{@code in row -> filter0 -> intermediate row -> filter1 -> ... -> filterN -> out row}
+ * </pre>
+ *
+ * <p>The full chain is executed atomically.
+ */
+ public ChainFilter chain() {
+ return new ChainFilter();
+ }
+
+ /**
+ * Creates an empty interleave filter list. Filters can be added to the interleave by invoking
+ * {@link InterleaveFilter#filter(Filter)}.
+ *
+ * The elements of "filters" all process a copy of the input row, and the results are pooled, + * sorted, and combined into a single output row. If multiple cells are produced with the same + * column and timestamp, they will all appear in the output row in an unspecified mutual order. + * The full chain is executed atomically. + */ + public InterleaveFilter interleave() { + return new InterleaveFilter(); + } + + /** + * Creates an empty condition filter. The filter results of the predicate can be configured by + * invoking {@link ConditionFilter#then(Filter)} and {@link ConditionFilter#otherwise(Filter)}. + * + *
A RowFilter which evaluates one of two possible RowFilters, depending on whether or not a + * predicate RowFilter outputs any cells from the input row. + * + *
IMPORTANT NOTE: The predicate filter does not execute atomically with the {@link + * ConditionFilter#then(Filter)} and {@link ConditionFilter#otherwise(Filter)} (Filter)} filters, + * which may lead to inconsistent or unexpected results. Additionally, {@link ConditionFilter} may + * have poor performance, especially when filters are set for the {@link + * ConditionFilter#otherwise(Filter)}. + */ + public ConditionFilter condition(Filter predicate) { + return new ConditionFilter(predicate); + } + + /** Returns the builder for row key related filters. */ + public KeyFilter key() { + return new KeyFilter(); + } + + /** Returns the builder for column family related filters. */ + public FamilyFilter family() { + return new FamilyFilter(); + } + + /** Returns the builder for column qualifier related filters. */ + public QualifierFilter qualifier() { + return new QualifierFilter(); + } + + /** Returns the builder for timestamp related filters. */ + public TimestampFilter timestamp() { + return new TimestampFilter(); + } + + /** Returns the builder for value related filters. */ + public ValueFilter value() { + return new ValueFilter(); + } + + /** Returns the builder for offset related filters. */ + public OffsetFilter offset() { + return new OffsetFilter(); + } + + /** Returns the builder for limit related filters. */ + public LimitFilter limit() { + return new LimitFilter(); + } + + // Miscellaneous filters without a clear target. + /** Matches all cells, regardless of input. Functionally equivalent to having no filter. */ + public Filter pass() { + return PASS; + } + + /** + * Does not match any cells, regardless of input. Useful for temporarily disabling just part of a + * filter. + */ + public Filter block() { + return BLOCK; + } + + /** + * Outputs all cells directly to the output of the read rather than to any parent filter. 
For + * advanced usage, see comments in + * https://github.com/googleapis/googleapis/blob/master/google/bigtable/v2/data.proto for more + * details. + */ + public Filter sink() { + return SINK; + } + + /** + * Applies the given label to all cells in the output row. This allows the caller to determine + * which results were produced from which part of the filter. + * + *
Due to a technical limitation, it is not currently possible to apply multiple labels to a + * cell. As a result, a {@link ChainFilter} may have no more than one sub-filter which contains a + * label. It is okay for an {@link InterleaveFilter} to contain multiple labels, as they will be + * applied to separate copies of the input. This may be relaxed in the future. + */ + public Filter label(String label) { + return new SimpleFilter(RowFilter.newBuilder().setApplyLabelTransformer(label).build()); + } + + // Implementations of target specific filters. + /** DSL for adding filters to a chain. */ + public static final class ChainFilter implements Filter, Cloneable { + + private final RowFilter.Chain.Builder builder; + + private ChainFilter() { + this(RowFilter.Chain.newBuilder()); + } + + private ChainFilter(RowFilter.Chain.Builder builder) { + this.builder = builder; + } + + /** Add a filter to chain. */ + public ChainFilter filter(Filter filter) { + builder.addFilters(filter.toProto()); + return this; + } + + @InternalApi + @Override + public RowFilter toProto() { + if (builder.getFiltersCount() == 1) { + return builder.getFilters(0); + } else { + return RowFilter.newBuilder().setChain(builder.build()).build(); + } + } + + /** + * Makes a deep copy of the Chain. + */ + @Override + public ChainFilter clone() { + return new ChainFilter(builder.build().toBuilder()); + } + } + + /** DSL for adding filters to the interleave list. */ + public static final class InterleaveFilter implements Filter { + RowFilter.Interleave.Builder builder = RowFilter.Interleave.newBuilder(); + + private InterleaveFilter() { + } + + /** + * Adds a {@link Filter} to the interleave list. 
+ */ + public InterleaveFilter filter(Filter filter) { + builder.addFilters(filter.toProto()); + return this; + } + + @InternalApi + @Override + public RowFilter toProto() { + if (builder.getFiltersCount() == 1) { + return builder.getFilters(0); + } else { + return RowFilter.newBuilder().setInterleave(builder.build()).build(); + } + } + } + + /** DSL for configuring a conditional filter. */ + public static final class ConditionFilter implements Filter { + private RowFilter.Condition.Builder builder = RowFilter.Condition.newBuilder(); + + private ConditionFilter() { + } + + private ConditionFilter(Filter predicate) { + builder.setPredicateFilter(predicate.toProto()); + } + + /** Sets (replaces) the filter to apply when the predicate is true. */ + public ConditionFilter then(Filter filter) { + builder.setTrueFilter(filter.toProto()); + return this; + } + + /** Sets (replaces) the filter to apply when the predicate is false. */ + public ConditionFilter otherwise(Filter filter) { + builder.setFalseFilter(filter.toProto()); + return this; + } + + @InternalApi + @Override + public RowFilter toProto() { + return RowFilter.newBuilder().setCondition(builder.build()).build(); + } + } + + public static final class KeyFilter { + + private KeyFilter() { + } + + /** + * Matches only cells from rows whose keys satisfy the given RE2 regex. In other words, passes through + * the entire row when the key matches, and otherwise produces an empty row. Note that, since + * row keys can contain arbitrary bytes, the `\C` escape sequence must be used if a true + * wildcard is desired. The `.` character will not match the new line character `\n`, which may + * be present in a binary key. + */ + public Filter regex(ByteString regex) { + return new SimpleFilter( + RowFilter.newBuilder().setRowKeyRegexFilter(regex).build()); + } + + /** + * Matches only cells from rows whose keys equal the value. 
In other words, passes through the + * entire row when the key matches, and otherwise produces an empty row. + */ + public Filter exactMatch(ByteString value) { + return new SimpleFilter( + RowFilter.newBuilder().setRowKeyRegexFilter(RegexUtil.literalRegex(value)).build()); + } + + /** + * Matches all cells from a row with `probability`, and matches no cells from the row with + * probability 1-`probability`. + */ + public Filter sample(double probability) { + return new SimpleFilter(RowFilter.newBuilder().setRowSampleFilter(probability).build()); + } + } + + public static final class FamilyFilter { + + private FamilyFilter() { + } + + /** + * Matches only cells from columns whose families satisfy the given RE2 regex. Note that, since column + * qualifiers can contain arbitrary bytes, the `\C` escape sequence must be used if a true + * wildcard is desired. The `.` character will not match the new line character `\n`, which may + * be present in a binary qualifier. + */ + public Filter regex(String regex) { + return regex(ByteString.copyFromUtf8(regex)); + } + + /** + * Matches only cells from columns whose qualifiers satisfy the given RE2 regex. Note that, since cell values + * can contain arbitrary bytes, the `\C` escape sequence must be used if a true wildcard is + * desired. The `.` character will not match the new line character `\n`, which may be present + * in a binary value. + * + * @param regex + * @return + */ + public Filter regex(String regex) { + return regex(ByteString.copyFromUtf8(regex)); + } + + /** Matches only cells with values that match the given value. */ + public Filter exactMatch(ByteString value) { + return regex(RegexUtil.literalRegex(value)); + } + + /** + * Matches only cells with values that satisfy the given