diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
index 218dc06db8..778083f0b9 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
+++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
@@ -76,7 +76,6 @@ limitations under the License.
         </exclusion>
       </exclusions>
     </dependency>
-
     <dependency>
       <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-java-core</artifactId>
@@ -217,6 +216,23 @@ limitations under the License.
       <version>${hbase.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.google.truth</groupId>
+      <artifactId>truth</artifactId>
+      <version>1.0.1</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-bigtable-emulator</artifactId>
+      <version>0.124.0</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.findbugs</groupId>
+      <artifactId>jsr305</artifactId>
+      <version>${jsr305.version}</version>
+    </dependency>
   </dependencies>
 
   <build>
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java
index b346b90837..1f52f5125a 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java
@@ -21,6 +21,7 @@
 import com.google.cloud.bigtable.beam.sequencefiles.CreateTableHelper;
 import com.google.cloud.bigtable.beam.sequencefiles.ExportJob;
 import com.google.cloud.bigtable.beam.sequencefiles.ImportJob;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob;
 import java.io.File;
 import java.net.URISyntaxException;
 import java.util.Arrays;
@@ -53,6 +54,9 @@ public static void main(String[] args) throws Exception {
       case "create-table":
         CreateTableHelper.main(subArgs);
         break;
+      case "sync-table":
+        SyncTableJob.main(subArgs);
+        break;
       default:
         usage();
         System.exit(1);
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
index e64507317b..f839a50b23 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
@@ -26,6 +26,7 @@
 import com.google.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.Query;
 import com.google.cloud.bigtable.beam.sequencefiles.ExportJob.ExportOptions;
 import com.google.cloud.bigtable.beam.sequencefiles.ImportJob.ImportOptions;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
 import com.google.cloud.bigtable.hbase.adapters.Adapters;
 import com.google.cloud.bigtable.hbase.adapters.read.DefaultReadHooks;
@@ -72,6 +73,19 @@ public static CloudBigtableTableConfiguration BuildImportConfig(ImportOptions op
     return builder.build();
   }
 
+  /** Builds CloudBigtableTableConfiguration from input runtime parameters for sync-table job. */
+  public static CloudBigtableTableConfiguration BuildSyncTableConfig(SyncTableOptions opts) {
+    CloudBigtableTableConfiguration.Builder builder =
+        new CloudBigtableTableConfiguration.Builder()
+            .withProjectId(opts.getBigtableProject())
+            .withInstanceId(opts.getBigtableInstanceId())
+            .withTableId(opts.getBigtableTableId());
+    if (opts.getBigtableAppProfileId() != null) { // app profile is applied only when provided
+      builder.withAppProfileId(opts.getBigtableAppProfileId());
+    }
+    return builder.build();
+  }
+
   /** Provides a request that is constructed with some attributes. */
   private static class RequestValueProvider
       implements ValueProvider<ReadRowsRequest>, Serializable {
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
new file mode 100644
index 0000000000..e62b3c8215
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.base.Objects;
+import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Buffers the RangeHashes generated by {@link HadoopHashTableSource}. This is an optimization that
+ * allows {@link ComputeAndValidateHashFromBigtableDoFn} to issue fewer ReadRow APIs with larger row
+ * ranges.
+ *
+ * <p>Hadoop HashTable output is sorted by row-key and contains a row-range and hash. Beam
+ * PCollections do not guarantee any ordering. To fetch a batch of ranges in 1 ReadRows operation,
+ * this source buffers them and emits a {@code List<RangeHash>} that preserves the sorted order.
+ *
+ * <p>Emits a batch of sorted RangeHashes keyed by the start key of the first range.
+ */
+class BufferedHadoopHashTableSource extends BoundedSource<KV<String, List<RangeHash>>> {
+
+  private static final long serialVersionUID = 39842743L;
+
+  private static final int DEFAULT_BATCH_SIZE = 50;
+  private static final Coder<KV<String, List<RangeHash>>> CODER =
+      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(RangeHashCoder.of()));
+
+  // Max number of RangeHashes placed in a single emitted batch.
+  private final int maxBufferSize;
+  private final HadoopHashTableSource hashTableSource;
+
+  public BufferedHadoopHashTableSource(HadoopHashTableSource source) {
+    this(source, DEFAULT_BATCH_SIZE);
+  }
+
+  public BufferedHadoopHashTableSource(HadoopHashTableSource hashTableSource, int maxBufferSize) {
+    this.hashTableSource = hashTableSource;
+    this.maxBufferSize = maxBufferSize;
+  }
+
+  @Override
+  public List<? extends BoundedSource<KV<String, List<RangeHash>>>> split(
+      long desiredBundleSizeBytes, PipelineOptions options) throws IOException {
+
+    @SuppressWarnings("unchecked")
+    List<HadoopHashTableSource> splitHashTableSources =
+        (List<HadoopHashTableSource>) hashTableSource.split(desiredBundleSizeBytes, options);
+
+    List<BufferedHadoopHashTableSource> splitSources =
+        new ArrayList<>(splitHashTableSources.size());
+    // Keep the splits same as HashTableSource.
+    for (HadoopHashTableSource splitHashTableSource : splitHashTableSources) {
+      // Propagate maxBufferSize; the single-arg constructor would reset it to the default.
+      splitSources.add(new BufferedHadoopHashTableSource(splitHashTableSource, maxBufferSize));
+    }
+    return splitSources;
+  }
+
+  @Override
+  public Coder<KV<String, List<RangeHash>>> getOutputCoder() {
+    return CODER;
+  }
+
+  @Override
+  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
+    // Delegates to the wrapped source; this class keeps no size estimate of its own.
+    return hashTableSource.getEstimatedSizeBytes(options);
+  }
+
+  @Override
+  public BoundedReader<KV<String, List<RangeHash>>> createReader(PipelineOptions options)
+      throws IOException {
+    return new BufferedHashBasedReader(this, hashTableSource.createReader(options));
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof BufferedHadoopHashTableSource)) {
+      return false;
+    }
+    BufferedHadoopHashTableSource that = (BufferedHadoopHashTableSource) o;
+    return maxBufferSize == that.maxBufferSize
+        && Objects.equal(hashTableSource, that.hashTableSource);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(maxBufferSize, hashTableSource);
+  }
+
+  @Override
+  public String toString() {
+    return "BufferedHadoopHashTableSource ["
+        + immutableBytesToString(hashTableSource.startRowInclusive)
+        + ", "
+        + immutableBytesToString(hashTableSource.stopRowExclusive)
+        + "), maxBufferSize="
+        + maxBufferSize;
+  }
+
+  private static class BufferedHashBasedReader extends BoundedReader<KV<String, List<RangeHash>>> {
+
+    private final BoundedReader<RangeHash> hashReader;
+    private final BufferedHadoopHashTableSource source;
+
+    private List<RangeHash> buffer;
+
+    public BufferedHashBasedReader(
+        BufferedHadoopHashTableSource source, BoundedReader<RangeHash> hashReader) {
+      this.source = source;
+      this.hashReader = hashReader;
+      this.buffer = new ArrayList<>(source.maxBufferSize);
+    }
+
+    @Override
+    public boolean start() throws IOException {
+      if (!hashReader.start()) {
+        // HashReader does not have any hashes, return empty reader.
+        return false;
+      }
+      // Start returned true, consume the current RangeHash.
+      buffer.add(hashReader.getCurrent());
+      bufferRangeHashes();
+      // Buffer is not empty, return true to consume the current buffer.
+      return true;
+    }
+
+    // Reads from hashReader and buffers the RangeHashes until the buffer holds maxBufferSize items.
+    // Returns true if any RangeHashes were read from hashReader.
+    private boolean bufferRangeHashes() throws IOException {
+      boolean readRangeHashes = false;
+      while (buffer.size() < source.maxBufferSize && hashReader.advance()) {
+        readRangeHashes = true;
+        buffer.add(hashReader.getCurrent());
+      }
+      return readRangeHashes;
+    }
+
+    @Override
+    public boolean advance() throws IOException {
+      // Use a fresh list for the next batch; the previous list was handed out by getCurrent().
+      buffer = new ArrayList<>(source.maxBufferSize);
+
+      return bufferRangeHashes();
+    }
+
+    @Override
+    public KV<String, List<RangeHash>> getCurrent() {
+      // getCurrent only gets called when buffer is not empty.
+      Preconditions.checkState(
+          !buffer.isEmpty(), "getCurrent() should only be called when start/advance return true.");
+      // GroupBy key is a string and not ImmutableBytesWritable because the WritableCoder is not
+      // deterministic. The outputted PCollection is grouped by the K and needs a deterministic
+      // coder. Having a String K leads to an unfortunate double encoding, ImmutableBytesWritable->
+      // HEX string -> UTF8 encoded string. The number of batches are significantly smaller than
+      // data fetched from Bigtable and should not have meaningful impact on the job performance.
+      return KV.of(Bytes.toStringBinary(buffer.get(0).startInclusive.copyBytes()), buffer);
+    }
+
+    @Override
+    public void close() throws IOException {
+      hashReader.close();
+    }
+
+    @Override
+    public BoundedSource<KV<String, List<RangeHash>>> getCurrentSource() {
+      return source;
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
new file mode 100644
index 0000000000..a75833b022
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.bigtable.repackaged.com.google.common.base.Preconditions;
+import com.google.bigtable.repackaged.com.google.common.collect.Lists;
+import com.google.cloud.bigtable.beam.AbstractCloudBigtableTableDoFn;
+import com.google.cloud.bigtable.beam.CloudBigtableConfiguration;
+import com.google.cloud.bigtable.beam.TemplateUtils;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
+import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Metrics;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor.BigtableResultHasher;
+
+/**
+ * A {@link DoFn} that takes a row range and hash from HBase and validates the hash from rows read
+ * from Cloud Bigtable.
+ */
+class ComputeAndValidateHashFromBigtableDoFn
+    extends AbstractCloudBigtableTableDoFn<KV<String, Iterable<List<RangeHash>>>, RangeHash> {
+
+  private static final long serialVersionUID = 2349094L;
+  private final ValueProvider<String> tableName;
+  private final ValueProvider<String> projectId;
+  private final ValueProvider<String> sourceHashDir;
+
+  private final TableHashWrapperFactory tableHashWrapperFactory;
+
+  // Counter for reporting matching and mismatching ranges. Names are similar to HBase sync-table
+  // job.
+  private final Counter matches = Metrics.counter("cbt-dataflow-validate", "ranges_matched");
+  private final Counter mismatches = Metrics.counter("cbt-dataflow-validate", "ranges_not_matched");
+
+  public ComputeAndValidateHashFromBigtableDoFn(SyncTableOptions options) {
+    super(TemplateUtils.BuildSyncTableConfig(options));
+    // Create a local copy of ValueProviders, PipelineOptions are not serializable.
+    this.tableName = options.getBigtableTableId();
+    projectId = options.getBigtableProject();
+    sourceHashDir = options.getHashTableOutputDir();
+    tableHashWrapperFactory = new TableHashWrapperFactory();
+  }
+
+  @VisibleForTesting
+  ComputeAndValidateHashFromBigtableDoFn(
+      CloudBigtableConfiguration config,
+      ValueProvider<String> tableName,
+      ValueProvider<String> projectId,
+      ValueProvider<String> sourceHashDir,
+      TableHashWrapperFactory factory) {
+    super(config);
+    this.tableName = tableName;
+    this.tableHashWrapperFactory = factory;
+    this.sourceHashDir = sourceHashDir;
+    this.projectId = projectId;
+  }
+
+  @ProcessElement
+  public void processElement(ProcessContext context) throws Exception {
+    List<List<RangeHash>> wrappedRangeHashes = Lists.newArrayList(context.element().getValue());
+    // BufferedHadoopHashTableSource generates only 1 item per groupby key, key is startKey for the
+    // Sorted ranges.
+    Preconditions.checkState(
+        wrappedRangeHashes.size() == 1, "Can not have multiple entries for a key");
+    List<RangeHash> rangeHashes = wrappedRangeHashes.get(0);
+    Preconditions.checkState(!rangeHashes.isEmpty(), "Can not have empty ranges in DO_FN");
+
+    ImmutableBytesWritable rangeStartInclusive = rangeHashes.get(0).startInclusive;
+    ImmutableBytesWritable rangeEndExclusive =
+        rangeHashes.get(rangeHashes.size() - 1).stopExclusive;
+
+    BigtableResultHasher resultHasher = new BigtableResultHasher();
+    resultHasher.startBatch(rangeStartInclusive);
+
+    Iterator<RangeHash> rangeHashIterator = rangeHashes.iterator();
+    long numRows = 0;
+
+    RangeHash currentRangeHash = rangeHashIterator.next();
+
+    // Since all the row-ranges are sorted in HashTable's data files, 1 big scan can be used
+    // to read all the row ranges. Parallelism is achieved by splitting the HashTable's data
+    // files into smaller bundle of row-ranges in GroupBy. try-with-resources closes the scanner
+    // even when validation fails midway.
+    try (ResultScanner scanner =
+        createBigtableScan(rangeStartInclusive.copyBytes(), rangeEndExclusive.copyBytes())) {
+      // Process each row and validate hashes
+      for (Result result : scanner) {
+        numRows++;
+        if (numRows % 10_000 == 0) {
+          // Heartbeat in logs in case a large scan gets hung.
+          DOFN_LOG.debug("Processed " + numRows + " rows ");
+        }
+
+        ImmutableBytesWritable rowKey = new ImmutableBytesWritable(result.getRow());
+
+        // Check if the rowKey belongs to current range, if not keep iterating through the
+        // rangeHashes until rowKey's range is found.
+        while (!isWithinUpperBound(currentRangeHash.stopExclusive, rowKey)) {
+          validateBatchHash(context, resultHasher, currentRangeHash);
+          // THIS SHOULD NEVER HAPPEN. Bigtable is being scanned till the last
+          // RangeHash.endKeyExclusive(), so bigtable's result should not outlast the
+          // rangeHashes.
+          Preconditions.checkState(
+              rangeHashIterator.hasNext(),
+              "Buffer reached to end while scan is still active at row : %s. "
+                  + "Affected Range: [%s, %s).",
+              immutableBytesToString(result.getRow()),
+              immutableBytesToString(rangeStartInclusive),
+              immutableBytesToString(rangeEndExclusive));
+          currentRangeHash = rangeHashIterator.next();
+        }
+
+        // Always Hash the current row.
+        resultHasher.hashResult(result);
+      }
+    }
+
+    // Bigtable scan is finished at this point and rangeHashes may contain additional row ranges.
+    // Last range will always be unverified as the range end is exclusive and
+    // currentRow > rangeEndExclusive will never be true. Verify the last range.
+    validateBatchHash(context, resultHasher, currentRangeHash);
+
+    // If there are remaining ranges in the rangeHashes they all need to be reported as mismatched
+    // as there is nothing in Cloud Bigtable for those row ranges.
+    while (rangeHashIterator.hasNext()) {
+      currentRangeHash = rangeHashIterator.next();
+      reportMismatch(context, currentRangeHash);
+    }
+
+    DOFN_LOG.debug(
+        "Finishing context by outputting {}  keys in range [{}, {}).",
+        rangeHashes.size(),
+        immutableBytesToString(rangeStartInclusive),
+        immutableBytesToString(rangeEndExclusive));
+  }
+
+  private ResultScanner createBigtableScan(byte[] startKeyInclusive, byte[] stopKeyExclusive)
+      throws IOException {
+    Table table = getConnection().getTable(TableName.valueOf(tableName.get()));
+    // Get the scan from TableHash, HashTable can be run to hash a small part of data (selected
+    // column families, timestamp range, maxVersions etc), this scan allows us to fetch the same
+    // data from Cloud Bigtable to match.
+    TableHashWrapper tableHash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+    Scan scan = tableHash.getScan();
+    // Set the workitem boundaries on the scan.
+    if (startKeyInclusive.length > 0) {
+      scan.withStartRow(startKeyInclusive, true);
+    }
+    if (stopKeyExclusive.length > 0) {
+      scan.withStopRow(stopKeyExclusive, false);
+    }
+
+    return table.getScanner(scan);
+  }
+
+  /**
+   * Determines if {@code row < stopExclusive} for a row range [start, stopExclusive). An empty
+   * stopExclusive represents a range with no upper bound, so every row is within it.
+   */
+  private static boolean isWithinUpperBound(
+      ImmutableBytesWritable stopExclusive, ImmutableBytesWritable row) {
+    return stopExclusive.equals(HConstants.EMPTY_END_ROW) || row.compareTo(stopExclusive) < 0;
+  }
+
+  private void validateBatchHash(
+      ProcessContext context, BigtableResultHasher resultHasher, RangeHash currentRangeHash) {
+    // The batch is always started, so its safe to finish the batch. If there were no rows, we will
+    // get a hash for empty batch.
+    resultHasher.finishBatch();
+    if (!resultHasher.getBatchHash().equals(currentRangeHash.hash)) {
+      reportMismatch(context, currentRangeHash);
+    } else {
+      matches.inc();
+    }
+    // Start a new batch
+    resultHasher.startBatch(currentRangeHash.stopExclusive);
+  }
+
+  private void reportMismatch(ProcessContext context, RangeHash currentRangeHash) {
+    mismatches.inc();
+    DOFN_LOG.info(
+        "MISMATCH ON RANGE [{}, {}).",
+        immutableBytesToString(currentRangeHash.startInclusive),
+        immutableBytesToString(currentRangeHash.stopExclusive));
+    context.output(currentRangeHash);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
new file mode 100644
index 0000000000..f6ecf21e24
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
@@ -0,0 +1,440 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.common.annotations.VisibleForTesting;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.beam.validation.TableHashWrapper.TableHashReader;
+import com.google.common.base.Objects;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.DefaultCoder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * A beam source to read output of Hadoop HashTable job. The source creates 1 workitem per HashTable
+ * data file and emits a row-range/hash pair.
+ */
+@InternalApi
+public class HadoopHashTableSource extends BoundedSource<RangeHash> implements Serializable {
+
+  private static final long serialVersionUID = 2383724L;
+
+  private static final Coder<RangeHash> CODER = RangeHashCoder.of();
+
+  /**
+   * A simple POJO pairing a row range [startInclusive, stopExclusive) with the hash generated for
+   * it by the HashTable job. TODO Evaluate if we can use AutoValue for this class.
+   */
+  @DefaultCoder(RangeHashCoder.class)
+  public static class RangeHash {
+
+    public final ImmutableBytesWritable startInclusive;
+    public final ImmutableBytesWritable stopExclusive;
+    public final ImmutableBytesWritable hash;
+
+    private RangeHash(
+        ImmutableBytesWritable startInclusive,
+        ImmutableBytesWritable stopExclusive,
+        ImmutableBytesWritable hash) {
+      this.startInclusive = startInclusive;
+      this.stopExclusive = stopExclusive;
+      this.hash = hash;
+    }
+
+    static RangeHash of(
+        ImmutableBytesWritable startInclusive,
+        ImmutableBytesWritable stopExclusive,
+        ImmutableBytesWritable hash) {
+      Preconditions.checkNotNull(startInclusive); // all three components are mandatory
+      Preconditions.checkNotNull(stopExclusive);
+      Preconditions.checkNotNull(hash);
+      return new RangeHash(startInclusive, stopExclusive, hash);
+    }
+
+    @Override
+    public String toString() {
+      return String.format(
+          "RangeHash{ range = [ %s, %s), hash: %s }",
+          immutableBytesToString(startInclusive),
+          immutableBytesToString(stopExclusive),
+          immutableBytesToString(hash));
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof RangeHash)) {
+        return false;
+      }
+      RangeHash rangeHash = (RangeHash) o;
+      return Objects.equal(startInclusive, rangeHash.startInclusive)
+          && Objects.equal(stopExclusive, rangeHash.stopExclusive)
+          && Objects.equal(hash, rangeHash.hash);
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(startInclusive, stopExclusive, hash); // same fields as equals()
+    }
+  }
+
+  public static final Log LOG = LogFactory.getLog(HadoopHashTableSource.class);
+
+  private final ValueProvider<String> projectId;
+
+  // Path to the output of HashTable job. Usually in GCS.
+  private final ValueProvider<String> sourceHashDir;
+
+  // Row range owned by this source.
+  // The Start and Stop row are serialized in a custom way.
+  @VisibleForTesting @Nullable transient ImmutableBytesWritable startRowInclusive;
+
+  @VisibleForTesting @Nullable transient ImmutableBytesWritable stopRowExclusive;
+
+  private final TableHashWrapperFactory tableHashWrapperFactory;
+
+  /**
+   * Creates an un-split HadoopHashTableSource that reads HashTable data from sourceHashDir in
+   * project projectId. The start/stop rows are passed as null and populated in createReader.
+   */
+  public HadoopHashTableSource(
+      ValueProvider<String> projectId, ValueProvider<String> sourceHashDir) {
+    this(projectId, sourceHashDir, /*startRowInclusive*/ null, /*stopRowExclusive*/ null);
+  }
+
+  /**
+   * Constructor to initialize a HadoopHashTableSource for a given row-range. Delegates to the
+   * full constructor with a newly created TableHashWrapperFactory.
+   */
+  @VisibleForTesting
+  HadoopHashTableSource(
+      ValueProvider<String> projectId,
+      ValueProvider<String> sourceHashDir,
+      @Nullable ImmutableBytesWritable startRowInclusive,
+      @Nullable ImmutableBytesWritable stopRowExclusive) {
+    this(
+        projectId,
+        sourceHashDir,
+        startRowInclusive,
+        stopRowExclusive,
+        new TableHashWrapperFactory());
+  }
+
+  @VisibleForTesting
+  HadoopHashTableSource(
+      ValueProvider<String> projectId,
+      ValueProvider<String> hadoopHashTableOutputDir,
+      @Nullable ImmutableBytesWritable startRowInclusive,
+      @Nullable ImmutableBytesWritable stopRowExclusive,
+      TableHashWrapperFactory tableHashWrapperFactory) {
+    this.projectId = projectId;
+    this.sourceHashDir = hadoopHashTableOutputDir;
+    // startRow and stopRow are null when the template is initialized; they are read from the
+    // hashTableOutputDir, which is only available at pipeline runtime (see createReader).
+    this.startRowInclusive = startRowInclusive;
+    this.stopRowExclusive = stopRowExclusive;
+    this.tableHashWrapperFactory = tableHashWrapperFactory;
+  }
+
+  @Override
+  public List<? extends BoundedSource<RangeHash>> split(
+      long desiredBundleSizeBytes, PipelineOptions options) throws IOException {
+    // This method relies on the partitioning done by HBase-HashTable job. There is a possibility
+    // of stragglers. SyncTable handles it by using a group by and further splitting workitems.
+    TableHashWrapper hash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+
+    ImmutableList<ImmutableBytesWritable> partitions = hash.getPartitions();
+    int numPartitions = partitions.size();
+
+    List<HadoopHashTableSource> splitSources = new ArrayList<>(numPartitions + 1);
+    if (numPartitions == 0) {
+      // There are 0 partitions and 1 hashfile, return single source with full key range.
+      splitSources.add(
+          new HadoopHashTableSource(
+              projectId,
+              sourceHashDir,
+              hash.getStartRow(),
+              hash.getStopRow(),
+              tableHashWrapperFactory));
+      return splitSources;
+    }
+
+    // Use the HashTable start key. The value is HConstants.EMPTY_START_ROW for full table scan.
+    ImmutableBytesWritable nextStartRow = hash.getStartRow();
+    ImmutableBytesWritable stopRow = hash.getStopRow();
+
+    // The output of HashTable is organized as partition file and a set of datafiles.
+    // Partition file contains a list of partitions, these partitions split the key-range of a table
+    // into roughly equal row-ranges and hashes for these row-ranges are stored in a single
+    // datafile.
+    //
+    // There are always numPartitions +1 data files. Datafile(i) covers hashes for [partition{i-1},
+    // partition{i}).
+    // So a partition file containing entries [b,f] for a table with row range [a,z] will have 3
+    // data files containing hashes.
+    // file0 will contain [a(nextStartRow), b), file1 will contain [b,f), and file2 will contain
+    // [f,z(stopRow))
+    for (int i = 0; i < numPartitions; i++) {
+      // TODO make a utility function that generates [start, end) format from start/end.
+      LOG.debug(
+          "Adding: ["
+              + immutableBytesToString(nextStartRow.get())
+              + ", "
+              + immutableBytesToString(partitions.get(i).get())
+              + ")");
+      splitSources.add(
+          new HadoopHashTableSource(
+              projectId, sourceHashDir, nextStartRow, partitions.get(i), tableHashWrapperFactory));
+      nextStartRow = partitions.get(i);
+    }
+    // Log the last range [lastPartition, stopRow) before adding it.
+    LOG.debug(
+        "Adding: ["
+            + immutableBytesToString(nextStartRow.get())
+            + ", "
+            + immutableBytesToString(stopRow.get())
+            + ")");
+    // Add the last range for [lastPartition, stopRow).
+    splitSources.add(
+        new HadoopHashTableSource(
+            projectId, sourceHashDir, nextStartRow, stopRow, tableHashWrapperFactory));
+    LOG.info("Returning " + splitSources.size() + " sources from " + numPartitions + " partitions");
+    return splitSources;
+  }
+
+  @Override
+  public Coder<RangeHash> getOutputCoder() {
+    return CODER;
+  }
+
+  @Override
+  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
+    // HashTable data files don't expose a method to estimate size or lineCount, so report 0.
+    return 0;
+  }
+
+  @Override
+  public BoundedReader<RangeHash> createReader(PipelineOptions options) throws IOException {
+    TableHashWrapper hash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+
+    // An un-split source has no row range yet: the range comes from the HashTable job output, and
+    // sourceHashDir is a runtime parameter that is not available at construction time. Populate
+    // start and stop here from the TableHash that was just read.
+    if (startRowInclusive == null || stopRowExclusive == null) {
+      startRowInclusive = hash.getStartRow();
+      stopRowExclusive = hash.getStopRow();
+    }
+
+    return new HashBasedReader(
+        this,
+        startRowInclusive,
+        stopRowExclusive,
+        hash.newReader(
+            SyncTableUtils.createConfiguration(this.projectId.get(), this.sourceHashDir.get()),
+            startRowInclusive));
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof HadoopHashTableSource)) {
+      return false;
+    }
+    HadoopHashTableSource that = (HadoopHashTableSource) o;
+    return Objects.equal(projectId, that.projectId)
+        && Objects.equal(sourceHashDir, that.sourceHashDir)
+        && Objects.equal(startRowInclusive, that.startRowInclusive)
+        && Objects.equal(stopRowExclusive, that.stopRowExclusive);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(projectId, sourceHashDir, startRowInclusive, stopRowExclusive);
+  }
+
+  @Override
+  public String toString() {
+    return "HadoopHashTableSource ["
+        + immutableBytesToString(startRowInclusive)
+        + ", "
+        + immutableBytesToString(stopRowExclusive)
+        + ')';
+  }
+
+  private void writeObject(ObjectOutputStream s) throws IOException {
+    s.defaultWriteObject();
+    // Start and stop can be null, so each one is preceded by a boolean presence flag and is
+    // only written, as a copy of its bytes, when it is actually set. readObject consumes the
+    // stream in exactly this order.
+    boolean hasStart = startRowInclusive != null;
+    s.writeBoolean(hasStart);
+    if (hasStart) {
+      s.writeObject(startRowInclusive.copyBytes());
+    }
+
+    boolean hasStop = stopRowExclusive != null;
+    s.writeBoolean(hasStop);
+    if (hasStop) {
+      s.writeObject(stopRowExclusive.copyBytes());
+    }
+  }
+
+  private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
+    s.defaultReadObject();
+    // Each bound is preceded by a presence flag; a bound whose flag is false is not assigned.
+    if (s.readBoolean()) {
+      startRowInclusive = new ImmutableBytesWritable((byte[]) s.readObject());
+    }
+    if (s.readBoolean()) {
+      stopRowExclusive = new ImmutableBytesWritable((byte[]) s.readObject());
+    }
+  }
+
+  @VisibleForTesting
+  static class HashBasedReader extends BoundedReader<RangeHash> {
+
+    private final HadoopHashTableSource source;
+    private final TableHashReader reader;
+
+    @VisibleForTesting final ImmutableBytesWritable startRowInclusive;
+    @VisibleForTesting final ImmutableBytesWritable stopRowExclusive;
+
+    // Flag indicating that this workitem is finished.
+    private boolean isDone = false;
+    private ImmutableBytesWritable currentRangeStartKey;
+    // Hash for the current range.
+    private ImmutableBytesWritable currentHash;
+    private RangeHash currentRangeHash;
+
+    public HashBasedReader(
+        HadoopHashTableSource source,
+        ImmutableBytesWritable startRowInclusive,
+        ImmutableBytesWritable stopRowExclusive,
+        TableHashReader reader) {
+      this.source = source;
+      this.startRowInclusive = startRowInclusive;
+      this.stopRowExclusive = stopRowExclusive;
+      this.reader = reader;
+    }
+
+    @Override
+    public boolean start() throws IOException {
+      LOG.debug(
+          "Starting a new reader at key range ["
+              + immutableBytesToString(startRowInclusive)
+              + " ,"
+              + immutableBytesToString(stopRowExclusive)
+              + ").");
+
+      if (readNextKey()) {
+        // Dataflow calls start, followed by getCurrent. HashBased reader needs to read on TableHash
+        // twice to return a RangeHash since it specifies both range-start and range-end.
+        advance();
+        return true;
+      }
+
+      isDone = true;
+      return false;
+    }
+
+    @Override
+    public boolean advance() throws IOException {
+      if (isDone) {
+        LOG.debug("Ending workitem at key " + immutableBytesToString(currentRangeStartKey) + " .");
+        return false;
+      }
+
+      ImmutableBytesWritable startKey = this.currentRangeStartKey;
+      ImmutableBytesWritable hash = this.currentHash;
+
+      // if there is nothing to read, we are done. readNextKey advances the currentRangeStartKey.
+      isDone = !readNextKey();
+      currentRangeHash = RangeHash.of(startKey, currentRangeStartKey, hash);
+
+      return true;
+    }
+
+    // Returns true if a key can be read for this workitem.
+    private boolean readNextKey() throws IOException {
+      if (reader.next()) {
+        currentRangeStartKey = reader.getCurrentKey();
+        if ( // StopRow is not set, everything is in bounds.
+        (stopRowExclusive.equals(HConstants.EMPTY_END_ROW)
+            || currentRangeStartKey.compareTo(stopRowExclusive) < 0)) { // currentKey < stopKey
+          // There is a key to read and the key is within the bounds of this workitem. Return true.
+          currentHash = reader.getCurrentHash();
+          return true;
+        } else {
+          // There is a key to read but it is outside of the bounds of this workitem.
+          currentHash = null;
+          return false;
+        }
+      }
+
+      // Nothing left to read for this workitem. Next range would have started from
+      // stopRowExclusive.
+      currentRangeStartKey = stopRowExclusive;
+      currentHash = null;
+      return false;
+    }
+
+    @Override
+    public RangeHash getCurrent() {
+      return currentRangeHash;
+    }
+
+    @Override
+    public void close() throws IOException {
+      LOG.info(
+          "Finishing a reader for key range ["
+              + immutableBytesToString(startRowInclusive)
+              + " ,"
+              + immutableBytesToString(stopRowExclusive)
+              + "). Ending at "
+              + immutableBytesToString(currentRangeStartKey));
+      reader.close();
+    }
+
+    @Override
+    public BoundedSource<RangeHash> getCurrentSource() {
+      return source;
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
new file mode 100644
index 0000000000..d6341a08f2
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InvalidObjectException;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.List;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/** Coder used by Beam to encode/decode {@link RangeHash} objects. */
+public class RangeHashCoder extends Coder<RangeHash> {
+
+  public static Coder<RangeHash> of() {
+    return new RangeHashCoder();
+  }
+
+  @Override
+  public void encode(RangeHash value, OutputStream outStream) throws IOException {
+    if (value == null) {
+      throw new CoderException("Can not encode null objects.");
+    }
+    DataOutputStream dataOutputStream = new DataOutputStream(outStream);
+    // RangeHash fields can never be null; they are written in the order decode() reads them.
+    value.startInclusive.write(dataOutputStream);
+    value.stopExclusive.write(dataOutputStream);
+    value.hash.write(dataOutputStream);
+  }
+
+  @Override
+  public RangeHash decode(InputStream inStream) throws IOException {
+    DataInputStream dataInputStream = new DataInputStream(inStream);
+
+    ImmutableBytesWritable startInclusive = new ImmutableBytesWritable();
+    startInclusive.readFields(dataInputStream);
+
+    ImmutableBytesWritable stopExclusive = new ImmutableBytesWritable();
+    stopExclusive.readFields(dataInputStream);
+
+    ImmutableBytesWritable hash = new ImmutableBytesWritable();
+    hash.readFields(dataInputStream);
+
+    return RangeHash.of(startInclusive, stopExclusive, hash);
+  }
+
+  @Override
+  public List<? extends Coder<?>> getCoderArguments() {
+    return Collections.emptyList();
+  }
+
+  @Override
+  public void verifyDeterministic() throws NonDeterministicException {
+    // Nothing to verify: the coder is deterministic as it writes the byte[] fields in order.
+  }
+
+  /**
+   * !!! DO NOT DELETE !!!
+   *
+   * <p>See readObjectNoData method in:
+   * https://docs.oracle.com/javase/7/docs/platform/serialization/spec/input.html#6053.
+   *
+   * <p>Disable backwards compatibility with previous versions that were serialized.
+   *
+   * @throws InvalidObjectException always; restoring without stream data is not supported.
+   */
+  @SuppressWarnings("unused")
+  private void readObjectNoData() throws InvalidObjectException {
+    throw new InvalidObjectException("Hash data required");
+  }
+
+  @Override
+  protected Object clone() throws CloneNotSupportedException {
+    return super.clone();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return other instanceof RangeHashCoder;
+  }
+
+  @Override
+  public int hashCode() {
+    return RangeHashCoder.class.hashCode();
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
new file mode 100644
index 0000000000..56b38fc3cb
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly;
+import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.cloud.bigtable.beam.sequencefiles.Utils;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.annotations.VisibleForTesting;
+import java.util.List;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
+import org.apache.beam.sdk.metrics.MetricResult;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * A job that takes HBase HashTable output and compares the hashes from Cloud Bigtable table.
+ *
+ * <p>Execute the following command to run the job directly:
+ *
+ * <pre>
+ *   mvn compile exec:java \
+ *      -DmainClass=com.google.cloud.bigtable.beam.validation.SyncTableJob \
+ *      -Dexec.args="--runner=DataflowRunner \
+ *            --project=$PROJECT \
+ *            --bigtableInstanceId=$INSTANCE \
+ *            --bigtableTableId=$TABLE \
+ *            --hashTableOutputDir=$SOURCE_HASH_DIR \
+ *            --outputPrefix=$OUTPUT_PREFIX \
+ *            --stagingLocation=$STAGING_LOC \
+ *            --tempLocation=$TMP_LOC \
+ *            --region=$REGION \
+ *            --workerZone=$WORKER_ZONE"
+ * </pre>
+ *
+ * <p>Execute the following command to create the Dataflow template:
+ *
+ * <pre>
+ * mvn compile exec:java \
+ *   -DmainClass=com.google.cloud.bigtable.beam.validation.SyncTableJob \
+ *   -Dexec.args="--runner=DataflowRunner \
+ *                --project=$PROJECT \
+ *                --stagingLocation=gs://$STAGING_PATH \
+ *                --templateLocation=gs://$TEMPLATE_PATH \
+ *                --wait=false"
+ * </pre>
+ *
+ * <p>There are a few ways to run the pipeline using the template. See Dataflow doc for details:
+ * https://cloud.google.com/dataflow/docs/templates/executing-templates. Optionally, you can upload
+ * a metadata file that contains information about the runtime parameters that can be used for
+ * parameter validation purpose and more. A sample metadata file can be found at
+ * "src/main/resources/SyncTableJob_metadata".
+ *
+ * <p>An example using gcloud command line:
+ *
+ * <pre>
+ * gcloud beta dataflow jobs run $JOB_NAME \
+ *   --gcs-location gs://$TEMPLATE_PATH \
+ *   --parameters bigtableProject=$PROJECT,bigtableInstanceId=$INSTANCE,bigtableTableId=$TABLE,hashTableOutputDir=gs://$SOURCE_HASH_DIR,outputPrefix=$OUTPUT_PREFIX
+ * </pre>
+ */
+@InternalExtensionOnly
+public class SyncTableJob {
+
+  private static final Log LOG = LogFactory.getLog(SyncTableJob.class);
+
+  public interface SyncTableOptions extends GcpOptions {
+    // Beam derives each flag name from its getter, e.g. getBigtableTableId -> --bigtableTableId.
+    @Description("The Bigtable App Profile id.")
+    ValueProvider<String> getBigtableAppProfileId();
+
+    @SuppressWarnings("unused")
+    void setBigtableAppProfileId(ValueProvider<String> appProfileId);
+
+    @Description("The project that contains the table to validate. Defaults to --project.")
+    @Default.InstanceFactory(Utils.DefaultBigtableProjectFactory.class)
+    ValueProvider<String> getBigtableProject();
+
+    @SuppressWarnings("unused")
+    void setBigtableProject(ValueProvider<String> projectId);
+
+    @Description("The Bigtable instance id that contains the table to validate.")
+    ValueProvider<String> getBigtableInstanceId();
+
+    @SuppressWarnings("unused")
+    void setBigtableInstanceId(ValueProvider<String> instanceId);
+
+    @Description("The Bigtable table id to validate.")
+    ValueProvider<String> getBigtableTableId();
+
+    @SuppressWarnings("unused")
+    void setBigtableTableId(ValueProvider<String> tableId);
+
+    @Description("HBase HashTable job output dir.")
+    ValueProvider<String> getHashTableOutputDir();
+
+    @SuppressWarnings("unused")
+    // TODO: Rename it to sourceHashDir as in HBase sync table job.
+    void setHashTableOutputDir(ValueProvider<String> hashTableOutputDir);
+
+    @Description("File pattern for files containing mismatched row ranges.")
+    ValueProvider<String> getOutputPrefix();
+
+    @SuppressWarnings("unused")
+    void setOutputPrefix(ValueProvider<String> outputPrefix);
+
+    // When creating a template, this flag must be set to false.
+    @Description("Wait for pipeline to finish.")
+    @Default.Boolean(true)
+    boolean getWait();
+
+    @SuppressWarnings("unused")
+    void setWait(boolean wait);
+  }
+
+  /** Parses the options, builds and runs the pipeline, and logs its counters when waiting. */
+  public static void main(String[] args) {
+    PipelineOptionsFactory.register(SyncTableOptions.class);
+    SyncTableOptions opts =
+        PipelineOptionsFactory.fromArgs(args).withValidation().as(SyncTableOptions.class);
+
+    LOG.info("===> Building Pipeline");
+    Pipeline pipeline = buildPipeline(opts);
+
+    LOG.info("===> Running Pipeline");
+    PipelineResult result = pipeline.run();
+
+    // The match/mismatch counters are final only once the job has finished. When not waiting
+    // (e.g. while staging a template) there is no finished job to query, so skip the report.
+    if (opts.getWait()) {
+      Utils.waitForPipelineToFinish(result);
+      MetricQueryResults metrics = result.metrics().allMetrics();
+      for (MetricResult<Long> counter : metrics.getCounters()) {
+        LOG.warn(counter.getName() + ":" + counter.getAttempted());
+      }
+    }
+  }
+
+  @VisibleForTesting
+  public static Pipeline buildPipeline(SyncTableOptions opts) {
+    Pipeline pipeline = Pipeline.create(Utils.tweakOptions(opts));
+    pipeline
+        .apply(
+            "Read HBase HashTable output",
+            Read.from(
+                new BufferedHadoopHashTableSource(
+                    new HadoopHashTableSource(
+                        opts.getBigtableProject(), opts.getHashTableOutputDir()))))
+        .apply(
+            "group by and create granular workitems", GroupByKey.<String, List<RangeHash>>create())
+        .apply("validate hash", ParDo.of(new ComputeAndValidateHashFromBigtableDoFn(opts)))
+        .apply("Serialize the ranges", MapElements.via(new RangeHashToString()))
+        .apply("Write to file", TextIO.write().to(opts.getOutputPrefix()).withSuffix(".txt"));
+    return pipeline;
+  }
+
+  static class RangeHashToString extends SimpleFunction<RangeHash, String> {
+    // TODO maybe explore a sequenceFile sink for RangeHash. Hadoop jobs using this output may be
+    // easier to write for sequence file.
+    private static final Gson GSON = new Gson();
+
+    @Override
+    public String apply(RangeHash input) {
+      return GSON.toJson(input);
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
new file mode 100644
index 0000000000..cc92bea6a4
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/** Utility class for SyncTable job. */
+@InternalApi
+public class SyncTableUtils {
+
+  private SyncTableUtils() {}
+
+  public static String immutableBytesToString(ImmutableBytesWritable bytes) {
+    // get() is the whole backing array, so honor offset/length; a null writable renders as "".
+    return bytes == null
+        ? ""
+        : Bytes.toStringBinary(bytes.get(), bytes.getOffset(), bytes.getLength());
+  }
+
+  public static String immutableBytesToString(byte[] bytes) {
+    return Bytes.toStringBinary(bytes);
+  }
+
+  /**
+   * Creates a HBase configuration for reading HashTable output from GCS bucket located in
+   * projectId.
+   *
+   * @param projectId project containing the GCS bucket holding hashtable output.
+   * @param sourceHashDir location of hashtable output from HBase.
+   * @return configuration with GCS filesystem settings and sourceHashDir as the default FS.
+   */
+  public static Configuration createConfiguration(String projectId, String sourceHashDir) {
+    Configuration conf = HBaseConfiguration.create();
+    conf.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS");
+    conf.set("fs.gs.project.id", projectId);
+    conf.set("fs.defaultFS", sourceHashDir);
+    conf.set("google.cloud.auth.service.account.enable", "true");
+    return conf;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
new file mode 100644
index 0000000000..55200570ed
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.common.collect.ImmutableList;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.Serializable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * Exposes the minimal interface required from HashTable.TableHash; the implementation wraps a
+ * TableHash and delegates to it. The indirection exists so unit tests can mock TableHash.
+ */
+@InternalApi
+public interface TableHashWrapper extends Serializable {
+
+  int getNumHashFiles();
+
+  ImmutableList<ImmutableBytesWritable> getPartitions();
+
+  ImmutableBytesWritable getStartRow();
+
+  ImmutableBytesWritable getStopRow();
+
+  Scan getScan();
+
+  TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow);
+
+  interface TableHashReader extends Closeable {
+    boolean next() throws IOException;
+
+    ImmutableBytesWritable getCurrentKey();
+
+    ImmutableBytesWritable getCurrentHash();
+
+    void close() throws IOException;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
new file mode 100644
index 0000000000..a4e3544519
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.createConfiguration;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import java.io.IOException;
+import java.io.Serializable;
+
+/** Factory to create a TableHashWrapper. */
+@InternalApi
+public class TableHashWrapperFactory implements Serializable {
+
+  private static final long serialVersionUID = 265433454L;
+
+  public TableHashWrapper getTableHash(String projectId, String sourceHashDir) throws IOException {
+    return TableHashWrapperImpl.create(
+        createConfiguration(projectId, sourceHashDir), sourceHashDir);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
new file mode 100644
index 0000000000..b04bd538a6
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash.Reader;
+
+class TableHashWrapperImpl implements TableHashWrapper {
+
+  static TableHashWrapper create(Configuration conf, String hashTableOutputDir) throws IOException {
+    TableHash tableHash = TableHash.read(conf, new Path(hashTableOutputDir));
+    TableHashWrapper tableHashWrapper = new TableHashWrapperImpl(tableHash);
+    // Guava's Preconditions only substitutes %s placeholders (not %d), so use %s for the counts.
+    Preconditions.checkArgument(
+        tableHashWrapper.getNumHashFiles() == (tableHashWrapper.getPartitions().size() + 1),
+        "Corrupt hashtable output. %s hash files for %s partitions. Expected %s files.",
+        tableHashWrapper.getNumHashFiles(),
+        tableHashWrapper.getPartitions().size(),
+        tableHashWrapper.getPartitions().size() + 1);
+    return tableHashWrapper;
+  }
+
+  private final TableHash hash;
+
+  private TableHashWrapperImpl(TableHash hash) {
+    this.hash = hash;
+  }
+
+  public int getNumHashFiles() {
+    return BigtableTableHashAccessor.getNumHashFiles(hash);
+  }
+
+  public ImmutableList<ImmutableBytesWritable> getPartitions() {
+    return BigtableTableHashAccessor.getPartitions(hash);
+  }
+
+  public ImmutableBytesWritable getStartRow() {
+    return BigtableTableHashAccessor.getStartRow(hash);
+  }
+
+  public ImmutableBytesWritable getStopRow() {
+    return BigtableTableHashAccessor.getStopRow(hash);
+  }
+
+  public Scan getScan() {
+    try {
+      return BigtableTableHashAccessor.getScan(hash);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to init a scan from TableHash: ", e);
+    }
+  }
+
+  public TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow) {
+    try {
+      return TableHashReaderImpl.create(hash.newReader(conf, startRow));
+    } catch (IOException e) {
+      throw new RuntimeException(
+          "Failed to open reader at " + immutableBytesToString(startRow.copyBytes()), e);
+    }
+  }
+
+  static class TableHashReaderImpl implements TableHashReader {
+
+    private final Reader reader;
+
+    static TableHashReaderImpl create(TableHash.Reader reader) {
+      Preconditions.checkNotNull(reader, "Reader can not be null.");
+      return new TableHashReaderImpl(reader);
+    }
+
+    private TableHashReaderImpl(TableHash.Reader reader) {
+      this.reader = reader;
+    }
+
+    @Override
+    public boolean next() throws IOException {
+      return reader.next();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentKey() {
+      return reader.getCurrentKey();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentHash() {
+      return reader.getCurrentHash();
+    }
+
+    @Override
+    public void close() throws IOException {
+      reader.close();
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
new file mode 100644
index 0000000000..a7db0add1c
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.HashTable.ResultHasher;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash;
+
+/** A helper class to access package private fields and methods of HashTable.TableHash. */
+@InternalApi
+public class BigtableTableHashAccessor {
+
+  // Not instantiable: all accessors are static and take the TableHash to read from.
+  private BigtableTableHashAccessor() {}
+
+  public static int getNumHashFiles(TableHash hash) {
+    return hash.numHashFiles;
+  }
+
+  public static ImmutableList<ImmutableBytesWritable> getPartitions(TableHash hash) {
+    return ImmutableList.copyOf(hash.partitions); // immutable copy for callers
+  }
+
+  public static ImmutableBytesWritable getStartRow(TableHash hash) {
+    return new ImmutableBytesWritable(hash.startRow); // new wrapper object on every call
+  }
+
+  public static ImmutableBytesWritable getStopRow(TableHash hash) {
+    return new ImmutableBytesWritable(hash.stopRow); // new wrapper object on every call
+  }
+
+  public static Scan getScan(TableHash hash) throws IOException {
+    return hash.initScan(); // an IOException from initScan() propagates to the caller
+  }
+
+  // Wrapper to access the package private class HashTable.ResultHasher. Delegates every call to
+  // the wrapped ResultHasher; having a public wrapper also helps with mocking in unit tests.
+  public static class BigtableResultHasher {
+    private final ResultHasher hasher; // the HashTable.ResultHasher all calls are forwarded to
+
+    public BigtableResultHasher() {
+      hasher = new ResultHasher();
+    }
+
+    public void startBatch(ImmutableBytesWritable batchStartKey) {
+      hasher.startBatch(batchStartKey);
+    }
+
+    public void finishBatch() {
+      hasher.finishBatch();
+    }
+
+    public ImmutableBytesWritable getBatchHash() {
+      return hasher.getBatchHash();
+    }
+
+    public void hashResult(Result result) {
+      hasher.hashResult(result);
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc
deleted file mode 100644
index 8fe4533a01..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc
deleted file mode 100644
index 1467a17f1f..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
deleted file mode 100644
index 83e482aac0..0000000000
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
+++ /dev/null
@@ -1,2 +0,0 @@
-
-test-snapshottest�����. (
\ No newline at end of file
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc
deleted file mode 100644
index ea5b25e778..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc
deleted file mode 100644
index 51cacdd03b..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc
deleted file mode 100644
index 2c4de3ac0e..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc
deleted file mode 100644
index 931ebfb545..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc
deleted file mode 100644
index 32f450dba4..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc
deleted file mode 100644
index 80317a1515..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc
deleted file mode 100644
index 00a9d7720d..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc
deleted file mode 100644
index 1d7e3d8653..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc
deleted file mode 100644
index ca57c97e2d..0000000000
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc and /dev/null differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
index 7f8f8fc2db..6e66d3e096 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
@@ -1,107 +1,133 @@
+// Run these commands in the HBase shell (start it with `hbase shell` from a Unix terminal on the HBase master).
 create 'test', 'cf', {SPLITS => ["1", "2", "3", "4", "5", "6", "7", "8", "9"]}
-put 'test','1', 'cf:a', 'value1'
-put 'test','2', 'cf:a', 'value2'
-put 'test','3', 'cf:a', 'value3'
-put 'test','4', 'cf:a', 'value4'
-put 'test','5', 'cf:a', 'value5'
-put 'test','6', 'cf:a', 'value6'
-put 'test','7', 'cf:a', 'value7'
-put 'test','8', 'cf:a', 'value8'
-put 'test','9', 'cf:a', 'value9'
-put 'test','10', 'cf:a', 'value10'
-put 'test','11', 'cf:a', 'value11'
-put 'test','12', 'cf:a', 'value12'
-put 'test','13', 'cf:a', 'value13'
-put 'test','14', 'cf:a', 'value14'
-put 'test','15', 'cf:a', 'value15'
-put 'test','16', 'cf:a', 'value16'
-put 'test','17', 'cf:a', 'value17'
-put 'test','18', 'cf:a', 'value18'
-put 'test','19', 'cf:a', 'value19'
-put 'test','20', 'cf:a', 'value20'
-put 'test','21', 'cf:a', 'value21'
-put 'test','22', 'cf:a', 'value22'
-put 'test','23', 'cf:a', 'value23'
-put 'test','24', 'cf:a', 'value24'
-put 'test','25', 'cf:a', 'value25'
-put 'test','26', 'cf:a', 'value26'
-put 'test','27', 'cf:a', 'value27'
-put 'test','28', 'cf:a', 'value28'
-put 'test','29', 'cf:a', 'value29'
-put 'test','30', 'cf:a', 'value30'
-put 'test','31', 'cf:a', 'value31'
-put 'test','32', 'cf:a', 'value32'
-put 'test','33', 'cf:a', 'value33'
-put 'test','34', 'cf:a', 'value34'
-put 'test','35', 'cf:a', 'value35'
-put 'test','36', 'cf:a', 'value36'
-put 'test','37', 'cf:a', 'value37'
-put 'test','38', 'cf:a', 'value38'
-put 'test','39', 'cf:a', 'value39'
-put 'test','40', 'cf:a', 'value40'
-put 'test','41', 'cf:a', 'value41'
-put 'test','42', 'cf:a', 'value42'
-put 'test','43', 'cf:a', 'value43'
-put 'test','44', 'cf:a', 'value44'
-put 'test','45', 'cf:a', 'value45'
-put 'test','46', 'cf:a', 'value46'
-put 'test','47', 'cf:a', 'value47'
-put 'test','48', 'cf:a', 'value48'
-put 'test','49', 'cf:a', 'value49'
-put 'test','50', 'cf:a', 'value50'
-put 'test','51', 'cf:a', 'value51'
-put 'test','52', 'cf:a', 'value52'
-put 'test','53', 'cf:a', 'value53'
-put 'test','54', 'cf:a', 'value54'
-put 'test','55', 'cf:a', 'value55'
-put 'test','56', 'cf:a', 'value56'
-put 'test','57', 'cf:a', 'value57'
-put 'test','58', 'cf:a', 'value58'
-put 'test','59', 'cf:a', 'value59'
-put 'test','60', 'cf:a', 'value60'
-put 'test','61', 'cf:a', 'value61'
-put 'test','62', 'cf:a', 'value62'
-put 'test','63', 'cf:a', 'value63'
-put 'test','64', 'cf:a', 'value64'
-put 'test','65', 'cf:a', 'value65'
-put 'test','66', 'cf:a', 'value66'
-put 'test','67', 'cf:a', 'value67'
-put 'test','68', 'cf:a', 'value68'
-put 'test','69', 'cf:a', 'value69'
-put 'test','70', 'cf:a', 'value70'
-put 'test','71', 'cf:a', 'value71'
-put 'test','72', 'cf:a', 'value72'
-put 'test','73', 'cf:a', 'value73'
-put 'test','74', 'cf:a', 'value74'
-put 'test','75', 'cf:a', 'value75'
-put 'test','76', 'cf:a', 'value76'
-put 'test','77', 'cf:a', 'value77'
-put 'test','78', 'cf:a', 'value78'
-put 'test','79', 'cf:a', 'value79'
-put 'test','80', 'cf:a', 'value80'
-put 'test','81', 'cf:a', 'value81'
-put 'test','82', 'cf:a', 'value82'
-put 'test','83', 'cf:a', 'value83'
-put 'test','84', 'cf:a', 'value84'
-put 'test','85', 'cf:a', 'value85'
-put 'test','86', 'cf:a', 'value86'
-put 'test','87', 'cf:a', 'value87'
-put 'test','88', 'cf:a', 'value88'
-put 'test','89', 'cf:a', 'value89'
-put 'test','90', 'cf:a', 'value90'
-put 'test','91', 'cf:a', 'value91'
-put 'test','92', 'cf:a', 'value92'
-put 'test','93', 'cf:a', 'value93'
-put 'test','94', 'cf:a', 'value94'
-put 'test','95', 'cf:a', 'value95'
-put 'test','96', 'cf:a', 'value96'
-put 'test','97', 'cf:a', 'value97'
-put 'test','98', 'cf:a', 'value98'
-put 'test','99', 'cf:a', 'value99'
-put 'test','100', 'cf:a', 'value100'
+put 'test','1', 'cf:a', 'value1', 100
+put 'test','2', 'cf:a', 'value2', 100
+put 'test','3', 'cf:a', 'value3', 100
+put 'test','4', 'cf:a', 'value4', 100
+put 'test','5', 'cf:a', 'value5', 100
+put 'test','6', 'cf:a', 'value6', 100
+put 'test','7', 'cf:a', 'value7', 100
+put 'test','8', 'cf:a', 'value8', 100
+put 'test','9', 'cf:a', 'value9', 100
+put 'test','10', 'cf:a', 'value10', 100
+put 'test','11', 'cf:a', 'value11', 100
+put 'test','12', 'cf:a', 'value12', 100
+put 'test','13', 'cf:a', 'value13', 100
+put 'test','14', 'cf:a', 'value14', 100
+put 'test','15', 'cf:a', 'value15', 100
+put 'test','16', 'cf:a', 'value16', 100
+put 'test','17', 'cf:a', 'value17', 100
+put 'test','18', 'cf:a', 'value18', 100
+put 'test','19', 'cf:a', 'value19', 100
+put 'test','20', 'cf:a', 'value20', 100
+put 'test','21', 'cf:a', 'value21', 100
+put 'test','22', 'cf:a', 'value22', 100
+put 'test','23', 'cf:a', 'value23', 100
+put 'test','24', 'cf:a', 'value24', 100
+put 'test','25', 'cf:a', 'value25', 100
+put 'test','26', 'cf:a', 'value26', 100
+put 'test','27', 'cf:a', 'value27', 100
+put 'test','28', 'cf:a', 'value28', 100
+put 'test','29', 'cf:a', 'value29', 100
+put 'test','30', 'cf:a', 'value30', 100
+put 'test','31', 'cf:a', 'value31', 100
+put 'test','32', 'cf:a', 'value32', 100
+put 'test','33', 'cf:a', 'value33', 100
+put 'test','34', 'cf:a', 'value34', 100
+put 'test','35', 'cf:a', 'value35', 100
+put 'test','36', 'cf:a', 'value36', 100
+put 'test','37', 'cf:a', 'value37', 100
+put 'test','38', 'cf:a', 'value38', 100
+put 'test','39', 'cf:a', 'value39', 100
+put 'test','40', 'cf:a', 'value40', 100
+put 'test','41', 'cf:a', 'value41', 100
+put 'test','42', 'cf:a', 'value42', 100
+put 'test','43', 'cf:a', 'value43', 100
+put 'test','44', 'cf:a', 'value44', 100
+put 'test','45', 'cf:a', 'value45', 100
+put 'test','46', 'cf:a', 'value46', 100
+put 'test','47', 'cf:a', 'value47', 100
+put 'test','48', 'cf:a', 'value48', 100
+put 'test','49', 'cf:a', 'value49', 100
+put 'test','50', 'cf:a', 'value50', 100
+put 'test','51', 'cf:a', 'value51', 100
+put 'test','52', 'cf:a', 'value52', 100
+put 'test','53', 'cf:a', 'value53', 100
+put 'test','54', 'cf:a', 'value54', 100
+put 'test','55', 'cf:a', 'value55', 100
+put 'test','56', 'cf:a', 'value56', 100
+put 'test','57', 'cf:a', 'value57', 100
+put 'test','58', 'cf:a', 'value58', 100
+put 'test','59', 'cf:a', 'value59', 100
+put 'test','60', 'cf:a', 'value60', 100
+put 'test','61', 'cf:a', 'value61', 100
+put 'test','62', 'cf:a', 'value62', 100
+put 'test','63', 'cf:a', 'value63', 100
+put 'test','64', 'cf:a', 'value64', 100
+put 'test','65', 'cf:a', 'value65', 100
+put 'test','66', 'cf:a', 'value66', 100
+put 'test','67', 'cf:a', 'value67', 100
+put 'test','68', 'cf:a', 'value68', 100
+put 'test','69', 'cf:a', 'value69', 100
+put 'test','70', 'cf:a', 'value70', 100
+put 'test','71', 'cf:a', 'value71', 100
+put 'test','72', 'cf:a', 'value72', 100
+put 'test','73', 'cf:a', 'value73', 100
+put 'test','74', 'cf:a', 'value74', 100
+put 'test','75', 'cf:a', 'value75', 100
+put 'test','76', 'cf:a', 'value76', 100
+put 'test','77', 'cf:a', 'value77', 100
+put 'test','78', 'cf:a', 'value78', 100
+put 'test','79', 'cf:a', 'value79', 100
+put 'test','80', 'cf:a', 'value80', 100
+put 'test','81', 'cf:a', 'value81', 100
+put 'test','82', 'cf:a', 'value82', 100
+put 'test','83', 'cf:a', 'value83', 100
+put 'test','84', 'cf:a', 'value84', 100
+put 'test','85', 'cf:a', 'value85', 100
+put 'test','86', 'cf:a', 'value86', 100
+put 'test','87', 'cf:a', 'value87', 100
+put 'test','88', 'cf:a', 'value88', 100
+put 'test','89', 'cf:a', 'value89', 100
+put 'test','90', 'cf:a', 'value90', 100
+put 'test','91', 'cf:a', 'value91', 100
+put 'test','92', 'cf:a', 'value92', 100
+put 'test','93', 'cf:a', 'value93', 100
+put 'test','94', 'cf:a', 'value94', 100
+put 'test','95', 'cf:a', 'value95', 100
+put 'test','96', 'cf:a', 'value96', 100
+put 'test','97', 'cf:a', 'value97', 100
+put 'test','98', 'cf:a', 'value98', 100
+put 'test','99', 'cf:a', 'value99', 100
+put 'test','100', 'cf:a', 'value100', 100
 snapshot 'test', 'test-snapshot'
 list_snapshots
 
+
+////////////////////Run from Unix shell on HBase master node//////////////////
+// Export the snapshot
+hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot test-snapshot -copy-to /integration-test/data -mappers 16
+
+// Create the hashes for the table. Run the command from a Unix shell on an HBase
+// node.
+hbase org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=10 --numhashfiles=10 test /integration-test/hashtable
+
+// Copy the data from the Hadoop filesystem to the local disk, then upload it to GCS.
+hadoop fs -copyToLocal /integration-test /tmp/
+gsutil cp -r /tmp/integration-test gs://<my-bucket>/
+
+// GCS bucket should look like this:
+$ gsutil ls gs://<my-bucket>/integration-test/data
+gs://<my-bucket>/integration-test/data/
+gs://<my-bucket>/integration-test/data/.hbase-snapshot/
+gs://<my-bucket>/integration-test/data/archive/
+$ gsutil ls gs://<my-bucket>/integration-test/hashtable
+gs://<my-bucket>/integration-test/hashtable/manifest
+gs://<my-bucket>/integration-test/hashtable/partitions
+gs://<my-bucket>/integration-test/hashtable/hashes/
+
+// Run these commands in the HBase shell (start it with `hbase shell` from a Unix terminal on the HBase master).
+// Clean up the table.
 disable 'test'
 drop 'test'
 exit
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
new file mode 100644
index 0000000000..03ac02e452
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
@@ -0,0 +1,2 @@
+
+test-snapshottestϹ���. (@���������
\ No newline at end of file
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/data.manifest
similarity index 55%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/data.manifest
index 180516dc03..6439f06130 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/data.manifest differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b
similarity index 86%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b
index 5320c6c58d..1b91b948d8 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d
index cbd9f539b3..951eb512ac 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b
index ee586c252e..dc89f02ec2 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe
index 05a0cac912..c7fb208f72 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d
index e8d9789f5e..7638f6eabb 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b
index dc8da56c10..c6ba1f760b 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab
index e77357601a..5a757daec8 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2
index c119dd13ef..d29619e3ec 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85
similarity index 86%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85
index d640fc8498..337b5f9280 100644
Binary files a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/_SUCCESS b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/_SUCCESS
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data
new file mode 100644
index 0000000000..26334294df
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index
new file mode 100644
index 0000000000..f7ac1fc941
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data
new file mode 100644
index 0000000000..87b715673c
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index
new file mode 100644
index 0000000000..4edcbd1ed5
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data
new file mode 100644
index 0000000000..4b59b346f0
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index
new file mode 100644
index 0000000000..4169ee8258
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data
new file mode 100644
index 0000000000..a05197b51d
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index
new file mode 100644
index 0000000000..9228013bfa
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data
new file mode 100644
index 0000000000..6e29b085e7
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index
new file mode 100644
index 0000000000..245c2ceb3f
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data
new file mode 100644
index 0000000000..40cbf30418
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index
new file mode 100644
index 0000000000..dbbacaf8f0
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data
new file mode 100644
index 0000000000..3f0e32269c
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index
new file mode 100644
index 0000000000..a0818358eb
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data
new file mode 100644
index 0000000000..effda57ece
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index
new file mode 100644
index 0000000000..a8eb1a1748
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data
new file mode 100644
index 0000000000..011b956c5f
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index
new file mode 100644
index 0000000000..fada13a256
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data
new file mode 100644
index 0000000000..f55fa79aca
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index
new file mode 100644
index 0000000000..8c8793cef8
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest
new file mode 100644
index 0000000000..a95421d027
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest
@@ -0,0 +1,4 @@
+#Wed Dec 30 01:23:41 UTC 2020
+numHashFiles=10
+table=test
+targetBatchSize=10
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions
new file mode 100644
index 0000000000..1d447dd67a
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
index 62f1cdced2..0320dd1a61 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
@@ -18,37 +18,61 @@
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import com.google.api.services.storage.model.Objects;
-import com.google.cloud.bigtable.beam.sequencefiles.testing.BigtableTableUtils;
+import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot.ImportOptions;
+import com.google.cloud.bigtable.beam.sequencefiles.HBaseResultToMutationFn;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
 import com.google.cloud.bigtable.hbase.BigtableConfiguration;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
 import java.util.UUID;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
 import org.apache.beam.runners.dataflow.DataflowRunner;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
+import org.apache.beam.sdk.PipelineResult;
 import org.apache.beam.sdk.PipelineResult.State;
 import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
 import org.apache.beam.sdk.extensions.gcp.util.GcsUtil;
 import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /*
- * End to end integration test for pipeline that import HBase snapshot data into Cloud Bigtable.
+ * End to end integration test for pipeline that imports HBase snapshot data into Cloud Bigtable and
+ * validates the imported data with SyncTable.
  * Prepare test data with gsutil(https://cloud.google.com/storage/docs/quickstart-gsutil):
- * gsutil -m cp -r <PATH_TO_REPO>/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/ \
- *  gs://<test_bucket>/integration-test/
+ * gsutil -m cp -r <PATH_TO_REPO>/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test \
+ *  gs://<test_bucket>/
  *
  * Setup GCP credential: https://cloud.google.com/docs/authentication
  *  Ensure your credential have access to Bigtable and Dataflow
@@ -62,7 +86,7 @@
  */
 public class EndToEndIT {
 
-  private final Log LOG = LogFactory.getLog(getClass());
+  private static final Logger LOG = LoggerFactory.getLogger(EndToEndIT.class);
   private static final String TEST_SNAPSHOT_NAME = "test-snapshot";
   // Location of test data hosted on Google Cloud Storage, for on-cloud dataflow tests.
   private static final String CLOUD_TEST_DATA_FOLDER = "cloud.test.data.folder";
@@ -87,6 +111,8 @@ public class EndToEndIT {
 
   // Snapshot data setup
   private String hbaseSnapshotDir;
+  private String hashDir;
+  private String syncTableOutputDir;
 
   @Before
   public void setup() throws Exception {
@@ -101,6 +127,13 @@ public void setup() throws Exception {
 
     hbaseSnapshotDir = cloudTestDataFolder + "data/";
     UUID test_uuid = UUID.randomUUID();
+    hashDir = cloudTestDataFolder + "hashtable/";
+
+    syncTableOutputDir = dataflowStagingLocation;
+    if (!syncTableOutputDir.endsWith(File.separator)) {
+      syncTableOutputDir = syncTableOutputDir + File.separator;
+    }
+    syncTableOutputDir = syncTableOutputDir + "sync-table-output/" + test_uuid + "/";
 
     // Cloud Storage config
     GcpOptions gcpOptions = PipelineOptionsFactory.create().as(GcpOptions.class);
@@ -118,6 +151,12 @@ public void setup() throws Exception {
     for (int i = 0; i < keys.length; i++) {
       keySplits[i] = keys[i].getBytes();
     }
+
+    // Create table in Bigtable
+    TableName tableName = TableName.valueOf(tableId);
+    HTableDescriptor descriptor = new HTableDescriptor(tableName);
+    descriptor.addFamily(new HColumnDescriptor(CF));
+    connection.getAdmin().createTable(descriptor, SnapshotTestingUtils.getSplitKeys());
   }
 
   private static String getTestProperty(String name) {
@@ -126,6 +165,19 @@ private static String getTestProperty(String name) {
 
   @After
   public void teardown() throws IOException {
+    final List<GcsPath> paths = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "/*"));
+
+    if (!paths.isEmpty()) {
+      final List<String> pathStrs = new ArrayList<>();
+
+      for (GcsPath path : paths) {
+        pathStrs.add(path.toString());
+      }
+      // TODO: cleanup fails when tests time out. Add an orphan cleaner in the setup()
+      // https://github.com/googleapis/java-bigtable/blob/35588d89b9b243eb691a29d3aff16b9f5a08fbb8/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/test_helpers/env/AbstractTestEnv.java#L108-L119
+      this.gcsUtil.remove(pathStrs);
+    }
+
     connection.close();
 
     // delete test table
@@ -134,18 +186,28 @@ public void teardown() throws IOException {
         .deleteTable(TableName.valueOf(tableId));
   }
 
-  @Test
-  public void testHBaseSnapshotImport() throws Exception {
-
-    // Crete table
-    TableName tableName = TableName.valueOf(tableId);
-    HTableDescriptor descriptor = new HTableDescriptor(tableName);
+  private SyncTableOptions createSyncTableOptions() {
+    DataflowPipelineOptions syncTableOpts =
+        PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+    syncTableOpts.setRunner(DataflowRunner.class);
+    syncTableOpts.setGcpTempLocation(dataflowStagingLocation);
+    syncTableOpts.setNumWorkers(1);
+    syncTableOpts.setProject(projectId);
 
-    descriptor.addFamily(new HColumnDescriptor(CF));
+    SyncTableOptions syncOpts = syncTableOpts.as(SyncTableOptions.class);
+    // Setup Bigtable params
+    syncOpts.setBigtableProject(StaticValueProvider.of(projectId));
+    syncOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId));
+    syncOpts.setBigtableTableId(StaticValueProvider.of(tableId));
+    syncOpts.setBigtableAppProfileId(null);
 
-    connection.getAdmin().createTable(descriptor, SnapshotTestingUtils.getSplitKeys());
+    // Setup Hashes
+    syncOpts.setHashTableOutputDir(StaticValueProvider.of(hashDir));
+    syncOpts.setOutputPrefix(StaticValueProvider.of(syncTableOutputDir));
+    return syncOpts;
+  }
 
-    // Start import
+  private ImportOptions createImportOptions() {
     DataflowPipelineOptions importPipelineOpts =
         PipelineOptionsFactory.as(DataflowPipelineOptions.class);
     importPipelineOpts.setRunner(DataflowRunner.class);
@@ -154,10 +216,9 @@ public void testHBaseSnapshotImport() throws Exception {
     importPipelineOpts.setProject(projectId);
     importPipelineOpts.setRegion(region);
 
-    ImportJobFromHbaseSnapshot.ImportOptions importOpts =
-        importPipelineOpts.as(ImportJobFromHbaseSnapshot.ImportOptions.class);
+    ImportOptions importOpts = importPipelineOpts.as(ImportOptions.class);
 
-    // setup GCP and bigtable
+    // setup Bigtable options
     importOpts.setBigtableProject(StaticValueProvider.of(projectId));
     importOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId));
     importOpts.setBigtableTableId(StaticValueProvider.of(tableId));
@@ -165,17 +226,79 @@ public void testHBaseSnapshotImport() throws Exception {
     // setup HBase snapshot info
     importOpts.setHbaseSnapshotSourceDir(hbaseSnapshotDir);
     importOpts.setSnapshotName(TEST_SNAPSHOT_NAME);
+    return importOpts;
+  }
+
+  private Map<String, Long> getCountMap(PipelineResult result) {
+    MetricQueryResults metrics = result.metrics().allMetrics();
+    return StreamSupport.stream(metrics.getCounters().spliterator(), false)
+        .collect(Collectors.toMap((m) -> m.getName().getName(), (m) -> m.getAttempted()));
+  }
+
+  /**
+   * Reads the output of SyncTable job and returns a list of mismatched RangeHashes.
+   *
+   * @throws IOException if the output files cannot be listed or read
+   */
+  private List<RangeHash> readMismatchesFromOutputFiles() throws IOException {
+    Gson gson = new Gson();
+    List<GcsPath> outputFiles = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
+    List<RangeHash> rangeHashes = new ArrayList<>();
+
+    // Read each file fully (one read() may return early), then parse one RangeHash per line.
+    for (GcsPath outputFile : outputFiles) {
+      ByteBuffer contents = ByteBuffer.wrap(new byte[(int) gcsUtil.fileSize(outputFile)]);
+      try (java.nio.channels.ReadableByteChannel channel = gcsUtil.open(outputFile)) {
+        while (contents.hasRemaining() && channel.read(contents) >= 0) {}
+      }
+      BufferedReader reader =
+          new BufferedReader(new InputStreamReader(new ByteArrayInputStream(contents.array())));
+      String serializedRangeHash;
+      while ((serializedRangeHash = reader.readLine()) != null) {
+        try {
+          rangeHashes.add(gson.fromJson(serializedRangeHash.trim(), RangeHash.class));
+        } catch (Exception e) {
+          LOG.error("Failed to parse JSON: [" + serializedRangeHash + "]", e);
+          throw e;
+        }
+      }
+    }
+    return rangeHashes;
+  }
+
+  // Asserts that every row key in rowKeys falls inside at least one of the mismatched ranges.
+  // Fails with an AssertionError (from Assert.assertTrue) on the first row key that does not.
+  private void validateRowInRangeHashes(List<byte[]> rowKeys, Iterable<RangeHash> mismatches) {
+    for (byte[] mismatchedRowKey : rowKeys) {
+      Assert.assertTrue(containsRow(mismatchedRowKey, mismatches));
+    }
+  }
+
+  // Returns true if rowKey lies in [startInclusive, stopExclusive) of any range in rangeHashes.
+  private boolean containsRow(byte[] rowKey, Iterable<RangeHash> rangeHashes) {
+    for (RangeHash mismatchedRange : rangeHashes) {
+      // TODO: There may be a better Range.belongs() utility function somewhere?
+      // An empty start/stop key means the range is unbounded on that side.
+      if ((mismatchedRange.startInclusive.equals(HConstants.EMPTY_BYTE_ARRAY)
+              || mismatchedRange.startInclusive.compareTo(rowKey) <= 0)
+          && (mismatchedRange.stopExclusive.equals(HConstants.EMPTY_BYTE_ARRAY)
+              || mismatchedRange.stopExclusive.compareTo(rowKey) > 0)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  @Test
+  public void testHBaseSnapshotImport() throws Exception {
+
+    // Start import
+    ImportOptions importOpts = createImportOptions();
 
     // run pipeline
     State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish();
     Assert.assertEquals(State.DONE, state);
 
-    // check data in bigtable
-    BigtableTableUtils destTable = new BigtableTableUtils(connection, tableId, CF);
-    Assert.assertEquals(
-        100 /* There are 100 rows in test snapshot*/,
-        destTable.readAllCellsFromTable().toArray().length);
-
     // check that the .restore dir used for temp files has been removed
     Objects objects =
         gcsUtil.listObjects(
@@ -185,6 +308,81 @@ public void testHBaseSnapshotImport() throws Exception {
             null);
     Assert.assertNull(objects.getItems());
 
-    // TODO(vermas2012): Add more validations after this.
+    SyncTableOptions syncOpts = createSyncTableOptions();
+
+    PipelineResult result = SyncTableJob.buildPipeline(syncOpts).run();
+    state = result.waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    // Read the output files and validate that there are no mismatches.
+    Assert.assertEquals(0, readMismatchesFromOutputFiles().size());
+
+    // Validate the counters.
+    Map<String, Long> counters = getCountMap(result);
+    Assert.assertEquals(counters.get("ranges_matched"), (Long) 101L);
+    Assert.assertNull(counters.get("ranges_not_matched"));
+  }
+
+  /**
+   * Introduces multiple corruptions in imported table and validates that sync-table can detect
+   * them.
+   */
+  @Test
+  public void testHBaseSnapshotImportWithCorruptions() throws Exception {
+    // Import snapshot
+    ImportOptions importOpts = createImportOptions();
+    State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    // Rows where corruptions will be added.
+    byte[] mismatchRowAtStart = "000".getBytes();
+    byte[] mismatchRowInMiddle = "24".getBytes();
+    byte[] mismatchRowDeleted = "64".getBytes();
+    byte[] mismatchRowAtTheEnd = "999".getBytes();
+
+    // Introduce corruptions to the data in Bigtable. Delete data from Bigtable to simulate Bigtable
+    // missing data. Add data to Bigtable to simulate extra data in Bigtable. It is easier to update
+    // Bigtable than change the snapshots.
+    Table table = connection.getTable(TableName.valueOf(tableId));
+    Cell cellInMiddle = table.get(new Get(mismatchRowInMiddle)).rawCells()[0];
+    List<Put> puts =
+        Arrays.asList(
+            // Add a row at the start
+            new Put(mismatchRowAtStart)
+                .addColumn(CF.getBytes(), "random_col".getBytes(), 1L, "value000".getBytes())
+                .addColumn(CF.getBytes(), "random_col".getBytes(), 2L, "value001".getBytes()),
+            // change a cell in middle; clone* copies exact bytes, get*Array() needs offset/length
+            new Put(org.apache.hadoop.hbase.CellUtil.cloneRow(cellInMiddle))
+                .addColumn(
+                    org.apache.hadoop.hbase.CellUtil.cloneFamily(cellInMiddle),
+                    org.apache.hadoop.hbase.CellUtil.cloneQualifier(cellInMiddle),
+                    cellInMiddle.getTimestamp(),
+                    "corrupted_val".getBytes()),
+            // add a new row in the end
+            new Put(mismatchRowAtTheEnd)
+                .addColumn(CF.getBytes(), "random_col".getBytes(), 100L, "value999".getBytes()));
+
+    table.put(puts);
+    // Delete a random row in the middle. We should see 4 ranges mismatch as table is split on
+    // 1,2...9. All the updates are happening on a different split.
+    table.delete(new Delete(mismatchRowDeleted));
+
+    // Run SyncTable job and expect 4 mismatches.
+    SyncTableOptions syncOpts = createSyncTableOptions();
+    PipelineResult result = SyncTableJob.buildPipeline(syncOpts).run();
+    state = result.waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    List<RangeHash> syncTableOutputMismatches = readMismatchesFromOutputFiles();
+    Assert.assertEquals(4, syncTableOutputMismatches.size());
+    validateRowInRangeHashes(
+        Arrays.asList(
+            mismatchRowAtStart, mismatchRowAtTheEnd, mismatchRowDeleted, mismatchRowInMiddle),
+        syncTableOutputMismatches);
+
+    // Validate the counters (expected value first, so failure messages read correctly).
+    Map<String, Long> counters = getCountMap(result);
+    Assert.assertEquals((Long) 97L, counters.get("ranges_matched"));
+    Assert.assertEquals((Long) 4L, counters.get("ranges_not_matched"));
   }
 }
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
new file mode 100644
index 0000000000..96d5960423
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.SourceTestUtils;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class BufferedHadoopHashTableSourceTest {
+
+  private BufferedHadoopHashTableSource bufferedSource;
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  private static final String HASH_TABLE_OUTPUT_PATH_DIR = "gs://my-bucket/outputDir";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("AAAA".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW =
+      new ImmutableBytesWritable("ZZZZ".getBytes());
+  private static final ImmutableBytesWritable POST_STOP_ROW =
+      new ImmutableBytesWritable("z".getBytes()); // Lowercase z is lexicographically > uppercase Z
+  private static final ImmutableBytesWritable EMPTY_ROW =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+  private static final ImmutableBytesWritable START_HASH =
+      new ImmutableBytesWritable("START-HASH".getBytes());
+  private static final int BATCH_SIZE = 5;
+
+  @Before
+  public void setUp() throws Exception {
+    fakeTableHashWrapper =
+        new FakeTableHashWrapper(
+            START_ROW, STOP_ROW, new ArrayList<>(), new ArrayList<>(), new Scan());
+    bufferedSource =
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of("cbt-dev"),
+                StaticValueProvider.of(HASH_TABLE_OUTPUT_PATH_DIR),
+                START_ROW,
+                STOP_ROW,
+                new FakeTableHashWrapperFactory(fakeTableHashWrapper)),
+            BATCH_SIZE);
+  }
+
+  protected static ImmutableBytesWritable getKey(int keyIndex) {
+    return new ImmutableBytesWritable(("KEY-" + keyIndex).getBytes());
+  }
+
+  protected static ImmutableBytesWritable getHash(int hashIndex) {
+    return new ImmutableBytesWritable(("HASH-" + hashIndex).getBytes());
+  }
+
+  /**
+   * Populates the fakeTableHashWrapper with {@code numEntries} entries starting with startRow.
+   * Returns the expected batches of at most BATCH_SIZE RangeHashes, keyed by each batch's first
+   * start row; for numEntries=1, one batch with (startRow, stopRow, START_HASH) is returned.
+   */
+  protected List<KV<String, List<RangeHash>>> setupTestData(
+      ImmutableBytesWritable startRow, ImmutableBytesWritable stopRow, int numEntries) {
+    fakeTableHashWrapper.startRowInclusive = startRow;
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+    fakeTableHashWrapper.hashes.add(KV.of(startRow, START_HASH));
+    for (int i = 0; i < numEntries - 1; i++) {
+      fakeTableHashWrapper.hashes.add(KV.of(getKey(i), getHash(i)));
+    }
+
+    List<KV<String, List<RangeHash>>> out = new ArrayList<>();
+    // Setup RangeHashes to be returned
+    List<RangeHash> expectedRangeHashes = new ArrayList<>();
+    ImmutableBytesWritable key = startRow;
+    ImmutableBytesWritable hash = START_HASH;
+    for (int i = 0; i < numEntries - 1; i++) {
+      expectedRangeHashes.add(RangeHash.of(key, getKey(i), hash));
+      key = getKey(i);
+      hash = getHash(i);
+      if (expectedRangeHashes.size() % BATCH_SIZE == 0) {
+        out.add(
+            KV.of(
+                Bytes.toStringBinary(expectedRangeHashes.get(0).startInclusive.copyBytes()),
+                expectedRangeHashes));
+        expectedRangeHashes = new ArrayList<>();
+      }
+    }
+    // Process the last range
+    expectedRangeHashes.add(RangeHash.of(key, stopRow, hash));
+    // Finalize the last batch
+    out.add(
+        KV.of(
+            Bytes.toStringBinary(expectedRangeHashes.get(0).startInclusive.copyBytes()),
+            expectedRangeHashes));
+
+    return out;
+  }
+
+  @Test
+  public void testHashReaderEmpty() throws IOException {
+    // The tableHashWrapper has no hashes, this should result in empty source.
+    assertEquals(Arrays.asList(), SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderPartialBuffer() throws IOException {
+    // Setup 4 entries in this hashtable datafile.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 4);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleBatches() throws IOException {
+    // Setup 20 entries in this hashtable datafile, i.e. exactly 4 batches of BATCH_SIZE ranges.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 20);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleBatchesWithPartialBatchAtEnd() throws IOException {
+    // Setup 23 entries in this hashtable datafile: 4 full batches plus a final batch of 3.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 23);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testSplitEqualsUnsplit() throws Exception {
+    fakeTableHashWrapper.partitions = Arrays.asList(getKey(4), getKey(9));
+    SourceTestUtils.assertSourcesEqualReferenceSource(
+        bufferedSource, bufferedSource.split(0, null), null);
+  }
+
+  @Test
+  public void testUnstartedReaderEqualsStarted() throws Exception {
+    setupTestData(START_ROW, STOP_ROW, 6);
+    SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(
+        bufferedSource.createReader(null), null);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
new file mode 100644
index 0000000000..8c608b74db
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
@@ -0,0 +1,473 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.models.GCRules.GCRULES;
+
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient;
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings;
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.models.CreateTableRequest;
+import com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.emulator.v2.BigtableEmulatorRule;
+import com.google.cloud.bigtable.hbase.BigtableConfiguration;
+import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor.BigtableResultHasher;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@RunWith(JUnit4.class)
+public class ComputeAndValidateHashFromBigtableDoFnTest {
+
+  // Empty key; used as an unbounded start or stop row when building ranges.
+  private static final byte[] EMPTY_ROW_KEY = HConstants.EMPTY_BYTE_ARRAY;
+  protected final Logger LOG = LoggerFactory.getLogger(getClass());
+
+  public static final String FAKE_TABLE = "fake-table";
+  private static final String ROW_KEY_PREFIX = "row-";
+  private static final String VALUE_PREFIX = "value-";
+  private static final byte[] EXTRA_VALUE = "add".getBytes();
+  private static final byte[] CF = "cf".getBytes();
+  // Second column family. It must be named differently from CF, otherwise the table is created
+  // with a single family and writes to CF2 land in CF.
+  private static final byte[] CF2 = "cf2".getBytes();
+  private static final byte[] COL = "col".getBytes();
+  // Base cell timestamp; cell j of a row is written at TS + j.
+  private static final long TS = 1000L;
+  // First and last (inclusive) row indexes written by writeDataToTable().
+  private static final int FIRST_ROW_INDEX = 20;
+  private static final int LAST_ROW_INDEX = 31;
+
+  @Rule public final BigtableEmulatorRule bigtableEmulator = BigtableEmulatorRule.create();
+
+  @Rule public final transient TestPipeline p = TestPipeline.create();
+
+  private ComputeAndValidateHashFromBigtableDoFn doFn;
+
+  // Clients that will be connected to the emulator
+  private BigtableTableAdminClient tableAdminClient;
+  // HBase connection backing `table`; kept in a field so tearDown() can release it.
+  private Connection connection;
+  private Table table;
+  // Fake a TableHashWrapper.
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  // Expected range hashes fed to the DoFn; reset for every test.
+  private List<RangeHash> hashes;
+
+  /**
+   * Connects the admin and HBase clients to the emulator, builds the DoFn under test around a
+   * fake TableHashWrapper, creates the test table and fills it with data.
+   */
+  @Before
+  public void setUp() throws IOException {
+    hashes = new ArrayList<>();
+    // Initialize the clients to connect to the emulator
+    tableAdminClient =
+        BigtableTableAdminClient.create(
+            BigtableTableAdminSettings.newBuilderForEmulator(bigtableEmulator.getPort())
+                .setProjectId("fake-project")
+                .setInstanceId("fake-instance")
+                .build());
+
+    CloudBigtableTableConfiguration config =
+        new CloudBigtableTableConfiguration.Builder()
+            .withProjectId("fake-project")
+            .withInstanceId("fake-instance")
+            .withTableId(FAKE_TABLE)
+            .withConfiguration(
+                BigtableOptionsFactory.BIGTABLE_EMULATOR_HOST_KEY,
+                "localhost:" + bigtableEmulator.getPort())
+            .build();
+
+    connection = BigtableConfiguration.connect(config.toHBaseConfig());
+    table = connection.getTable(TableName.valueOf(FAKE_TABLE));
+    fakeTableHashWrapper = new FakeTableHashWrapper();
+    // Scan all the cells for the column, HBase scan fetches 1 cell/column by default
+    fakeTableHashWrapper.scan = new Scan().setMaxVersions();
+
+    FakeTableHashWrapperFactory fakeFactory = new FakeTableHashWrapperFactory(fakeTableHashWrapper);
+
+    doFn =
+        new ComputeAndValidateHashFromBigtableDoFn(
+            config,
+            StaticValueProvider.of(FAKE_TABLE),
+            StaticValueProvider.of("proj"),
+            StaticValueProvider.of("hash"),
+            fakeFactory);
+
+    // Create a test table that can be used in tests
+    tableAdminClient.createTable(
+        CreateTableRequest.of(FAKE_TABLE)
+            .addFamily(new String(CF), GCRULES.maxVersions(100))
+            .addFamily(new String(CF2), GCRULES.maxVersions(100)));
+
+    p.getCoderRegistry().registerCoderForClass(RangeHash.class, new RangeHashCoder());
+
+    // Fill CBT table with data.
+    writeDataToTable();
+  }
+
+  /**
+   * Drops the test table and releases the clients opened in setUp(). The clients are closed even
+   * when the table deletion fails, so a failing test does not leak connections.
+   */
+  @After
+  public void tearDown() throws IOException {
+    try {
+      // TODO should we delete the table for each test?
+      tableAdminClient.deleteTable(FAKE_TABLE);
+    } finally {
+      try {
+        table.close();
+        connection.close();
+      } finally {
+        tableAdminClient.close();
+      }
+    }
+  }
+
+  /** Returns the bytes of the row key for row index {@code i}: ROW_KEY_PREFIX plus the index. */
+  private byte[] getRowKey(int i) {
+    String rowKey = ROW_KEY_PREFIX + i;
+    return rowKey.getBytes();
+  }
+
+  /** Returns the bytes of the cell value "VALUE_PREFIX&lt;rowIndex&gt;-&lt;cellIndex&gt;". */
+  private byte[] getValue(int rowIndex, int cellIndex) {
+    String cellValue = VALUE_PREFIX + rowIndex + "-" + cellIndex;
+    return cellValue.getBytes();
+  }
+
+  /**
+   * Writes rows FIRST_ROW_INDEX..LAST_ROW_INDEX (inclusive) to the table, two cells per row in
+   * column CF:COL at timestamps TS and TS + 1.
+   */
+  private void writeDataToTable() throws IOException {
+    // Tests use the rows 21-30. The extra rows at both ends simulate the real world scenario
+    // where other work items process neighbouring parts of the table in parallel.
+    List<Put> pendingPuts = new ArrayList<>();
+    for (int rowIndex = FIRST_ROW_INDEX; rowIndex <= LAST_ROW_INDEX; rowIndex++) {
+      byte[] rowKey = getRowKey(rowIndex);
+      for (int cellIndex = 0; cellIndex < 2; cellIndex++) {
+        // One Put per cell, so every row ends up with 2 cells.
+        byte[] cellValue = getValue(rowIndex, cellIndex);
+        pendingPuts.add(new Put(rowKey).addColumn(CF, COL, TS + cellIndex, cellValue));
+      }
+    }
+    table.put(pendingPuts);
+  }
+
+  /** Deletes every row whose index lies in [startIndex, stopIndex); stopIndex is exclusive. */
+  private void deleteRange(int startIndex, int stopIndex) throws IOException {
+    int rowIndex = startIndex;
+    while (rowIndex < stopIndex) {
+      table.delete(new Delete(getRowKey(rowIndex)));
+      rowIndex++;
+    }
+  }
+
+  /**
+   * Creates a RangeHash for range [startRow, stopRow) by scanning the table's current contents
+   * (all cell versions) and hashing every returned row.
+   *
+   * @param startRow inclusive start key of the range
+   * @param stopRow exclusive stop key of the range
+   * @return the range together with the hash of the rows currently stored in it
+   * @throws IOException if the scan fails
+   */
+  private RangeHash createHash(byte[] startRow, byte[] stopRow) throws IOException {
+    // Render the keys as text; concatenating a byte[] would only log its identity hash.
+    LOG.debug("Creating hash for rows {} to {}", new String(startRow), new String(stopRow));
+    BigtableResultHasher hasher = new BigtableResultHasher();
+    hasher.startBatch(new ImmutableBytesWritable(startRow));
+
+    // Scan all the cells for a column.
+    Scan scan = new Scan().setMaxVersions().withStartRow(startRow).withStopRow(stopRow, false);
+
+    // Read the rows from Bigtable and compute the expected hash. The scanner is closed as soon
+    // as the range has been consumed so it does not stay open for the rest of the test.
+    try (ResultScanner scanner = table.getScanner(scan)) {
+      for (Result result : scanner) {
+        LOG.debug("Adding result to hash: {}", result);
+        hasher.hashResult(result);
+      }
+    }
+    hasher.finishBatch();
+    return RangeHash.of(
+        new ImmutableBytesWritable(startRow),
+        new ImmutableBytesWritable(stopRow),
+        hasher.getBatchHash());
+  }
+
+  /**
+   * Asserts the attempted values of the "ranges_matched" and "ranges_not_matched" counters.
+   *
+   * <p>A counter that is missing from the metrics is treated as 0, so an expectation of 0 is
+   * verified as well instead of being skipped.
+   *
+   * @param result result of the finished pipeline run
+   * @param expectedMatches expected value of "ranges_matched"
+   * @param expectedMismatches expected value of "ranges_not_matched"
+   */
+  private void validateCounters(
+      PipelineResult result, Long expectedMatches, Long expectedMismatches) {
+    MetricQueryResults metrics = result.metrics().allMetrics();
+    Map<String, Long> counters =
+        StreamSupport.stream(metrics.getCounters().spliterator(), false)
+            .collect(Collectors.toMap((m) -> m.getName().getName(), (m) -> m.getAttempted()));
+    Assert.assertEquals(
+        "ranges_matched", expectedMatches, counters.getOrDefault("ranges_matched", 0L));
+    Assert.assertEquals(
+        "ranges_not_matched",
+        expectedMismatches,
+        counters.getOrDefault("ranges_not_matched", 0L));
+  }
+
+  ////////// Happy case tests for various setups//////////////////////
+  @Test
+  public void testHashMatchesForMultipleRange() throws Exception {
+    // Two adjacent ranges hashed from the unmodified table.
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(28)));
+
+    String startKey = new String(getRowKey(21));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    // The table was not touched after hashing: no range is emitted and both ranges match.
+    PAssert.that(emittedRanges).empty();
+    validateCounters(p.run(), 2L, 0L);
+  }
+
+  @Test
+  public void testHashMatchesForSingleRange() throws Exception {
+    // A single range hashed from the unmodified table.
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+
+    String startKey = new String(getRowKey(21));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    // containsInAnyOrder() without arguments asserts that nothing is emitted.
+    PAssert.that(emittedRanges).containsInAnyOrder();
+    validateCounters(p.run(), 1L, 0L);
+  }
+
+  @Test
+  public void testHashMatchesForFullTableScanWithMultipleRange() throws Exception {
+    // Two ranges that together span the whole table: the empty key stands for an open start
+    // and an open stop.
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), EMPTY_ROW_KEY));
+
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    PAssert.that(emittedRanges).empty();
+    validateCounters(p.run(), 2L, 0L);
+  }
+
+  @Test
+  public void testHashMatchesForMultipleSingleRowRange() throws Exception {
+    // Three consecutive ranges, each covering exactly one row (22, 23 and 24).
+    hashes.add(createHash(getRowKey(22), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(25)));
+
+    String startKey = new String(getRowKey(22));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    PAssert.that(emittedRanges).empty();
+    validateCounters(p.run(), 3L, 0L);
+  }
+
+  ///////////////// Test mismatches when Bigtable has extra rows ////////////////////
+  @Test
+  public void testAdditionalCellInMiddle() throws Exception {
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(30)));
+
+    // After hashing, add one more cell to row 25, which lies in the second range.
+    Put extraCell = new Put(getRowKey(25)).addColumn(CF, COL, EXTRA_VALUE);
+    table.put(extraCell);
+
+    String startKey = new String(getRowKey(21));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    // Only the range holding the extra cell is emitted.
+    PAssert.that(emittedRanges).containsInAnyOrder(hashes.get(1));
+    validateCounters(p.run(), 2L, 1L);
+  }
+
+  @Test
+  public void testAdditionalRowsAtEnds() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Extra row before the existing data: "row-1" sorts ahead of "row-20".
+    Put leadingRow = new Put(getRowKey(1)).addColumn(CF, COL, EXTRA_VALUE);
+    table.put(leadingRow);
+
+    // Extra row after the existing data: "row-5" sorts behind "row-31".
+    Put trailingRow = new Put(getRowKey(5)).addColumn(CF, COL, EXTRA_VALUE);
+    table.put(trailingRow);
+
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    // The first and the last range each picked up an extra row.
+    PAssert.that(emittedRanges).containsInAnyOrder(hashes.get(0), hashes.get(2));
+    validateCounters(p.run(), 1L, 2L);
+  }
+
+  ///////////////////// Test different values ///////////////////////////
+  @Test
+  public void testDifferentValues() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Modify the CF: drop row 20's cell at TS and write its value under CF2.
+    // NOTE(review): the replacement cell goes to row 1, not row 20 - confirm this is intended.
+    Delete dropRow20Cell = new Delete(getRowKey(20)).addColumns(CF, COL, TS);
+    table.delete(dropRow20Cell);
+    table.put(new Put(getRowKey(1)).addColumn(CF2, COL, TS, getValue(20, 0)));
+
+    // Modify the qualifier: same row, family, timestamp and value under "random-col".
+    Delete dropRow22Cell = new Delete(getRowKey(22)).addColumns(CF, COL, TS);
+    table.delete(dropRow22Cell);
+    table.put(new Put(getRowKey(22)).addColumn(CF, "random-col".getBytes(), TS, getValue(22, 0)));
+
+    // Modify the timestamp: same value rewritten at timestamp 1.
+    Delete dropRow24Cell = new Delete(getRowKey(24)).addColumns(CF, COL, TS);
+    table.delete(dropRow24Cell);
+    table.put(new Put(getRowKey(24)).addColumn(CF, COL, 1, getValue(24, 0)));
+
+    // Modify the value: the cell is rewritten through the addColumn overload that takes no
+    // timestamp.
+    // NOTE(review): the value bytes are unchanged, so presumably only the timestamp differs -
+    // confirm.
+    Delete dropRow26Cell = new Delete(getRowKey(26)).addColumns(CF, COL, TS);
+    table.delete(dropRow26Cell);
+    table.put(new Put(getRowKey(26)).addColumn(CF, COL, getValue(26, 0)));
+
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    // Every range except the last one was touched.
+    PAssert.that(emittedRanges)
+        .containsInAnyOrder(hashes.get(0), hashes.get(1), hashes.get(2), hashes.get(3));
+    validateCounters(p.run(), 1L, 4L);
+  }
+
+  ////////////////// Tests with CBT missing data //////////////////////////////
+  @Test
+  public void testMissingRows() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Remove one row from the first range, one from the middle range and one from the last.
+    Delete firstRow = new Delete(getRowKey(FIRST_ROW_INDEX));
+    table.delete(firstRow);
+
+    Delete middleRow = new Delete(getRowKey(24));
+    table.delete(middleRow);
+
+    Delete lastRow = new Delete(getRowKey(LAST_ROW_INDEX));
+    table.delete(lastRow);
+
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    // Exactly the three ranges that lost a row are emitted.
+    PAssert.that(emittedRanges).containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4));
+    validateCounters(p.run(), 2L, 3L);
+  }
+
+  @Test
+  public void testMissingRanges() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Empty the first range.
+    deleteRange(FIRST_ROW_INDEX, 21);
+
+    // Empty a range in the middle.
+    deleteRange(23, 25);
+
+    // Empty the last two ranges, so the bigtable scanner finishes while several row-ranges are
+    // still left to process.
+    deleteRange(27, LAST_ROW_INDEX + 1);
+
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    PAssert.that(emittedRanges)
+        .containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4), hashes.get(5));
+    validateCounters(p.run(), 2L, 4L);
+  }
+
+  @Test
+  public void testCbtEmpty() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Delete all data from bigtable. deleteRange() treats its stop index as exclusive, so pass
+    // LAST_ROW_INDEX + 1; stopping at LAST_ROW_INDEX would leave the last row in the table.
+    deleteRange(FIRST_ROW_INDEX, LAST_ROW_INDEX + 1);
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    // With no rows left, every expected range is emitted.
+    PAssert.that(output).containsInAnyOrder(hashes);
+    PipelineResult result = p.run();
+    validateCounters(result, 0L, 3L);
+  }
+
+  ////////////////////// Test that scan is used from TableHash.////////////////////////
+  @Test
+  public void testScanFromTableHash() throws Exception {
+    // The expected hashes are computed by createHash(), which scans every cell version.
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(30)));
+
+    // Reset the TableHashWrapper Scan to the default. The Scan from HashTable.TableHash decides
+    // which cells go into the hash, and CBT has to use the same cells for validation.
+    fakeTableHashWrapper.scan = new Scan();
+
+    String startKey = new String(getRowKey(21));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    // Every range is emitted because the DoFn now reads with a different scan.
+    PAssert.that(emittedRanges).containsInAnyOrder(hashes);
+    validateCounters(p.run(), 0L, 3L);
+  }
+
+  ////////////////////// Combination of different cases //////////////////////////////////
+  @Test
+  public void testMismatchesComprehensive() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Range 0: delete the whole range from CBT.
+    deleteRange(FIRST_ROW_INDEX, 21);
+
+    // Range 2: delete a single row from CBT.
+    Delete middleRow = new Delete(getRowKey(23));
+    table.delete(middleRow);
+
+    // Range 4: replace row 27's TS cell with one written without an explicit timestamp.
+    Delete dropRow27Cell = new Delete(getRowKey(27)).addColumns(CF, COL, TS);
+    table.delete(dropRow27Cell);
+    table.put(new Put(getRowKey(27)).addColumn(CF, COL, getValue(27, 0)));
+
+    // Range 5: add an extra row at the end ("row-5" sorts behind "row-31").
+    Put trailingRow = new Put(getRowKey(5)).addColumn(CF, COL, EXTRA_VALUE);
+    table.put(trailingRow);
+
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> rangeHashInput =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    PCollection<RangeHash> emittedRanges = rangeHashInput.apply(ParDo.of(doFn));
+
+    PAssert.that(emittedRanges)
+        .containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4), hashes.get(5));
+    validateCounters(p.run(), 2L, 4L);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
new file mode 100644
index 0000000000..ee2b6814e2
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * A fake for TableHashWrapper that allows us to mock the behavior of hbase's HashTable.TableHash
+ */
+public class FakeTableHashWrapper implements TableHashWrapper {
+
+  // Sorted list of partition keys splitting the key range.
+  public List<ImmutableBytesWritable> partitions;
+  // List of <Key,Hash> sorted by key.
+  public List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> hashes;
+  public ImmutableBytesWritable startRowInclusive;
+  public ImmutableBytesWritable stopRowExclusive;
+  public Scan scan;
+  private static final long serialVersionUID = 34876543L;
+
+  public FakeTableHashWrapper() {
+    this(
+        new ImmutableBytesWritable(),
+        new ImmutableBytesWritable(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        new Scan());
+  }
+
+  public FakeTableHashWrapper(
+      ImmutableBytesWritable startRowInclusive,
+      ImmutableBytesWritable stopRowExclusive,
+      List<ImmutableBytesWritable> partitions,
+      List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> hashes,
+      Scan scan) {
+    super();
+    this.startRowInclusive = startRowInclusive;
+    this.stopRowExclusive = stopRowExclusive;
+    this.partitions = partitions;
+    this.hashes = hashes;
+    this.scan = scan;
+  }
+
+  @Override
+  public int getNumHashFiles() {
+    return partitions.size() + 1;
+  }
+
+  @Override
+  public ImmutableList<ImmutableBytesWritable> getPartitions() {
+    return ImmutableList.copyOf(partitions);
+  }
+
+  @Override
+  public ImmutableBytesWritable getStartRow() {
+    return startRowInclusive;
+  }
+
+  @Override
+  public ImmutableBytesWritable getStopRow() {
+    return stopRowExclusive;
+  }
+
+  @Override
+  public Scan getScan() {
+    return scan;
+  }
+
+  @Override
+  public TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow) {
+    return new FakeTableHashReader(startRow);
+  }
+
+  private void writeObject(ObjectOutputStream s) throws IOException {
+    Gson gson = new Gson();
+    s.writeObject(gson.toJson(scan));
+    s.writeObject(gson.toJson(startRowInclusive));
+    s.writeObject(gson.toJson(stopRowExclusive));
+    s.writeObject(gson.toJson(partitions));
+    s.writeObject(gson.toJson(hashes));
+  }
+
+  private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
+    Gson gson = new Gson();
+    scan = gson.fromJson((String) s.readObject(), Scan.class);
+    startRowInclusive = gson.fromJson((String) s.readObject(), ImmutableBytesWritable.class);
+    stopRowExclusive = gson.fromJson((String) s.readObject(), ImmutableBytesWritable.class);
+    partitions = gson.fromJson((String) s.readObject(), ArrayList.class);
+    hashes = gson.fromJson((String) s.readObject(), ArrayList.class);
+  }
+
+  public class FakeTableHashReader implements TableHashReader {
+    private final ImmutableBytesWritable startRow;
+    // Copy of items to be read by this reader.
+    private final List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> entriesToRead;
+    // First next() will make index = 0, and compare it with the size of entriesToRead.
+    private int index = -1;
+
+    public FakeTableHashReader(ImmutableBytesWritable startRow) {
+      this.startRow = startRow;
+      entriesToRead = new ArrayList<>();
+      for (KV<ImmutableBytesWritable, ImmutableBytesWritable> hash : hashes) {
+        // Collect all the entries after startRow.
+        if (hash.getKey().compareTo(startRow) >= 0) {
+          entriesToRead.add(hash);
+        }
+      }
+    }
+
+    @Override
+    public boolean next() throws IOException {
+      return ++index < entriesToRead.size();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentKey() {
+      return entriesToRead.get(index).getKey();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentHash() {
+      return entriesToRead.get(index).getValue();
+    }
+
+    @Override
+    public void close() throws IOException {
+      // NOOP
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
new file mode 100644
index 0000000000..2e65e3b855
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+/**
+ * Test factory that always hands out the FakeTableHashWrapper it was constructed with, ignoring
+ * the project id and hash directory arguments.
+ */
+public class FakeTableHashWrapperFactory extends TableHashWrapperFactory {
+
+  private static final long serialVersionUID = 269854624L;
+
+  // The single wrapper instance returned from every getTableHash() call.
+  private final FakeTableHashWrapper fakeTableHashWrapper;
+
+  /** Creates a factory that returns {@code wrapper} from every getTableHash() call. */
+  public FakeTableHashWrapperFactory(FakeTableHashWrapper wrapper) {
+    this.fakeTableHashWrapper = wrapper;
+  }
+
+  /** Returns the configured fake; both arguments are unused. */
+  @Override
+  public TableHashWrapper getTableHash(String projectId, String sourceHashDir) {
+    return fakeTableHashWrapper;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
new file mode 100644
index 0000000000..fa88a56d14
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.SourceTestUtils;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HadoopHashBasedReaderTest {
+
+  private HadoopHashTableSource hashTableSource;
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  private static final String HASH_TABLE_OUTPUT_PATH_DIR = "gs://my-bucket/outputDir";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("AAAA".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW =
+      new ImmutableBytesWritable("ZZZZ".getBytes());
+  private static final ImmutableBytesWritable EMPTY_ROW =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+  private static final ImmutableBytesWritable START_HASH =
+      new ImmutableBytesWritable("START-HASH".getBytes());
+
+  @Before
+  public void setUp() throws Exception {
+    fakeTableHashWrapper =
+        new FakeTableHashWrapper(
+            START_ROW, STOP_ROW, new ArrayList<>(), new ArrayList<>(), new Scan());
+    hashTableSource =
+        new HadoopHashTableSource(
+            StaticValueProvider.of("cbt-dev"),
+            StaticValueProvider.of(HASH_TABLE_OUTPUT_PATH_DIR),
+            START_ROW,
+            STOP_ROW,
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+  }
+
+  /** Returns the generated key "KEY-&lt;keyIndex&gt;" wrapped in an ImmutableBytesWritable. */
+  protected static ImmutableBytesWritable getKey(int keyIndex) {
+    String key = "KEY-" + keyIndex;
+    return new ImmutableBytesWritable(key.getBytes());
+  }
+
+  /** Returns the generated hash "HASH-&lt;hashIndex&gt;" wrapped in an ImmutableBytesWritable. */
+  protected static ImmutableBytesWritable getHash(int hashIndex) {
+    String hash = "HASH-" + hashIndex;
+    return new ImmutableBytesWritable(hash.getBytes());
+  }
+
+  /**
+   * Sets the fakeTableHashWrapper's row bounds and adds hash entries to it: one entry at {@code
+   * startRow} carrying START_HASH, followed by {@code numEntries - 1} generated entries (getKey(i),
+   * getHash(i)).
+   *
+   * <p>Returns the RangeHashes expected for this data. Each range runs from one entry's key to the
+   * next entry's key and carries the hash of the entry that opens it; the last range ends at
+   * {@code stopRow}. For numEntries=1 a single RangeHash (startRow, stopRow, START_HASH) is
+   * returned.
+   */
+  protected List<RangeHash> setupTestData(
+      ImmutableBytesWritable startRow, ImmutableBytesWritable stopRow, int numEntries) {
+    fakeTableHashWrapper.startRowInclusive = startRow;
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+
+    // The first entry always sits at startRow.
+    fakeTableHashWrapper.hashes.add(KV.of(startRow, START_HASH));
+
+    List<RangeHash> expectedRangeHashes = new ArrayList<>();
+    ImmutableBytesWritable rangeStart = startRow;
+    ImmutableBytesWritable rangeHash = START_HASH;
+    int generatedEntries = numEntries - 1;
+    for (int i = 0; i < generatedEntries; i++) {
+      fakeTableHashWrapper.hashes.add(KV.of(getKey(i), getHash(i)));
+      // The key of entry i closes the range opened by the previous entry.
+      expectedRangeHashes.add(RangeHash.of(rangeStart, getKey(i), rangeHash));
+      rangeStart = getKey(i);
+      rangeHash = getHash(i);
+    }
+    // The range opened by the last entry runs up to stopRow.
+    expectedRangeHashes.add(RangeHash.of(rangeStart, stopRow, rangeHash));
+    return expectedRangeHashes;
+  }
+
+  /////////////////////////////// Test the end of HashTable Output /////////////////////////
+
+  @Test
+  public void testHashReaderEmpty() throws IOException {
+    // The tableHashWrapper has no hashes, this should result in empty source.
+    assertEquals(Arrays.asList(), SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderSingleHashBatch() throws IOException {
+    // Setup 1 entry in this hashtable datafile. The test is setup so that HashTable datafile has
+    // only 1 entry.
+    List<RangeHash> expected = setupTestData(START_ROW, STOP_ROW, 1);
+
+    assertEquals(expected, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleHashBatch() throws IOException {
+    // Setup 4 entries in this hashtable datafile.
+    List<RangeHash> expected = setupTestData(START_ROW, STOP_ROW, 4);
+    assertEquals(expected, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  //////////////////// Test the end of HashTable output when end of range is ""/////////////////
+  @Test
+  public void testHashReaderWithEmptyEndRow() throws IOException {
+    // Setup 4 entries in this hashtable datafile with no start or stop keys set.
+    List<RangeHash> expected = setupTestData(EMPTY_ROW, EMPTY_ROW, 4);
+    hashTableSource.startRowInclusive = EMPTY_ROW;
+    hashTableSource.stopRowExclusive = EMPTY_ROW;
+    assertEquals(expected, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  /////////////////////////////// Test reader.getCurrent() >= stopRow /////////////////////////
+
+  @Test
+  public void testHashReaderWorkItemEndedOnFirstBatch() throws IOException {
+    // Setup 1 entry in this hashtable datafile. This entry is outside of the workitem's row
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, START_HASH));
+    // Source will be empty as no hashes fall in its bounds.
+    assertEquals(new ArrayList<RangeHash>(), SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderWorkItemEndedOnSecondEntry() throws IOException {
+    // Setup 1 entry in this hashtable datafile. The test is setup so that HashTable datafile has
+    // only 1 entry.
+    List<RangeHash> expected = setupTestData(START_ROW, STOP_ROW, 1);
+    // Add a next entry at the stop row. Reader should stop and read just 1 entry.
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, getHash(100)));
+
+    assertEquals(expected, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderWorkItemEndedAfterMultipleBatches() throws IOException {
+    // Setup 4 entries in this hashtable datafile.
+    List<RangeHash> expected = setupTestData(START_ROW, STOP_ROW, 4);
+    // Add a next entry at the stop row. Reader should stop and read just 4 entry.
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, getHash(100)));
+    assertEquals(expected, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testSplitEqualsUnsplit() throws Exception {
+    // Partition the key range at KEY-2 and KEY-4, then fill the fake hash table with 6 entries.
+    fakeTableHashWrapper.partitions = Arrays.asList(getKey(2), getKey(4));
+    setupTestData(START_ROW, STOP_ROW, 6);
+
+    // Reading the splits must yield the same records as reading the unsplit source.
+    SourceTestUtils.assertSourcesEqualReferenceSource(
+        hashTableSource, hashTableSource.split(1, null), null);
+  }
+
+  @Test
+  public void testUnstartedReaderEqualsStarted() throws Exception {
+    // Fill the fake hash table with 6 entries, then check that a freshly created (not yet
+    // started) reader reads the same records as its source.
+    setupTestData(START_ROW, STOP_ROW, 6);
+    SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(
+        hashTableSource.createReader(null), null);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
new file mode 100644
index 0000000000..a3aba3f756
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.HashBasedReader;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.util.List;
+import junit.framework.TestCase;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.BoundedSource.BoundedReader;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Unit tests for {@link HadoopHashTableSource}: how {@code split} turns the row range and the
+ * partitions reported by the hash-table wrapper into per-range sources, and which row boundaries
+ * the readers created from those sources end up with.
+ *
+ * <p>The tests run under the JUnit4 runner; {@link TestCase} is extended for its inherited {@code
+ * assertEquals} helpers.
+ */
+@RunWith(JUnit4.class)
+public class HadoopHashTableSourceTest extends TestCase {
+
+  // Source under test; (re)built by the helpers and tests below.
+  private HadoopHashTableSource source;
+  // Fake wrapper handed to the source through FakeTableHashWrapperFactory; recreated in setUp().
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  private static final ValueProvider<String> PROJECT_ID = StaticValueProvider.of("test-project");
+  private static final ValueProvider<String> HASH_TABLE_OUTPUT_PATH_DIR =
+      StaticValueProvider.of("gs://my-bucket/outputDir");
+  // Row keys used as range boundaries and partition points by the tests.
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("a".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW = new ImmutableBytesWritable("z".getBytes());
+  private static final ImmutableBytesWritable PARTITION1 =
+      new ImmutableBytesWritable("d".getBytes());
+  private static final ImmutableBytesWritable PARTITION2 =
+      new ImmutableBytesWritable("g".getBytes());
+  private static final ImmutableBytesWritable EMPTY_ROW_KEY =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+
+  /** Gives every test its own fake wrapper so configured state cannot leak between tests. */
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    fakeTableHashWrapper = new FakeTableHashWrapper();
+  }
+
+  /**
+   * Configures the fake wrapper with the given row range and partitions, builds {@link #source}
+   * over the same range and returns the result of splitting it.
+   *
+   * <p>The wildcard return type mirrors what {@code split} hands back, which avoids an unchecked
+   * cast to {@code List<BoundedSource<RangeHash>>}.
+   *
+   * @param partitions partition keys the fake wrapper should report
+   * @param startRow start row set on both the fake wrapper and the source
+   * @param stopRow stop row set on both the fake wrapper and the source
+   * @return the sources returned by {@code source.split(0, null)}
+   * @throws IOException if splitting the source fails
+   */
+  private List<? extends BoundedSource<RangeHash>> getSplitSources(
+      List<ImmutableBytesWritable> partitions,
+      ImmutableBytesWritable startRow,
+      ImmutableBytesWritable stopRow)
+      throws IOException {
+    fakeTableHashWrapper.startRowInclusive = startRow;
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+    fakeTableHashWrapper.partitions = partitions;
+
+    source =
+        new HadoopHashTableSource(
+            PROJECT_ID,
+            HASH_TABLE_OUTPUT_PATH_DIR,
+            startRow,
+            stopRow,
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+    return source.split(0, null);
+  }
+
+  /**
+   * Asserts that splitting a source over {@code [startRow, stopRow)} with the given partitions
+   * yields exactly {@code expectedSources}, in order.
+   *
+   * @param partitions partition keys the fake wrapper should report
+   * @param startRow start row of the range to split
+   * @param stopRow stop row of the range to split
+   * @param expectedSources sources the split is expected to produce
+   * @throws IOException if splitting the source fails
+   */
+  private void testSourceSplits(
+      List<ImmutableBytesWritable> partitions,
+      ImmutableBytesWritable startRow,
+      ImmutableBytesWritable stopRow,
+      List<BoundedSource<RangeHash>> expectedSources)
+      throws IOException {
+    assertEquals(expectedSources, getSplitSources(partitions, startRow, stopRow));
+  }
+
+  @Test
+  public void testSplitZeroPartitions() throws IOException {
+    // Row range [a-z) with no splits: a single source covering the whole range is expected.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, STOP_ROW));
+    testSourceSplits(ImmutableList.of(), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitOnePartition() throws IOException {
+    // Row range [a-z) with 1 split on {d}. The data files will be for {[a,d), [d,z)}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testMultiplePartitons() throws IOException {
+    // Row range [a-z) with splits on {d,g}. The data files will be for {[a,d), [d,g), [g,z)}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitEmptyStartRow() throws IOException {
+    // Row range [""-z) with splits on {d,g}. The data files will be for {["",d), [d,g), [g,z)}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, EMPTY_ROW_KEY, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), EMPTY_ROW_KEY, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitEmptyStopRow() throws IOException {
+    // Row range [a-"") with splits on {d,g}. The data files will be for {[a,d), [d,g), [g,"")}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, EMPTY_ROW_KEY));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), START_ROW, EMPTY_ROW_KEY, expected);
+  }
+
+  @Test
+  public void testSplitFullTableScan() throws IOException {
+    // Row range [""-"") with splits on {d,g}. The data files will be for {["",d), [d,g), [g,"")}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, EMPTY_ROW_KEY, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, EMPTY_ROW_KEY));
+    testSourceSplits(
+        ImmutableList.of(PARTITION1, PARTITION2), EMPTY_ROW_KEY, EMPTY_ROW_KEY, expected);
+  }
+
+  @Test
+  public void testCreateReaderWithoutSplit() throws IOException {
+    source =
+        new HadoopHashTableSource(
+            PROJECT_ID,
+            HASH_TABLE_OUTPUT_PATH_DIR,
+            // When split is not called, start/stop are uninitialized. Start/stop are runtime params
+            // and are initialized in split/createReader.
+            null,
+            null,
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+    // Set up the boundaries on the TableHashWrapper to be used by the source.
+    fakeTableHashWrapper.startRowInclusive = START_ROW;
+    fakeTableHashWrapper.stopRowExclusive = STOP_ROW;
+
+    // Create a new reader.
+    BoundedReader<RangeHash> reader = source.createReader(null);
+
+    // Validate that the reader was properly created: it must pick up the wrapper's boundaries.
+    assertEquals(HashBasedReader.class, reader.getClass());
+    assertEquals(source, reader.getCurrentSource());
+    HashBasedReader hashBasedReader = (HashBasedReader) reader;
+    assertEquals(START_ROW, hashBasedReader.startRowInclusive);
+    assertEquals(STOP_ROW, hashBasedReader.stopRowExclusive);
+  }
+
+  @Test
+  public void testCreateReaderAfterSplit() throws IOException {
+    // A single partition yields 2 sources; the first one covers [a, d).
+    List<? extends BoundedSource<RangeHash>> splitSources =
+        getSplitSources(ImmutableList.of(PARTITION1), START_ROW, STOP_ROW);
+    BoundedSource<RangeHash> splitHashSource = splitSources.get(0);
+
+    // Create a new reader.
+    BoundedReader<RangeHash> reader = splitHashSource.createReader(null);
+
+    // Validate that the reader was properly created: it must use the split source's boundaries.
+    assertEquals(HashBasedReader.class, reader.getClass());
+    assertEquals(splitHashSource, reader.getCurrentSource());
+    HashBasedReader hashBasedReader = (HashBasedReader) reader;
+    assertEquals(START_ROW, hashBasedReader.startRowInclusive);
+    assertEquals(PARTITION1, hashBasedReader.stopRowExclusive);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
new file mode 100644
index 0000000000..f58becf3cb
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.common.truth.Truth.assertWithMessage;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import junit.framework.TestCase;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Verifies that {@link HadoopHashTableSource} and {@link BufferedHadoopHashTableSource} survive a
+ * Java serialization round trip and declare a well-formed {@code serialVersionUID}.
+ *
+ * <p>The tests run under the JUnit4 runner; {@link TestCase} is extended for its inherited {@code
+ * assertEquals} and {@code fail} helpers.
+ */
+@RunWith(JUnit4.class)
+public class HashBasedSourceSerializationTest extends TestCase {
+
+  public static final String SOURCE_HASH_DIR = "gs://my-bucket/outputDir";
+  public static final String PROJECT_ID = "test-project";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("a".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW = new ImmutableBytesWritable("y".getBytes());
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+  }
+
+  /** Round-trips a source built from value providers only (no start/stop rows). */
+  @Test
+  public void testSerializeWithValueProviders() {
+    checkSerialization(
+        new HadoopHashTableSource(
+            StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR)));
+  }
+
+  /** Round-trips a source that also carries explicit start and stop rows. */
+  @Test
+  public void testSerializeWithStartStop() {
+    checkSerialization(
+        new HadoopHashTableSource(
+            StaticValueProvider.of(PROJECT_ID),
+            StaticValueProvider.of(SOURCE_HASH_DIR),
+            new ImmutableBytesWritable(START_ROW),
+            new ImmutableBytesWritable(STOP_ROW)));
+  }
+
+  /** Round-trips a buffered source wrapping a plain source, without an explicit batch size. */
+  @Test
+  public void testBufferedSourceSerialize() {
+    checkSerialization(
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR))));
+  }
+
+  /** Round-trips a buffered source constructed with an explicit second argument of 5. */
+  @Test
+  public void testBufferedSourceSerializeWithBatchSize() {
+    checkSerialization(
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR)),
+            5));
+  }
+
+  /**
+   * Serializes and deserializes {@code source}, then asserts that its class declares a well-formed
+   * {@code serialVersionUID} and that the deserialized copy equals the original.
+   *
+   * @param source the object to round-trip through Java serialization
+   * @throws AssertionError if the round trip throws, or if any of the checks fails
+   */
+  private static void checkSerialization(Object source) {
+    Object deserialized;
+    try {
+      deserialized = serializeDeserialize(source);
+    } catch (IOException | ClassNotFoundException e) {
+      // Keep the original exception as the cause so the failure report shows its stack trace.
+      throw new AssertionError("Could not round-trip " + source.getClass() + ": " + e, e);
+    }
+    checkClassDeclaresSerialVersionUid(source.getClass());
+    assertEquals(source, deserialized);
+  }
+
+  /**
+   * Asserts that {@code cls} itself declares a {@code private static final long serialVersionUID}
+   * field. Only fields declared directly on {@code cls} are considered.
+   *
+   * @param cls the class to inspect
+   */
+  private static void checkClassDeclaresSerialVersionUid(Class<?> cls) {
+    String uid = "serialVersionUID";
+    for (Field field : cls.getDeclaredFields()) {
+      // Compare by value; reference equality on strings only works by accident of interning.
+      if (uid.equals(field.getName())) {
+        int modifiers = field.getModifiers();
+        assertWithMessage(field + " is not static").that(Modifier.isStatic(modifiers)).isTrue();
+        assertWithMessage(field + " is not final").that(Modifier.isFinal(modifiers)).isTrue();
+        assertWithMessage(field + " is not private").that(Modifier.isPrivate(modifiers)).isTrue();
+        assertWithMessage(field + " must be long")
+            .that(field.getType().getSimpleName())
+            .isEqualTo("long");
+        return;
+      }
+    }
+    fail(cls + " does not declare serialVersionUID");
+  }
+
+  /**
+   * Writes {@code obj} to an in-memory byte array with an {@link ObjectOutputStream} and reads it
+   * back with an {@link ObjectInputStream}.
+   *
+   * @param obj the object to serialize
+   * @return the object read back from the serialized bytes
+   * @throws IOException if writing or reading the object stream fails
+   * @throws ClassNotFoundException if a class of the serialized object cannot be found on read
+   */
+  private static Object serializeDeserialize(Object obj)
+      throws IOException, ClassNotFoundException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    try (ObjectOutputStream outStream = new ObjectOutputStream(bos)) {
+      outStream.writeObject(obj);
+    }
+
+    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
+    try (ObjectInputStream inStream = new ObjectInputStream(bis)) {
+      return inStream.readObject();
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
new file mode 100644
index 0000000000..5f644e3b50
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.testing.CoderProperties;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Unit tests for {@link RangeHashCoder}. */
+public class RangeHashCoderTest {
+  private static final RangeHashCoder TEST_CODER = new RangeHashCoder();
+  private static final ImmutableBytesWritable START =
+      new ImmutableBytesWritable("Start".getBytes());
+  private static final ImmutableBytesWritable STOP = new ImmutableBytesWritable("Stop".getBytes());
+  private static final ImmutableBytesWritable HASH = new ImmutableBytesWritable("hash".getBytes());
+  // NOTE(review): EMPTY is not referenced by any test in this class.
+  private static final ImmutableBytesWritable EMPTY =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+
+  /** A {@link RangeHash} must come out of an encode/decode round trip equal to the original. */
+  @Test
+  public void encodeRangeHash() throws Exception {
+    CoderProperties.coderDecodeEncodeEqual(TEST_CODER, RangeHash.of(START, STOP, HASH));
+  }
+
+  /** Encoding {@code null} is expected to be rejected with a {@link CoderException}. */
+  @Test(expected = CoderException.class)
+  public void encodeNullThrowsCoderException() throws Exception {
+    CoderUtils.encodeToByteArray(TEST_CODER, null);
+  }
+
+  /** The coder must report {@link RangeHash} as its encoded type. */
+  @Test
+  public void testEncodedTypeDescriptor() throws Exception {
+    // JUnit's assertEquals takes (expected, actual), so the expected descriptor goes first.
+    Assert.assertEquals(TypeDescriptor.of(RangeHash.class), TEST_CODER.getEncodedTypeDescriptor());
+  }
+}