From 28547a28a916d413ac1e5c7712111926e8d56b9e Mon Sep 17 00:00:00 2001
From: shitanshu verma <shitanshu@google.com>
Date: Tue, 2 Feb 2021 13:15:00 -0500
Subject: [PATCH 1/8] feat: add a new pipeline to validate data imported into
 cloud bigtable from HBase.

---
 .../bigtable-beam-import/pom.xml              |  18 +
 .../com/google/cloud/bigtable/beam/Main.java  |   4 +
 .../cloud/bigtable/beam/TemplateUtils.java    |  14 +
 .../BufferedHadoopHashTableSource.java        | 211 ++++++++
 ...omputeAndValidateHashFromBigtableDoFn.java | 232 +++++++++
 .../validation/HadoopHashTableSource.java     | 464 ++++++++++++++++++
 .../beam/validation/RangeHashCoder.java       | 105 ++++
 .../beam/validation/SyncTableJob.java         | 199 ++++++++
 .../beam/validation/SyncTableUtils.java       |  55 +++
 .../beam/validation/TableHashWrapper.java     |  55 +++
 .../validation/TableHashWrapperFactory.java   |  33 ++
 .../beam/validation/TableHashWrapperImpl.java | 119 +++++
 .../mapreduce/BigtableTableHashAccessor.java  |  77 +++
 .../test-snapshot/..snapshotinfo.crc          | Bin 12 -> 0 bytes
 .../test-snapshot/.data.manifest.crc          | Bin 20 -> 0 bytes
 .../test-snapshot/.snapshotinfo               |   2 -
 .../cf/.b0f68aca966b48f1b171614e582b1cbb.crc  | Bin 52 -> 0 bytes
 .../cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc  | Bin 52 -> 0 bytes
 .../cf/.c2945aa8dac34922913a1f60fedb6154.crc  | Bin 52 -> 0 bytes
 .../cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc  | Bin 52 -> 0 bytes
 .../cf/.d8b49b374391407ba35d5e0db1c835c9.crc  | Bin 52 -> 0 bytes
 .../cf/.32053565831341128b8d8f5567d48fdc.crc  | Bin 52 -> 0 bytes
 .../cf/.36798a163ed046b193818e21dd7516b4.crc  | Bin 52 -> 0 bytes
 .../cf/.65b9c6860f5f4de39d61d1674947b030.crc  | Bin 52 -> 0 bytes
 .../cf/.b83044f76ba6474aa829e3bae7fd82d1.crc  | Bin 52 -> 0 bytes
 .../src/test/generate_test_data.txt           | 226 +++++----
 .../test-snapshot/.snapshotinfo               |   2 +
 .../test-snapshot/data.manifest               | Bin 1090 -> 1090 bytes
 .../cf/0ad53893d268478f9b2484cbb6016d9b}      | Bin 5264 -> 5264 bytes
 .../cf/9926df0da08b4f51a33517afb040f82d}      | Bin 5264 -> 5264 bytes
 .../cf/966e85699fdd4680a8c6fbf4b41b6e4b}      | Bin 5264 -> 5264 bytes
 .../cf/bab07e8089634e629a4c111ea2b415fe}      | Bin 5264 -> 5264 bytes
 .../cf/7fef5694213b4be0ad79f79c45200c2d}      | Bin 5264 -> 5264 bytes
 .../cf/f8d40658d79b4a7191f21bcf14ae289b}      | Bin 5264 -> 5264 bytes
 .../cf/afe596ef5c61440983da2dcb54d581ab}      | Bin 5264 -> 5264 bytes
 .../cf/2c766f1fc8eb460dbfa9a3803138c9b2}      | Bin 5264 -> 5264 bytes
 .../cf/e59edc08de6d441689288f04c7c0fe85}      | Bin 5299 -> 5299 bytes
 .../hashtable/hashes/_SUCCESS                 |   0
 .../hashtable/hashes/part-r-00000/data        | Bin 0 -> 158 bytes
 .../hashtable/hashes/part-r-00000/index       | Bin 0 -> 220 bytes
 .../hashtable/hashes/part-r-00001/data        | Bin 0 -> 534 bytes
 .../hashtable/hashes/part-r-00001/index       | Bin 0 -> 221 bytes
 .../hashtable/hashes/part-r-00002/data        | Bin 0 -> 499 bytes
 .../hashtable/hashes/part-r-00002/index       | Bin 0 -> 221 bytes
 .../hashtable/hashes/part-r-00003/data        | Bin 0 -> 499 bytes
 .../hashtable/hashes/part-r-00003/index       | Bin 0 -> 221 bytes
 .../hashtable/hashes/part-r-00004/data        | Bin 0 -> 499 bytes
 .../hashtable/hashes/part-r-00004/index       | Bin 0 -> 221 bytes
 .../hashtable/hashes/part-r-00005/data        | Bin 0 -> 499 bytes
 .../hashtable/hashes/part-r-00005/index       | Bin 0 -> 221 bytes
 .../hashtable/hashes/part-r-00006/data        | Bin 0 -> 499 bytes
 .../hashtable/hashes/part-r-00006/index       | Bin 0 -> 221 bytes
 .../hashtable/hashes/part-r-00007/data        | Bin 0 -> 499 bytes
 .../hashtable/hashes/part-r-00007/index       | Bin 0 -> 221 bytes
 .../hashtable/hashes/part-r-00008/data        | Bin 0 -> 499 bytes
 .../hashtable/hashes/part-r-00008/index       | Bin 0 -> 221 bytes
 .../hashtable/hashes/part-r-00009/data        | Bin 0 -> 499 bytes
 .../hashtable/hashes/part-r-00009/index       | Bin 0 -> 221 bytes
 .../test/integration-test/hashtable/manifest  |   4 +
 .../integration-test/hashtable/partitions     | Bin 0 -> 342 bytes
 .../beam/hbasesnapshots/EndToEndIT.java       | 181 ++++++-
 .../BufferedHadoopHashTableSourceTest.java    | 162 ++++++
 ...teAndValidateHashFromBigtableDoFnTest.java | 444 +++++++++++++++++
 .../beam/validation/FakeTableHashWrapper.java | 153 ++++++
 .../FakeTableHashWrapperFactory.java          |  32 ++
 .../validation/HadoopHashBasedReaderTest.java | 181 +++++++
 .../validation/HadoopHashTableSourceTest.java | 209 ++++++++
 .../HashBasedSourceSerializationTest.java     | 127 +++++
 .../beam/validation/RangeHashCoderTest.java   |  51 ++
 69 files changed, 3235 insertions(+), 125 deletions(-)
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc
 delete mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{ => integration-test}/data/.hbase-snapshot/test-snapshot/data.manifest (55%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc => integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b} (86%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 => integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d} (87%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 => integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b} (87%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 => integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe} (87%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 => integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d} (87%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb => integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b} (87%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 => integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab} (87%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 => integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2} (87%)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/test/{data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 => integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85} (86%)
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/_SUCCESS
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
index 218dc06db8..8ee5ba861b 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
+++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
@@ -26,6 +26,7 @@ limitations under the License.
 
   <properties>
     <mainClass>com.google.cloud.bigtable.beam.Main</mainClass>
+    <skipITs>false</skipITs>
   </properties>
 
   <!-- Adding this to resolve version conflict within beam sdk-->
@@ -217,6 +218,23 @@ limitations under the License.
       <version>${hbase.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.google.truth</groupId>
+      <artifactId>truth</artifactId>
+      <version>1.0.1</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.auto.service</groupId>
+      <artifactId>auto-service-annotations</artifactId>
+      <version>1.0-rc7</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-bigtable-emulator</artifactId>
+      <version>0.124.0</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java
index b346b90837..1f52f5125a 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java
@@ -21,6 +21,7 @@
 import com.google.cloud.bigtable.beam.sequencefiles.CreateTableHelper;
 import com.google.cloud.bigtable.beam.sequencefiles.ExportJob;
 import com.google.cloud.bigtable.beam.sequencefiles.ImportJob;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob;
 import java.io.File;
 import java.net.URISyntaxException;
 import java.util.Arrays;
@@ -53,6 +54,9 @@ public static void main(String[] args) throws Exception {
       case "create-table":
         CreateTableHelper.main(subArgs);
         break;
+      case "sync-table":
+        SyncTableJob.main(subArgs);
+        break;
       default:
         usage();
         System.exit(1);
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
index e64507317b..f839a50b23 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
@@ -26,6 +26,7 @@
 import com.google.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.Query;
 import com.google.cloud.bigtable.beam.sequencefiles.ExportJob.ExportOptions;
 import com.google.cloud.bigtable.beam.sequencefiles.ImportJob.ImportOptions;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
 import com.google.cloud.bigtable.hbase.adapters.Adapters;
 import com.google.cloud.bigtable.hbase.adapters.read.DefaultReadHooks;
@@ -72,6 +73,19 @@ public static CloudBigtableTableConfiguration BuildImportConfig(ImportOptions op
     return builder.build();
   }
 
+  /** Builds CloudBigtableTableConfiguration from input runtime parameters for sync-table job. */
+  public static CloudBigtableTableConfiguration BuildSyncTableConfig(SyncTableOptions opts) {
+    CloudBigtableTableConfiguration.Builder builder =
+        new CloudBigtableTableConfiguration.Builder()
+            .withProjectId(opts.getBigtableProject())
+            .withInstanceId(opts.getBigtableInstanceId())
+            .withTableId(opts.getBigtableTableId());
+    if (opts.getBigtableAppProfileId() != null) {
+      builder.withAppProfileId(opts.getBigtableAppProfileId());
+    }
+    return builder.build();
+  }
+
   /** Provides a request that is constructed with some attributes. */
   private static class RequestValueProvider
       implements ValueProvider<ReadRowsRequest>, Serializable {
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
new file mode 100644
index 0000000000..eb018832ce
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.api.core.InternalApi;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.base.Objects;
+import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.values.KV;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Buffers the RangeHashes generated by {@link HadoopHashTableSource}. This is an optimization that
+ * allows {@link ComputeAndValidateHashFromBigtableDoFn} to issue fewer ReadRow APIs with larger row
+ * ranges.
+ *
+ * <p>Hadoop HashTable output is sorted by row-key and contains a row-range and hash. Beam
+ * PCollections do not guarantee any ordering. To fetch a batch of ranges in 1 ReadRows operation,
+ * this source buffers them and outputs a {@code List<RangeHash>}, preserving their sorted order.
+ */
+@InternalApi
+class BufferedHadoopHashTableSource extends BoundedSource<KV<String, List<RangeHash>>> {
+
+  private static final long serialVersionUID = 39842743L;
+
+  public static final Log LOG = LogFactory.getLog(BufferedHadoopHashTableSource.class);
+  private static final int DEFAULT_BATCH_SIZE = 50;
+
+  // Max number of RangeHashes to buffer.
+  private int maxBufferSize;
+  private HadoopHashTableSource hashTableSource;
+  private Coder<KV<String, List<RangeHash>>> coder;
+
+  public BufferedHadoopHashTableSource(HadoopHashTableSource source) {
+    this(source, DEFAULT_BATCH_SIZE);
+  }
+
+  public BufferedHadoopHashTableSource(HadoopHashTableSource hashTableSource, int maxBufferSize) {
+    this.hashTableSource = hashTableSource;
+    this.coder = KvCoder.of(StringUtf8Coder.of(), ListCoder.of(RangeHashCoder.of()));
+    this.maxBufferSize = maxBufferSize;
+  }
+
+  @Override
+  public List<? extends BoundedSource<KV<String, List<RangeHash>>>> split(
+      long desiredBundleSizeBytes, PipelineOptions options) throws IOException {
+    // Splitting is delegated to the wrapped source; each resulting split is wrapped 1:1 below.
+    List<HadoopHashTableSource> splitHashTableSources =
+        (List<HadoopHashTableSource>) hashTableSource.split(desiredBundleSizeBytes, options);
+
+    List<BufferedHadoopHashTableSource> splitSources =
+        new ArrayList<>(splitHashTableSources.size());
+    // Keep the splits same as HashTableSource.
+    for (HadoopHashTableSource splitHashTableSource : splitHashTableSources) {
+      // Propagate this source's maxBufferSize so that splits do not fall back to the default.
+      splitSources.add(new BufferedHadoopHashTableSource(splitHashTableSource, maxBufferSize));
+    }
+    return splitSources;
+  }
+
+  @Override
+  public Coder<KV<String, List<RangeHash>>> getOutputCoder() {
+    return coder;
+  }
+
+  @Override
+  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
+    // HashTable data files don't expose a method to estimate size or lineCount.
+    return 0;
+  }
+
+  @Override
+  public BoundedReader createReader(PipelineOptions options) throws IOException {
+    return new BufferedHashBasedReader(this, hashTableSource.createReader(options));
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof BufferedHadoopHashTableSource)) {
+      return false;
+    }
+    BufferedHadoopHashTableSource that = (BufferedHadoopHashTableSource) o;
+    return maxBufferSize == that.maxBufferSize && Objects.equal(hashTableSource, that.hashTableSource);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(maxBufferSize, hashTableSource);
+  }
+
+  @Override
+  public String toString() {
+    return "BufferedHadoopHashTableSource ["
+        + immutableBytesToString(hashTableSource.startRowInclusive)
+        + ", "
+        + immutableBytesToString(hashTableSource.stopRowExclusive)
+        + "), maxBufferSize="
+        + maxBufferSize;
+  }
+
+  private void writeObject(ObjectOutputStream s) throws IOException {
+    s.writeObject(hashTableSource);
+    s.writeInt(maxBufferSize);
+  }
+
+  private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
+    this.hashTableSource = (HadoopHashTableSource) s.readObject();
+    this.coder = KvCoder.of(StringUtf8Coder.of(), ListCoder.of(RangeHashCoder.of()));
+    this.maxBufferSize = s.readInt();
+  }
+
+  private static class BufferedHashBasedReader extends BoundedReader<KV<String, List<RangeHash>>> {
+
+    private BoundedReader<RangeHash> hashReader;
+    private BufferedHadoopHashTableSource source;
+
+    private List<RangeHash> buffer;
+
+    public BufferedHashBasedReader(
+        BufferedHadoopHashTableSource source, BoundedReader<RangeHash> hashReader) {
+      this.source = source;
+      this.hashReader = hashReader;
+      this.buffer = new ArrayList<>(source.maxBufferSize);
+    }
+
+    @Override
+    public boolean start() throws IOException {
+      if (!hashReader.start()) {
+        // HashReader does not have any hashes, return empty reader.
+        return false;
+      }
+      // Start returned true, consume the current RangeHash.
+      buffer.add(hashReader.getCurrent());
+      bufferRangeHashes();
+      // Buffer is not empty, return true to consume the current buffer.
+      return true;
+    }
+
+    // Reads from hashReader and buffers the RangeHashes.
+    // Returns true if any RangeHashes were read from hashReader.
+    private boolean bufferRangeHashes() throws IOException {
+      boolean readRangeHashes = false;
+      while (buffer.size() < source.maxBufferSize && hashReader.advance()) {
+        readRangeHashes = true;
+        buffer.add(hashReader.getCurrent());
+      }
+      return readRangeHashes;
+    }
+
+    @Override
+    public boolean advance() throws IOException {
+      // Start a new batch here rather than in getCurrent(), so that getCurrent() can be called
+      // repeatedly for the same batch and a skipped batch does not end the read early.
+      buffer = new ArrayList<>(source.maxBufferSize);
+      return bufferRangeHashes();
+    }
+
+    @Override
+    public KV<String, List<RangeHash>> getCurrent() {
+      // getCurrent only gets called when buffer is not empty.
+      Preconditions.checkArgument(!buffer.isEmpty(), "Can not get current on empty buffer.");
+      // GroupBy key is a string and not ImmutableBytesWritable because the WritableCoder is not
+      // deterministic. The outputted PCollection is grouped by the K and needs a deterministic
+      // coder. Having a String K leads to an unfortunate double encoding, ImmutableBytesWritable->
+      // binary string -> UTF8 encoded string. The number of batches is significantly smaller than
+      // data fetched from Bigtable and should not have meaningful impact on the job performance.
+      return KV.of(Bytes.toStringBinary(buffer.get(0).startInclusive.copyBytes()), buffer);
+    }
+
+    @Override
+    public void close() throws IOException {
+      hashReader.close();
+    }
+
+    @Override
+    public BoundedSource<KV<String, List<RangeHash>>> getCurrentSource() {
+      return source;
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
new file mode 100644
index 0000000000..3801465f2f
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.api.core.InternalApi;
+import com.google.cloud.bigtable.beam.AbstractCloudBigtableTableDoFn;
+import com.google.cloud.bigtable.beam.CloudBigtableConfiguration;
+import com.google.cloud.bigtable.beam.TemplateUtils;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
+import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Metrics;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor.BigtableResultHasher;
+
+/**
+ * A {@link DoFn} that takes a row range and hash from HBase and validates the hash from rows read
+ * from Cloud Bigtable.
+ */
+@InternalApi
+class ComputeAndValidateHashFromBigtableDoFn
+    extends AbstractCloudBigtableTableDoFn<KV<String, Iterable<List<RangeHash>>>, RangeHash> {
+
+  private static final long serialVersionUID = 2349094L;
+  private final ValueProvider<String> tableName;
+  private final ValueProvider<String> projectId;
+  private final ValueProvider<String> sourceHashDir;
+
+  private final TableHashWrapperFactory tableHashWrapperFactory;
+
+  // Counter for reporting matching and mismatching ranges. Names are similar to HBase sync-table
+  // job.
+  private final Counter matches = Metrics.counter("cbt-dataflow-validate", "ranges_matched");
+  private final Counter mismatches = Metrics.counter("cbt-dataflow-validate", "ranges_not_matched");
+
+  public ComputeAndValidateHashFromBigtableDoFn(SyncTableOptions options) {
+    super(TemplateUtils.BuildSyncTableConfig(options));
+    this.tableName = options.getBigtableTableId();
+    // Create a local copy of ValueProviders, PipelineOptions are not serializable.
+    projectId = options.getBigtableProject();
+    sourceHashDir = options.getHashTableOutputDir();
+    tableHashWrapperFactory = new TableHashWrapperFactory();
+  }
+
+  @VisibleForTesting
+  ComputeAndValidateHashFromBigtableDoFn(
+      CloudBigtableConfiguration config,
+      ValueProvider<String> tableName,
+      ValueProvider<String> projectId,
+      ValueProvider<String> sourceHashDir,
+      TableHashWrapperFactory factory) {
+    super(config);
+    this.tableName = tableName;
+    this.tableHashWrapperFactory = factory; // Lets callers supply their own TableHashWrapper.
+    this.projectId = projectId; // Each provider goes to its same-named field.
+    this.sourceHashDir = sourceHashDir;
+  }
+
+  /** Scans Bigtable once per batch of sorted ranges and outputs each mismatching range. */
+  @ProcessElement
+  public void processElement(ProcessContext context) throws Exception {
+    // BufferedHadoopHashTableSource generates only 1 item per groupby key, but iterate just in
+    // case.
+    for (List<RangeHash> rangeHashes : context.element().getValue()) {
+      if (rangeHashes.isEmpty()) {
+        // Nothing to validate in this batch; move on instead of dropping the remaining ones.
+        continue;
+      }
+
+      ImmutableBytesWritable rangeStartInclusive = rangeHashes.get(0).startInclusive;
+      ImmutableBytesWritable rangeEndExclusive =
+          rangeHashes.get(rangeHashes.size() - 1).stopExclusive;
+
+      BigtableResultHasher resultHasher = new BigtableResultHasher();
+      resultHasher.startBatch(rangeStartInclusive);
+
+      Iterator<RangeHash> rangeHashIterator = rangeHashes.iterator();
+      RangeHash currentRangeHash = rangeHashIterator.next();
+      long numRows = 0;
+
+      // Since all the row-ranges are sorted in HashTable's data files, 1 big scan can be used
+      // to read all the row ranges. Parallelism is achieved by splitting the HashTable's data
+      // files into smaller bundle of row-ranges in GroupBy.
+      // try-with-resources closes the scanner on normal completion and on failure, so the scan
+      // is never leaked.
+      try (ResultScanner scanner =
+          createBigtableScan(rangeStartInclusive.copyBytes(), rangeEndExclusive.copyBytes())) {
+        // Process each row and validate hashes
+        for (Result result : scanner) {
+          numRows++;
+          if (numRows % 10_000 == 0) {
+            // Heartbeat in logs in case a large scan gets hung.
+            DOFN_LOG.debug("Processed " + numRows + " rows ");
+          }
+
+          ImmutableBytesWritable rowKey = new ImmutableBytesWritable(result.getRow());
+
+          // Check if the rowKey belongs to current range, if not keep iterating through the
+          // rangeHashes until rowKey's range is found.
+          while (!isWithinUpperBound(currentRangeHash.stopExclusive, rowKey)) {
+            validateBatchHash(context, resultHasher, currentRangeHash);
+            if (!rangeHashIterator.hasNext()) {
+              // THIS SHOULD NEVER HAPPEN. Bigtable is being scanned till the last
+              // RangeHash.endKeyExclusive(), so bigtable's result should not outlast the
+              // rangeHashes.
+              throw new IllegalStateException(
+                  "Buffer reached to end while scan is still active at row :"
+                      + immutableBytesToString(result.getRow())
+                      + ". Affected Range: ["
+                      + immutableBytesToString(rangeStartInclusive)
+                      + ", "
+                      + immutableBytesToString(rangeEndExclusive)
+                      + ").");
+            }
+            currentRangeHash = rangeHashIterator.next();
+          }
+
+          // Always Hash the current row.
+          resultHasher.hashResult(result);
+        }
+      }
+
+      // Bigtable scan is finished at this point and rangeHashes may contain additional row ranges.
+      // Last range will always be unverified as the range end is exclusive and
+      // currentRow > rangeEndExclusive will never be true. Verify the last range.
+      validateBatchHash(context, resultHasher, currentRangeHash);
+
+      // Any ranges remaining in rangeHashes need to be reported as mismatched as there is nothing
+      // in Cloud Bigtable for those row ranges.
+      while (rangeHashIterator.hasNext()) {
+        currentRangeHash = rangeHashIterator.next();
+        reportMismatch(context, currentRangeHash);
+      }
+
+      // rangeHashes is never empty here, so the range bounds are always available for the log.
+      DOFN_LOG.debug(
+          "Finishing context by outputting "
+              + rangeHashes.size()
+              + " keys in range ["
+              + immutableBytesToString(rangeStartInclusive)
+              + ", "
+              + immutableBytesToString(rangeEndExclusive)
+              + ").");
+    }
+  }
+
+  private ResultScanner createBigtableScan(byte[] startKeyInclusive, byte[] stopKeyExclusive)
+      throws IOException {
+    // NOTE(review): this Table is not closed in this method and is not returned — confirm.
+    Table table = getConnection().getTable(TableName.valueOf(tableName.get()));
+    // Get the scan from TableHash: HashTable may have hashed only part of the data (selected
+    // column families, timestamp range, maxVersions etc), so its scan fetches the same data.
+    TableHashWrapper tableHash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+    Scan scan = tableHash.getScan();
+    // Set the workitem boundaries on the scan. An empty key leaves that bound of the scan as is.
+    if (startKeyInclusive.length > 0) {
+      scan.withStartRow(startKeyInclusive, true);
+    }
+    if (stopKeyExclusive.length > 0) {
+      scan.withStopRow(stopKeyExclusive, false);
+    }
+
+    return table.getScanner(scan);
+  }
+
+  /**
+   * Determines if {@code row < stopExclusive}, i.e. the row is still below the exclusive upper
+   * bound of a row range [start, stopExclusive). A stopExclusive equal to
+   * HConstants.EMPTY_END_ROW represents a range with no upper bound.
+   *
+   * @param stopExclusive exclusive end key of the range being checked.
+   * @param row row key to test against the upper bound.
+   * @return true if the range has no upper bound or row sorts strictly before stopExclusive.
+   */
+  private boolean isWithinUpperBound(
+      ImmutableBytesWritable stopExclusive, ImmutableBytesWritable row) {
+    return stopExclusive.equals(HConstants.EMPTY_END_ROW) || row.compareTo(stopExclusive) < 0;
+  }
+
+  private void validateBatchHash(
+      ProcessContext context, BigtableResultHasher resultHasher, RangeHash currentRangeHash) {
+    // A batch is always open at this point, so finishing it is safe. With no rows hashed the
+    // hasher yields the hash of an empty batch, which is then compared like any other.
+    resultHasher.finishBatch();
+    if (resultHasher.getBatchHash().equals(currentRangeHash.hash)) {
+      matches.inc();
+    } else {
+      reportMismatch(context, currentRangeHash);
+    }
+    // Open the next batch at the end key of the range that was just checked.
+    resultHasher.startBatch(new ImmutableBytesWritable(currentRangeHash.stopExclusive));
+  }
+
+  private void reportMismatch(ProcessContext context, RangeHash currentRangeHash) {
+    mismatches.inc();
+    DOFN_LOG.info(
+        "MISMATCH ON RANGE ["
+            + immutableBytesToString(currentRangeHash.startInclusive)
+            + ", "
+            + immutableBytesToString(currentRangeHash.stopExclusive)
+            + ").");
+    context.output(currentRangeHash);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
new file mode 100644
index 0000000000..20b693963a
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
@@ -0,0 +1,464 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.createConfiguration;
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import autovalue.shaded.com.google$.common.annotations.$VisibleForTesting;
+import com.google.api.core.InternalApi;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.beam.validation.TableHashWrapper.TableHashReader;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Objects;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.DefaultCoder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * A beam source to read output of Hadoop HashTable job. The source creates 1 workitem per HashTable
+ * data file and emits a row-range/hash pair.
+ */
+@InternalApi
+class HadoopHashTableSource extends BoundedSource<RangeHash> implements Serializable {
+
+  private static final long serialVersionUID = 2383724L;
+
+  /**
+   * A simple POJO encapsulating a row range and the corresponding hash generated by HashTable job.
+   */
+  @DefaultCoder(RangeHashCoder.class)
+  public static class RangeHash {
+
+    public final ImmutableBytesWritable startInclusive;
+    public final ImmutableBytesWritable stopExclusive;
+    public final ImmutableBytesWritable hash;
+
+    private RangeHash(
+        ImmutableBytesWritable startInclusive,
+        ImmutableBytesWritable stopExclusive,
+        ImmutableBytesWritable hash) {
+      this.startInclusive = startInclusive;
+      this.stopExclusive = stopExclusive;
+      this.hash = hash;
+    }
+
+    static RangeHash of(
+        ImmutableBytesWritable startInclusive,
+        ImmutableBytesWritable stopExclusive,
+        ImmutableBytesWritable hash) {
+      Preconditions.checkNotNull(startInclusive);
+      Preconditions.checkNotNull(stopExclusive);
+      Preconditions.checkNotNull(hash);
+      return new RangeHash(startInclusive, stopExclusive, hash);
+    }
+
+    @Override
+    public String toString() {
+      return String.format(
+          "RangeHash{ range = [ %s, %s), hash: %s }",
+          immutableBytesToString(startInclusive),
+          immutableBytesToString(stopExclusive),
+          immutableBytesToString(hash));
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof RangeHash)) {
+        return false;
+      }
+      RangeHash rangeHash = (RangeHash) o;
+      return Objects.equal(startInclusive, rangeHash.startInclusive)
+          && Objects.equal(stopExclusive, rangeHash.stopExclusive)
+          && Objects.equal(hash, rangeHash.hash);
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(startInclusive, stopExclusive, hash);
+    }
+  }
+
+  public static final Log LOG = LogFactory.getLog(HadoopHashTableSource.class);
+
+  private ValueProvider<String> projectId;
+
+  // Path to the output of HashTable job. Usually in GCS.
+  private ValueProvider<String> sourceHashDir;
+
+  // Coder to encode/decode the RangeHash
+  private RangeHashCoder coder;
+
+  // Row range owned by this source.
+  @VisibleForTesting ImmutableBytesWritable startRowInclusive;
+
+  @VisibleForTesting ImmutableBytesWritable stopRowExclusive;
+
+  private TableHashWrapperFactory tableHashWrapperFactory;
+
+  public HadoopHashTableSource() {
+    this.coder = new RangeHashCoder();
+  }
+
+  /**
+   * Creates a HadoopHashTableSource that reads HashTable data from hashTableOutputDir in GCS bucket
+   * in project $(projectId).
+   */
+  public HadoopHashTableSource(
+      ValueProvider<String> projectId, ValueProvider<String> sourceHashDir) {
+    this(projectId, sourceHashDir, /*startRowInclusive*/ null, /*stopRowExclusive*/ null);
+  }
+
+  /**
+   * Constructor to initialize a HadoopHashTableSource for a given row-range, backed by a default
+   * {@link TableHashWrapperFactory}. Null start/stop rows are resolved later in createReader.
+   */
+  @VisibleForTesting
+  HadoopHashTableSource(
+      ValueProvider<String> projectId,
+      ValueProvider<String> sourceHashDir,
+      ImmutableBytesWritable startRowInclusive,
+      ImmutableBytesWritable stopRowExclusive) {
+    this(
+        projectId,
+        sourceHashDir,
+        startRowInclusive,
+        stopRowExclusive,
+        new TableHashWrapperFactory());
+  }
+
+  @VisibleForTesting
+  HadoopHashTableSource(
+      ValueProvider<String> projectId,
+      ValueProvider<String> hadoopHashTableOutputDir,
+      ImmutableBytesWritable startRowInclusive,
+      ImmutableBytesWritable stopRowExclusive,
+      TableHashWrapperFactory tableHashWrapperFactory) {
+    this.coder = new RangeHashCoder();
+    this.projectId = projectId;
+    this.sourceHashDir = hadoopHashTableOutputDir;
+    // startRow and stopRow will be null when the template is initialized. startRow and stopRow are
+    // read from the hashTableOutputDir, which is only available at pipeline runtime.
+    this.startRowInclusive = startRowInclusive;
+    this.stopRowExclusive = stopRowExclusive;
+    this.tableHashWrapperFactory = tableHashWrapperFactory;
+  }
+
+  @Override
+  public List<? extends BoundedSource<RangeHash>> split(
+      long desiredBundleSizeBytes, PipelineOptions options) throws IOException {
+    // This method relies on the partitioning done by HBase-HashTable job. There is a possibility
+    // of stragglers. SyncTable handles it by using a group by and further splitting workitems.
+    TableHashWrapper hash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+
+    ImmutableList<ImmutableBytesWritable> partitions = hash.getPartitions();
+    int numPartitions = partitions.size();
+
+    List<HadoopHashTableSource> splitSources = new ArrayList<>(numPartitions + 1);
+    if (numPartitions == 0) {
+      // There are 0 partitions and 1 hashfile, return single source with full key range.
+      splitSources.add(
+          new HadoopHashTableSource(
+              projectId,
+              sourceHashDir,
+              new ImmutableBytesWritable(hash.getStartRow()),
+              new ImmutableBytesWritable(hash.getStopRow()),
+              tableHashWrapperFactory));
+      return splitSources;
+    }
+
+    // Use the HashTable start key. The value is HConstants.EMPTY_START_ROW for full table scan.
+    ImmutableBytesWritable startRow = new ImmutableBytesWritable(hash.getStartRow());
+    ImmutableBytesWritable stopRow = new ImmutableBytesWritable(hash.getStopRow());
+
+    // The output of HashTable is organized as partition file and a set of datafiles.
+    // Partition file contains a list of partitions, these partitions split the key-range of a table
+    // into roughly equal row-ranges and hashes for these row-ranges are stored in a single
+    // datafile.
+    //
+    // There are always numPartitions + 1 data files. Datafile(i) covers hashes for
+    // [partition{i-1}, partition{i}), with startRow and stopRow closing the first and last range.
+    // So a partition file containing entries [b,f] for a table with row range [a,z] will have 3
+    // data files containing hashes.
+    // file0 will contain [a(startRow), b), file1 will contain [b,f), and file2 will contain
+    // [f,z(stopRow))
+    for (int i = 0; i < numPartitions; i++) {
+      LOG.debug(
+          "Adding: ["
+              + immutableBytesToString(startRow.get())
+              + ", "
+              + immutableBytesToString(partitions.get(i).get())
+              + "]");
+      splitSources.add(
+          new HadoopHashTableSource(
+              projectId, sourceHashDir, startRow, partitions.get(i), tableHashWrapperFactory));
+      startRow = partitions.get(i);
+    }
+    // Log the last range [lastPartition, stopRow); startRow now holds the last partition.
+    LOG.debug(
+        "Adding: ["
+            + immutableBytesToString(startRow.get())
+            + ", "
+            + immutableBytesToString(stopRow.get())
+            + "]");
+    // Add the last range for [lastPartition, stopRow).
+    splitSources.add(
+        new HadoopHashTableSource(
+            projectId,
+            sourceHashDir,
+            partitions.get(numPartitions - 1),
+            new ImmutableBytesWritable(stopRow),
+            tableHashWrapperFactory));
+    LOG.info("Returning " + splitSources.size() + " sources from " + numPartitions + " partitions");
+    return splitSources;
+  }
+
+  @Override
+  public Coder<RangeHash> getOutputCoder() {
+    return coder;
+  }
+
+  @Override
+  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
+    // HashTable data files don't expose a method to estimate size or lineCount.
+    return 0;
+  }
+
+  @Override
+  public BoundedReader<RangeHash> createReader(PipelineOptions options) throws IOException {
+    TableHashWrapper hash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+
+    // The row range for an un-split source is determined from the output of HashTable job.
+    // HashTableOutputDir is a runtime parameter and hence not available at construction time, so
+    // populate the start and stop here. Note that this updates the fields of this source.
+    if (startRowInclusive == null || stopRowExclusive == null) {
+      startRowInclusive = hash.getStartRow();
+      stopRowExclusive = hash.getStopRow();
+    }
+
+    return new HashBasedReader(
+        this,
+        new ImmutableBytesWritable(startRowInclusive),
+        new ImmutableBytesWritable(stopRowExclusive),
+        hash.newReader(
+            createConfiguration(this.projectId.get(), this.sourceHashDir.get()),
+            new ImmutableBytesWritable(startRowInclusive)));
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof HadoopHashTableSource)) {
+      return false;
+    }
+    HadoopHashTableSource that = (HadoopHashTableSource) o;
+    return Objects.equal(projectId, that.projectId)
+        && Objects.equal(sourceHashDir, that.sourceHashDir)
+        && Objects.equal(startRowInclusive, that.startRowInclusive)
+        && Objects.equal(stopRowExclusive, that.stopRowExclusive);
+  }
+
+  @Override
+  public int hashCode() { // Hashes exactly the fields equals() compares; coder is excluded.
+    return Objects.hashCode(projectId, sourceHashDir, startRowInclusive, stopRowExclusive);
+  }
+
+  @Override
+  public String toString() {
+    return "HadoopHashTableSource ["
+        + immutableBytesToString(startRowInclusive)
+        + ", "
+        + immutableBytesToString(stopRowExclusive)
+        + ')';
+  }
+
+  private void writeObject(ObjectOutputStream s) throws IOException {
+    // Custom wire format, written field by field in a fixed order; coder is not written.
+    s.writeObject(projectId);
+    s.writeObject(sourceHashDir);
+    s.writeObject(tableHashWrapperFactory);
+    // Start and Stop can be null, write a boolean to indicate if start/stop is expected.
+    if (startRowInclusive == null) {
+      s.writeBoolean(false);
+    } else {
+      s.writeBoolean(true);
+      s.writeObject(startRowInclusive.copyBytes());
+    }
+
+    if (stopRowExclusive == null) {
+      s.writeBoolean(false);
+    } else {
+      s.writeBoolean(true);
+      s.writeObject(stopRowExclusive.copyBytes());
+    }
+  }
+
+  private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
+    this.coder = new RangeHashCoder(); // Not in the stream, and no constructor runs on this path.
+    this.projectId = (ValueProvider<String>) s.readObject();
+    this.sourceHashDir = (ValueProvider<String>) s.readObject();
+    this.tableHashWrapperFactory = (TableHashWrapperFactory) s.readObject();
+    // start/stop can be null, they are preceded by a boolean indicating their presence.
+    if (s.readBoolean()) {
+      this.startRowInclusive = new ImmutableBytesWritable((byte[]) s.readObject());
+    }
+    if (s.readBoolean()) {
+      this.stopRowExclusive = new ImmutableBytesWritable((byte[]) s.readObject());
+    }
+  }
+
+  @VisibleForTesting
+  static class HashBasedReader extends BoundedReader<RangeHash> {
+
+    final HadoopHashTableSource source;
+    final TableHashReader reader;
+
+    final ImmutableBytesWritable startRowInclusive;
+    final ImmutableBytesWritable stopRowExclusive;
+
+    long numKeys = 0;
+    // Flag indicating that this workitem is finished.
+    boolean isDone = false;
+    ImmutableBytesWritable currentRangeStartKey;
+    // Hash for the current range.
+    ImmutableBytesWritable currentHash;
+    RangeHash currentRangeHash;
+
+    public HashBasedReader(
+        HadoopHashTableSource source,
+        ImmutableBytesWritable startRowInclusive,
+        ImmutableBytesWritable stopRowExclusive,
+        TableHashReader reader) {
+      this.reader = reader;
+      this.source = source;
+      this.startRowInclusive = startRowInclusive;
+      this.stopRowExclusive = stopRowExclusive;
+    }
+
+    @Override
+    public boolean start() throws IOException {
+      // NO CHECKED EXCEPTIONS HERE.
+      LOG.debug(
+          "Starting a new reader at key range ["
+              + immutableBytesToString(startRowInclusive)
+              + " ,"
+              + immutableBytesToString(stopRowExclusive)
+              + ").");
+      numKeys = 0;
+
+      if (readNextKey()) {
+        // Dataflow calls start, followed by getCurrent. HashBased reader needs to read on TableHash
+        // twice to return a RangeHash since it specifies both range-start and range-end.
+        advance();
+        return true;
+      }
+
+      isDone = true;
+      return false;
+    }
+
+    @Override
+    public boolean advance() throws IOException {
+      if (isDone) {
+        LOG.debug("Ending workitem at key " + immutableBytesToString(currentRangeStartKey) + " .");
+        return false;
+      }
+
+      ImmutableBytesWritable startKey = this.currentRangeStartKey;
+      ImmutableBytesWritable hash = this.currentHash;
+
+      if (!readNextKey()) {
+        this.currentRangeHash = RangeHash.of(startKey, stopRowExclusive, hash);
+        // return true since we have lastBatchStartKey to emit. Set isDone=true to prevent reading
+        // from a potentially exhausted reader.
+        isDone = true;
+      } else {
+        this.currentRangeHash = RangeHash.of(startKey, reader.getCurrentKey(), hash);
+      }
+
+      return true;
+    }
+
+    /**
+     * Reads the next key from the HashTable reader and records it as the current range start.
+     *
+     * @return true if a key was read and it is within the bounds of this workitem.
+     */
+    private boolean readNextKey() throws IOException {
+      if (!reader.next()) {
+        // Nothing left to read for this workitem.
+        currentRangeStartKey = null;
+        currentHash = null;
+        return false;
+      }
+
+      numKeys++;
+      this.currentRangeStartKey = reader.getCurrentKey();
+      // An unset stopRow means everything is in bounds; otherwise require currentKey < stopKey.
+      boolean withinBounds =
+          stopRowExclusive.equals(HConstants.EMPTY_END_ROW)
+              || currentRangeStartKey.compareTo(stopRowExclusive) < 0;
+      // The hash is only fetched when the key belongs to this workitem.
+      this.currentHash = withinBounds ? reader.getCurrentHash() : null;
+      return withinBounds;
+    }
+
+    @Override
+    public RangeHash getCurrent() {
+      return currentRangeHash;
+    }
+
+    @Override
+    public void close() throws IOException {
+      LOG.info(
+          "Finishing a reader for key range ["
+              + immutableBytesToString(startRowInclusive)
+              + " ,"
+              + immutableBytesToString(stopRowExclusive)
+              + ") after reading "
+              + numKeys
+              + " keys. Ending at "
+              + immutableBytesToString(currentRangeStartKey));
+      reader.close();
+    }
+
+    @Override
+    public BoundedSource<RangeHash> getCurrentSource() {
+      return source;
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
new file mode 100644
index 0000000000..6799d63872
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InvalidObjectException;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.List;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/** Coder used by beam to encode/decode {@link RangeHash} objects. */
+public class RangeHashCoder extends Coder<RangeHash> {
+
+  public static Coder<RangeHash> of() {
+    return new RangeHashCoder();
+  }
+
+  @Override
+  public void encode(RangeHash value, OutputStream outStream) throws IOException {
+    if (value == null) {
+      throw new CoderException("Can not encode null objects.");
+    }
+    DataOutputStream dataOutputStream = new DataOutputStream(outStream);
+    // RangeHash fields can never be null.
+    value.startInclusive.write(dataOutputStream);
+    value.stopExclusive.write(dataOutputStream);
+    value.hash.write(dataOutputStream);
+  }
+
+  @Override
+  public RangeHash decode(InputStream inStream) throws IOException {
+    DataInputStream dataInputStream = new DataInputStream(inStream);
+
+    ImmutableBytesWritable startInclusive = new ImmutableBytesWritable();
+    startInclusive.readFields(dataInputStream);
+
+    ImmutableBytesWritable stopExclusive = new ImmutableBytesWritable();
+    stopExclusive.readFields(dataInputStream);
+
+    ImmutableBytesWritable hash = new ImmutableBytesWritable();
+    hash.readFields(dataInputStream);
+
+    return RangeHash.of(startInclusive, stopExclusive, hash);
+  }
+
+  @Override
+  public List<? extends Coder<?>> getCoderArguments() {
+    return Collections.emptyList();
+  }
+
+  @Override
+  public void verifyDeterministic() throws NonDeterministicException {
+    // This is a deterministic coder as it writes the byte[] in order.
+  }
+
+  /**
+   * !!! DO NOT DELETE !!!
+   *
+   * <p>See readObjectNoData method in:
+   * https://docs.oracle.com/javase/7/docs/platform/serialization/spec/input.html#6053.
+   *
+   * <p>Disable backwards compatibility with previous versions that were serialized.
+   *
+   * @throws InvalidObjectException
+   */
+  @SuppressWarnings("unused")
+  private void readObjectNoData() throws InvalidObjectException {
+    throw new InvalidObjectException("Hash data required");
+  }
+
+  @Override
+  protected Object clone() throws CloneNotSupportedException {
+    return super.clone();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return other instanceof RangeHashCoder;
+  }
+
+  @Override
+  public int hashCode() {
+    return RangeHashCoder.class.hashCode();
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
new file mode 100644
index 0000000000..a664ea2602
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly;
+import com.google.cloud.bigtable.beam.sequencefiles.Utils;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.gson.Gson;
+import java.util.List;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
+import org.apache.beam.sdk.metrics.MetricResult;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * A job that takes HBase HashTable output and compares the hashes from Cloud Bigtable table.
+ *
+ * <p>Execute the following command to run the job directly:
+ *
+ * <pre>
+ *   mvn compile exec:java \
+ *      -DmainClass=com.google.cloud.bigtable.beam.validation.SyncTableJob \
+ *      -Dexec.args="--runner=DataflowRunner \
+ *            --project=$PROJECT \
+ *            --bigtableInstanceId=$INSTANCE \
+ *            --bigtableTableId=$TABLE \
+ *            --hashTableOutputDir=$SOURCE_HASH_DIR \
+ *            --outputPrefix=$OUTPUT_PREFIX \
+ *            --stagingLocation=$STAGING_LOC \
+ *            --tempLocation=$TMP_LOC \
+ *            --region=$REGION \
+ *            --workerZone=$WORKER_ZONE"
+ * </pre>
+ *
+ * <p>Execute the following command to create the Dataflow template:
+ *
+ * <pre>
+ * mvn compile exec:java \
+ *   -DmainClass=com.google.cloud.bigtable.beam.validation.SyncTableJob \
+ *   -Dexec.args="--runner=DataflowRunner \
+ *                --project=$PROJECT \
+ *                --stagingLocation=gs://$STAGING_PATH \
+ *                --templateLocation=gs://$TEMPLATE_PATH \
+ *                --wait=false"
+ * </pre>
+ *
+ * <p>There are a few ways to run the pipeline using the template. See Dataflow doc for details:
+ * https://cloud.google.com/dataflow/docs/templates/executing-templates. Optionally, you can upload
+ * a metadata file that contains information about the runtime parameters that can be used for
+ * parameter validation purpose and more. A sample metadata file can be found at
+ * "src/main/resources/SyncTableJob_metadata".
+ *
+ * <p>An example using gcloud command line:
+ *
+ * <pre>
+ * gcloud beta dataflow jobs run $JOB_NAME \
+ *   --gcs-location gs://$TEMPLATE_PATH \
+ *   --parameters bigtableProject=$PROJECT,bigtableInstanceId=$INSTANCE,bigtableTableId=$TABLE,hashTableOutputDir=gs://$SOURCE_HASH_DIR,outputPrefix=$OUTPUT_PREFIX
+ * </pre>
+ */
+@InternalExtensionOnly
+public class SyncTableJob {
+
+  private static final Log LOG = LogFactory.getLog(SyncTableJob.class);
+
+  public interface SyncTableOptions extends GcpOptions {
+
+    @Description("This Bigtable App Profile id.")
+    ValueProvider<String> getBigtableAppProfileId();
+
+    @SuppressWarnings("unused")
+    void setBigtableAppProfileId(ValueProvider<String> appProfileId);
+
+    @Description("The project that contains the table to export. Defaults to --project.")
+    @Default.InstanceFactory(Utils.DefaultBigtableProjectFactory.class)
+    ValueProvider<String> getBigtableProject();
+
+    @SuppressWarnings("unused")
+    void setBigtableProject(ValueProvider<String> projectId);
+
+    @Description("The Bigtable instance id that contains the table to export.")
+    ValueProvider<String> getBigtableInstanceId();
+
+    @SuppressWarnings("unused")
+    void setBigtableInstanceId(ValueProvider<String> instanceId);
+
+    @Description("The Bigtable table id to export.")
+    ValueProvider<String> getBigtableTableId();
+
+    @SuppressWarnings("unused")
+    void setBigtableTableId(ValueProvider<String> tableId);
+
+    @Description("HBase HashTable job output dir.")
+    ValueProvider<String> getHashTableOutputDir();
+
+    @SuppressWarnings("unused")
+    // TODO: rename this option to sourceHashDir, as in the HBase sync table job.
+    void setHashTableOutputDir(ValueProvider<String> hashTableOutputDir);
+
+    @Description("File pattern for files containing mismatched row ranges.")
+    ValueProvider<String> getOutputPrefix();
+
+    @SuppressWarnings("unused")
+    void setOutputPrefix(ValueProvider<String> outputPrefix);
+
+    // When creating a template, this flag must be set to false.
+    @Description("Wait for pipeline to finish.")
+    @Default.Boolean(true)
+    boolean getWait();
+
+    @SuppressWarnings("unused")
+    void setWait(boolean wait);
+  }
+
+  public static void main(String[] args) {
+    PipelineOptionsFactory.register(SyncTableOptions.class);
+
+    SyncTableOptions opts =
+        PipelineOptionsFactory.fromArgs(args).withValidation().as(SyncTableOptions.class);
+
+    LOG.info("===> Building Pipeline");
+    Pipeline pipeline = buildPipeline(opts);
+
+    LOG.info("===> Running Pipeline");
+    PipelineResult result = pipeline.run();
+
+    // Counters only exist once the job has run, so they are logged only when waiting for it.
+    if (!opts.getWait()) {
+      return;
+    }
+    Utils.waitForPipelineToFinish(result);
+    MetricQueryResults metrics = result.metrics().allMetrics();
+    for (MetricResult<Long> counter : metrics.getCounters()) {
+      LOG.warn(counter.getName() + ":" + counter.getAttempted());
+    }
+  }
+
+  @VisibleForTesting
+  public static Pipeline buildPipeline(SyncTableOptions opts) {
+    Pipeline pipeline = Pipeline.create(Utils.tweakOptions(opts));
+    pipeline
+        .apply(
+            "Read HBase HashTable output",
+            Read.from(
+                new BufferedHadoopHashTableSource(
+                    new HadoopHashTableSource(
+                        opts.getBigtableProject(), opts.getHashTableOutputDir()))))
+        .apply(
+            "group by and create granular workitems", GroupByKey.<String, List<RangeHash>>create())
+        .apply("validate hash", ParDo.of(new ComputeAndValidateHashFromBigtableDoFn(opts)))
+        .apply("Serialize the ranges", MapElements.via(new RangeHashToString()))
+        .apply("Write to file", TextIO.write().to(opts.getOutputPrefix()).withSuffix(".txt"));
+    return pipeline;
+  }
+
+  static class RangeHashToString extends SimpleFunction<RangeHash, String> {
+    // TODO maybe explore a sequenceFile sink for RangeHash. Hadoop jobs using this output may be
+    // easier to write for sequence file.
+
+    // GSON is not serializable, keep it transient. Member variable to avoid creating a Gson object
+    // per apply call.
+    private transient Gson gson = null;
+
+    @Override
+    public String apply(RangeHash input) {
+      if (gson == null) {
+        gson = new Gson();
+      }
+      return gson.toJson(input);
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
new file mode 100644
index 0000000000..2f0c5cc4cc
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/** Utility class for SyncTable job; byte rendering honors each writable's offset/length. */
+public class SyncTableUtils {
+
+  private SyncTableUtils() {}
+
+  public static String immutableBytesToString(ImmutableBytesWritable bytes) {
+    if (bytes == null) {
+      return "";
+    }
+    return Bytes.toStringBinary(bytes.get(), bytes.getOffset(), bytes.getLength());
+  }
+
+  public static String immutableBytesToString(byte[] bytes) {
+    return Bytes.toStringBinary(bytes);
+  }
+
+  /**
+   * Creates an HBase configuration for reading HashTable output from GCS bucket located in
+   * projectId.
+   *
+   * @param projectId project containing the GCS bucket holding hashtable output.
+   * @param sourceHashDir location of hashtable output from HBase.
+   * @return a new configuration with the GCS filesystem and service-account settings applied.
+   */
+  public static Configuration createConfiguration(String projectId, String sourceHashDir) {
+    Configuration conf = HBaseConfiguration.create();
+    conf.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS");
+    conf.set("fs.gs.project.id", projectId);
+    conf.set("fs.defaultFS", sourceHashDir);
+    conf.set("google.cloud.auth.service.account.enable", "true");
+    return conf;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
new file mode 100644
index 0000000000..2f75c5722a
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.api.core.InternalApi;
+import com.google.common.collect.ImmutableList;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.Serializable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * Minimal interface over HashTable.TableHash; the implementation delegates every call to it.
+ * Callers depend on this abstraction rather than on TableHash so unit tests can mock it.
+ */
+@InternalApi
+public interface TableHashWrapper extends Serializable {
+
+  int getNumHashFiles();
+
+  ImmutableList<ImmutableBytesWritable> getPartitions();
+
+  ImmutableBytesWritable getStartRow();
+
+  ImmutableBytesWritable getStopRow();
+
+  Scan getScan();
+
+  TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow);
+
+  interface TableHashReader extends Closeable {
+    boolean next() throws IOException;
+
+    ImmutableBytesWritable getCurrentKey();
+
+    ImmutableBytesWritable getCurrentHash();
+
+    void close() throws IOException; // re-declares Closeable.close()
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
new file mode 100644
index 0000000000..262aadc7c5
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.createConfiguration;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+/** Serializable factory that builds a TableHashWrapper from a HashTable output directory. */
+public class TableHashWrapperFactory implements Serializable {
+
+  private static final long serialVersionUID = 265433454L;
+
+  public TableHashWrapper getTableHash(String projectId, String sourceHashDir) throws IOException {
+    return TableHashWrapperImpl.create(
+        createConfiguration(projectId, sourceHashDir), sourceHashDir);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
new file mode 100644
index 0000000000..71a0f6ddaa
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash.Reader;
+
+class TableHashWrapperImpl implements TableHashWrapper {
+
+  static TableHashWrapper create(Configuration conf, String hashTableOutputDir) throws IOException {
+    TableHashWrapper wrapper =
+        new TableHashWrapperImpl(TableHash.read(conf, new Path(hashTableOutputDir)));
+    // The check below requires exactly one more hash file than there are partitions.
+    int hashFiles = wrapper.getNumHashFiles();
+    int partitions = wrapper.getPartitions().size();
+    Preconditions.checkArgument(
+        hashFiles == (partitions + 1),
+        String.format(
+            "Corrupt hashtable output. %d hash files for %d partitions. Expected %d files.",
+            hashFiles, partitions, partitions + 1));
+    return wrapper;
+  }
+
+  private final TableHash tableHash;
+
+  private TableHashWrapperImpl(TableHash tableHash) {
+    this.tableHash = tableHash;
+  }
+
+  public int getNumHashFiles() {
+    return BigtableTableHashAccessor.getNumHashFiles(tableHash);
+  }
+
+  public ImmutableList<ImmutableBytesWritable> getPartitions() {
+    return BigtableTableHashAccessor.getPartitions(tableHash);
+  }
+
+  public ImmutableBytesWritable getStartRow() {
+    return BigtableTableHashAccessor.getStartRow(tableHash);
+  }
+
+  public ImmutableBytesWritable getStopRow() {
+    return BigtableTableHashAccessor.getStopRow(tableHash);
+  }
+
+  public Scan getScan() {
+    try {
+      return BigtableTableHashAccessor.getScan(tableHash);
+    } catch (IOException scanFailure) {
+      throw new RuntimeException("Failed to init a scan from TableHash: ", scanFailure);
+    }
+  }
+
+  public TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow) {
+    try {
+      return TableHashReaderImpl.create(tableHash.newReader(conf, startRow));
+    } catch (IOException openFailure) {
+      String rowKey = immutableBytesToString(startRow.copyBytes());
+      throw new RuntimeException("Failed to open reader at " + rowKey, openFailure);
+    }
+  }
+
+  static class TableHashReaderImpl implements TableHashReader {
+
+    private final Reader delegate;
+
+    static TableHashReaderImpl create(TableHash.Reader reader) {
+      TableHash.Reader checked = Preconditions.checkNotNull(reader, "Reader can not be null.");
+      return new TableHashReaderImpl(checked);
+    }
+
+    private TableHashReaderImpl(TableHash.Reader delegate) {
+      this.delegate = delegate;
+    }
+
+    @Override
+    public boolean next() throws IOException {
+      return delegate.next();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentKey() {
+      return delegate.getCurrentKey();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentHash() {
+      return delegate.getCurrentHash();
+    }
+
+    @Override
+    public void close() throws IOException {
+      delegate.close();
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
new file mode 100644
index 0000000000..a5312d6c52
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.HashTable.ResultHasher;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash;
+
+/** Static accessors for the package-private members of HashTable.TableHash. */
+public class BigtableTableHashAccessor {
+
+  // Not instantiable: this class only exposes state read from a TableHash.
+  private BigtableTableHashAccessor() {}
+
+  public static int getNumHashFiles(TableHash hash) {
+    return hash.numHashFiles;
+  }
+
+  public static ImmutableList<ImmutableBytesWritable> getPartitions(TableHash hash) {
+    return ImmutableList.copyOf(hash.partitions);
+  }
+
+  public static ImmutableBytesWritable getStartRow(TableHash hash) {
+    return new ImmutableBytesWritable(hash.startRow);
+  }
+
+  public static ImmutableBytesWritable getStopRow(TableHash hash) {
+    return new ImmutableBytesWritable(hash.stopRow);
+  }
+
+  public static Scan getScan(TableHash hash) throws IOException {
+    return hash.initScan();
+  }
+
+  // Public facade over the package-private HashTable.ResultHasher. Every call is forwarded to
+  // the wrapped ResultHasher; having this wrapper helps in mocking for unit tests.
+  public static class BigtableResultHasher {
+    private final ResultHasher delegate;
+
+    public BigtableResultHasher() {
+      this.delegate = new ResultHasher();
+    }
+
+    public void startBatch(ImmutableBytesWritable batchStartKey) {
+      this.delegate.startBatch(batchStartKey);
+    }
+
+    public void finishBatch() {
+      this.delegate.finishBatch();
+    }
+
+    public ImmutableBytesWritable getBatchHash() {
+      return this.delegate.getBatchHash();
+    }
+
+    public void hashResult(Result result) {
+      this.delegate.hashResult(result);
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc
deleted file mode 100644
index 8fe4533a0159f76b5bb3a1968ac5d1fa7fc45a58..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 12
TcmYc;N@ieSU}AWGHv9?z6(s~B

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc
deleted file mode 100644
index 1467a17f1f9924f6a69bd2963d5e21ff088ca3f6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 20
bcmYc;N@ieSU}8|8vgZ5;2Nu4voskOwJbDJ(

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
deleted file mode 100644
index 83e482aac0..0000000000
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
+++ /dev/null
@@ -1,2 +0,0 @@
-
-test-snapshottest�����. (
\ No newline at end of file
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc
deleted file mode 100644
index ea5b25e7785f94c7a36b646dc7c947d4cc4bce43..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
lcmYc;N@ieSU}A`pign*xAbgs=z3KU;RizcU@Jr?pH2^LR7rFod

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc
deleted file mode 100644
index 51cacdd03b5469b099265d607b492728ca48fb07..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
lcmYc;N@ieSU}ESn7HMqPocxn_X+h|wRizcU@Jr?pH2^J{7oz|G

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc
deleted file mode 100644
index 2c4de3ac0ea20bd17cca5a5cfe7b3f696c12e5c3..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
lcmYc;N@ieSU}9*<{16|;p~b$p^6arqt4b?y;g`%IY5+o67%%_;

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc
deleted file mode 100644
index 931ebfb54555d336879fa44ef956de26ba9c2a4e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
mcmYc;N@ieSU}9MD$S>Yy!SuIZFSEYew5qfM7k<ecq6Pqmf*JY%

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc
deleted file mode 100644
index 32f450dba460c3e884c8701cd61ad790f774e224..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
lcmYc;N@ieSU}E5XEhv9;tD>MA=N`FDt4b?y;j9U&TL27c7ajlr

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc
deleted file mode 100644
index 80317a1515597ecbac0015cf7edba1283ce6824b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
mcmYc;N@ieSU}9J$_&t8|r`=f(wzKMOT2)$s3%_IzQ3C*MFc~EP

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc
deleted file mode 100644
index 00a9d7720d3d3867ea0cd0a153d5265158a9b5ff..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
mcmYc;N@ieSU}89xHtAGe`OGB@e!ed@ttzd+g<mp<r~v?92pG`-

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc
deleted file mode 100644
index 1d7e3d8653bfae2874b0d726f295edbbd3e92fa8..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
lcmYc;N@ieSU}CVbo1{1?#CqoE%2>fot4b?y;g`%IY5*h+7hwPZ

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc
deleted file mode 100644
index ca57c97e2deddae20f6c82712db07fd2e35620d7..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 52
lcmYc;N@ieSU}Esk=Ghmve-7`}t(p;=R+U!Z!Y`Ra)BrhK7wrH5

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
index 7f8f8fc2db..921caf2d6d 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
@@ -1,107 +1,133 @@
+// Run from HBase shell. Run `hbase shell` from unix terminal on HBase master.
 create 'test', 'cf', {SPLITS => ["1", "2", "3", "4", "5", "6", "7", "8", "9"]}
-put 'test','1', 'cf:a', 'value1'
-put 'test','2', 'cf:a', 'value2'
-put 'test','3', 'cf:a', 'value3'
-put 'test','4', 'cf:a', 'value4'
-put 'test','5', 'cf:a', 'value5'
-put 'test','6', 'cf:a', 'value6'
-put 'test','7', 'cf:a', 'value7'
-put 'test','8', 'cf:a', 'value8'
-put 'test','9', 'cf:a', 'value9'
-put 'test','10', 'cf:a', 'value10'
-put 'test','11', 'cf:a', 'value11'
-put 'test','12', 'cf:a', 'value12'
-put 'test','13', 'cf:a', 'value13'
-put 'test','14', 'cf:a', 'value14'
-put 'test','15', 'cf:a', 'value15'
-put 'test','16', 'cf:a', 'value16'
-put 'test','17', 'cf:a', 'value17'
-put 'test','18', 'cf:a', 'value18'
-put 'test','19', 'cf:a', 'value19'
-put 'test','20', 'cf:a', 'value20'
-put 'test','21', 'cf:a', 'value21'
-put 'test','22', 'cf:a', 'value22'
-put 'test','23', 'cf:a', 'value23'
-put 'test','24', 'cf:a', 'value24'
-put 'test','25', 'cf:a', 'value25'
-put 'test','26', 'cf:a', 'value26'
-put 'test','27', 'cf:a', 'value27'
-put 'test','28', 'cf:a', 'value28'
-put 'test','29', 'cf:a', 'value29'
-put 'test','30', 'cf:a', 'value30'
-put 'test','31', 'cf:a', 'value31'
-put 'test','32', 'cf:a', 'value32'
-put 'test','33', 'cf:a', 'value33'
-put 'test','34', 'cf:a', 'value34'
-put 'test','35', 'cf:a', 'value35'
-put 'test','36', 'cf:a', 'value36'
-put 'test','37', 'cf:a', 'value37'
-put 'test','38', 'cf:a', 'value38'
-put 'test','39', 'cf:a', 'value39'
-put 'test','40', 'cf:a', 'value40'
-put 'test','41', 'cf:a', 'value41'
-put 'test','42', 'cf:a', 'value42'
-put 'test','43', 'cf:a', 'value43'
-put 'test','44', 'cf:a', 'value44'
-put 'test','45', 'cf:a', 'value45'
-put 'test','46', 'cf:a', 'value46'
-put 'test','47', 'cf:a', 'value47'
-put 'test','48', 'cf:a', 'value48'
-put 'test','49', 'cf:a', 'value49'
-put 'test','50', 'cf:a', 'value50'
-put 'test','51', 'cf:a', 'value51'
-put 'test','52', 'cf:a', 'value52'
-put 'test','53', 'cf:a', 'value53'
-put 'test','54', 'cf:a', 'value54'
-put 'test','55', 'cf:a', 'value55'
-put 'test','56', 'cf:a', 'value56'
-put 'test','57', 'cf:a', 'value57'
-put 'test','58', 'cf:a', 'value58'
-put 'test','59', 'cf:a', 'value59'
-put 'test','60', 'cf:a', 'value60'
-put 'test','61', 'cf:a', 'value61'
-put 'test','62', 'cf:a', 'value62'
-put 'test','63', 'cf:a', 'value63'
-put 'test','64', 'cf:a', 'value64'
-put 'test','65', 'cf:a', 'value65'
-put 'test','66', 'cf:a', 'value66'
-put 'test','67', 'cf:a', 'value67'
-put 'test','68', 'cf:a', 'value68'
-put 'test','69', 'cf:a', 'value69'
-put 'test','70', 'cf:a', 'value70'
-put 'test','71', 'cf:a', 'value71'
-put 'test','72', 'cf:a', 'value72'
-put 'test','73', 'cf:a', 'value73'
-put 'test','74', 'cf:a', 'value74'
-put 'test','75', 'cf:a', 'value75'
-put 'test','76', 'cf:a', 'value76'
-put 'test','77', 'cf:a', 'value77'
-put 'test','78', 'cf:a', 'value78'
-put 'test','79', 'cf:a', 'value79'
-put 'test','80', 'cf:a', 'value80'
-put 'test','81', 'cf:a', 'value81'
-put 'test','82', 'cf:a', 'value82'
-put 'test','83', 'cf:a', 'value83'
-put 'test','84', 'cf:a', 'value84'
-put 'test','85', 'cf:a', 'value85'
-put 'test','86', 'cf:a', 'value86'
-put 'test','87', 'cf:a', 'value87'
-put 'test','88', 'cf:a', 'value88'
-put 'test','89', 'cf:a', 'value89'
-put 'test','90', 'cf:a', 'value90'
-put 'test','91', 'cf:a', 'value91'
-put 'test','92', 'cf:a', 'value92'
-put 'test','93', 'cf:a', 'value93'
-put 'test','94', 'cf:a', 'value94'
-put 'test','95', 'cf:a', 'value95'
-put 'test','96', 'cf:a', 'value96'
-put 'test','97', 'cf:a', 'value97'
-put 'test','98', 'cf:a', 'value98'
-put 'test','99', 'cf:a', 'value99'
-put 'test','100', 'cf:a', 'value100'
+put 'test','1', 'cf:a', 'value1', 100
+put 'test','2', 'cf:a', 'value2', 100
+put 'test','3', 'cf:a', 'value3', 100
+put 'test','4', 'cf:a', 'value4', 100
+put 'test','5', 'cf:a', 'value5', 100
+put 'test','6', 'cf:a', 'value6', 100
+put 'test','7', 'cf:a', 'value7', 100
+put 'test','8', 'cf:a', 'value8', 100
+put 'test','9', 'cf:a', 'value9', 100
+put 'test','10', 'cf:a', 'value10', 100
+put 'test','11', 'cf:a', 'value11', 100
+put 'test','12', 'cf:a', 'value12', 100
+put 'test','13', 'cf:a', 'value13', 100
+put 'test','14', 'cf:a', 'value14', 100
+put 'test','15', 'cf:a', 'value15', 100
+put 'test','16', 'cf:a', 'value16', 100
+put 'test','17', 'cf:a', 'value17', 100
+put 'test','18', 'cf:a', 'value18', 100
+put 'test','19', 'cf:a', 'value19', 100
+put 'test','20', 'cf:a', 'value20', 100
+put 'test','21', 'cf:a', 'value21', 100
+put 'test','22', 'cf:a', 'value22', 100
+put 'test','23', 'cf:a', 'value23', 100
+put 'test','24', 'cf:a', 'value24', 100
+put 'test','25', 'cf:a', 'value25', 100
+put 'test','26', 'cf:a', 'value26', 100
+put 'test','27', 'cf:a', 'value27', 100
+put 'test','28', 'cf:a', 'value28', 100
+put 'test','29', 'cf:a', 'value29', 100
+put 'test','30', 'cf:a', 'value30', 100
+put 'test','31', 'cf:a', 'value31', 100
+put 'test','32', 'cf:a', 'value32', 100
+put 'test','33', 'cf:a', 'value33', 100
+put 'test','34', 'cf:a', 'value34', 100
+put 'test','35', 'cf:a', 'value35', 100
+put 'test','36', 'cf:a', 'value36', 100
+put 'test','37', 'cf:a', 'value37', 100
+put 'test','38', 'cf:a', 'value38', 100
+put 'test','39', 'cf:a', 'value39', 100
+put 'test','40', 'cf:a', 'value40', 100
+put 'test','41', 'cf:a', 'value41', 100
+put 'test','42', 'cf:a', 'value42', 100
+put 'test','43', 'cf:a', 'value43', 100
+put 'test','44', 'cf:a', 'value44', 100
+put 'test','45', 'cf:a', 'value45', 100
+put 'test','46', 'cf:a', 'value46', 100
+put 'test','47', 'cf:a', 'value47', 100
+put 'test','48', 'cf:a', 'value48', 100
+put 'test','49', 'cf:a', 'value49', 100
+put 'test','50', 'cf:a', 'value50', 100
+put 'test','51', 'cf:a', 'value51', 100
+put 'test','52', 'cf:a', 'value52', 100
+put 'test','53', 'cf:a', 'value53', 100
+put 'test','54', 'cf:a', 'value54', 100
+put 'test','55', 'cf:a', 'value55', 100
+put 'test','56', 'cf:a', 'value56', 100
+put 'test','57', 'cf:a', 'value57', 100
+put 'test','58', 'cf:a', 'value58', 100
+put 'test','59', 'cf:a', 'value59', 100
+put 'test','60', 'cf:a', 'value60', 100
+put 'test','61', 'cf:a', 'value61', 100
+put 'test','62', 'cf:a', 'value62', 100
+put 'test','63', 'cf:a', 'value63', 100
+put 'test','64', 'cf:a', 'value64', 100
+put 'test','65', 'cf:a', 'value65', 100
+put 'test','66', 'cf:a', 'value66', 100
+put 'test','67', 'cf:a', 'value67', 100
+put 'test','68', 'cf:a', 'value68', 100
+put 'test','69', 'cf:a', 'value69', 100
+put 'test','70', 'cf:a', 'value70', 100
+put 'test','71', 'cf:a', 'value71', 100
+put 'test','72', 'cf:a', 'value72', 100
+put 'test','73', 'cf:a', 'value73', 100
+put 'test','74', 'cf:a', 'value74', 100
+put 'test','75', 'cf:a', 'value75', 100
+put 'test','76', 'cf:a', 'value76', 100
+put 'test','77', 'cf:a', 'value77', 100
+put 'test','78', 'cf:a', 'value78', 100
+put 'test','79', 'cf:a', 'value79', 100
+put 'test','80', 'cf:a', 'value80', 100
+put 'test','81', 'cf:a', 'value81', 100
+put 'test','82', 'cf:a', 'value82', 100
+put 'test','83', 'cf:a', 'value83', 100
+put 'test','84', 'cf:a', 'value84', 100
+put 'test','85', 'cf:a', 'value85', 100
+put 'test','86', 'cf:a', 'value86', 100
+put 'test','87', 'cf:a', 'value87', 100
+put 'test','88', 'cf:a', 'value88', 100
+put 'test','89', 'cf:a', 'value89', 100
+put 'test','90', 'cf:a', 'value90', 100
+put 'test','91', 'cf:a', 'value91', 100
+put 'test','92', 'cf:a', 'value92', 100
+put 'test','93', 'cf:a', 'value93', 100
+put 'test','94', 'cf:a', 'value94', 100
+put 'test','95', 'cf:a', 'value95', 100
+put 'test','96', 'cf:a', 'value96', 100
+put 'test','97', 'cf:a', 'value97', 100
+put 'test','98', 'cf:a', 'value98', 100
+put 'test','99', 'cf:a', 'value99', 100
+put 'test','100', 'cf:a', 'value100', 100
 snapshot 'test', 'test-snapshot'
 list_snapshots
 
+
+////////////////////Run from Unix shell on HBase master node//////////////////
+// Export the snapshot
+hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot test-snapshot -copy-to /integration-test/data -mappers 16
+
+// Create the hashes for the table. Run the command from unix shell on an HBase
+// node.
+hbase org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=100 --numhashfiles=10 test /integration-test/hashtable
+
+// Copy the exported data from HDFS to the local disk, then upload it to GCS
+hadoop fs -copyToLocal /integration-test /tmp/
+gsutil cp -r /tmp/integration-test gs://<my-bucket>/
+
+// GCS bucket should look like this:
+$ gsutil ls gs://<my-bucket>/integration-test/data
+gs://<my-bucket>/integration-test/data/
+gs://<my-bucket>/integration-test/data/.hbase-snapshot/
+gs://<my-bucket>/integration-test/data/archive/
+$ gsutil ls gs://<my-bucket>/integration-test/hashtable
+gs://<my-bucket>/integration-test/hashtable/manifest
+gs://<my-bucket>/integration-test/hashtable/partitions
+gs://<my-bucket>/integration-test/hashtable/hashes/
+
+// Run from HBase shell. Run `hbase shell` from unix terminal on HBase master.
+// Clean up: disable and drop the 'test' table.
 disable 'test'
 drop 'test'
 exit
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
new file mode 100644
index 0000000000..03ac02e452
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
@@ -0,0 +1,2 @@
+
+test-snapshottestϹ���. (@���������
\ No newline at end of file
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/data.manifest
similarity index 55%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/data.manifest
index 180516dc03055633111dc6316e9e50e08c8196ba..6439f06130b1e7f80e67897eedf5a1e582d1545c 100644
GIT binary patch
delta 467
zcmZ9IF-n9n5QT9?Slg`@OIPq0S(r&ClbJ)<=m|1O<_O{eZ0rPY;0Xk8u&wM#Z0xi&
zq9|_nnveIrx0!F|Pe*%)=Zn?+cKdes`tUNF4!+al{<ID?Gdp^fZp6^ci<T%5F-lbn
z%los%)#Bub-R@0mmqx%zT2`ZyQPBfhb)?dRsQoA2M@RPViIoCIBkNEG>RBtL08m5w
zA})XZj%TN9)xgc66%Zp&SYk|=iyIKQd=r0Gt6^2VC*{#o8Hqq8wh~&kL@f!H9jlu`
zD07#grQRB`K#|IBPDW_lX@~fjP3s|^qG)V^g|hg#hlHvb19ID4emw{)2DuA}a-_~d
o3spqyC5)Ej{r^`N`_pQy=&gdZ&W#9JG>AA*%@ut3E}zdn0Vgezc>n+a

delta 499
zcmZXQK}wuK5QUjB!bn0ElYb%EP8NpnLswUIb=56IPf*p>eV%|<$N>U^l1&y~A{P+6
zhOLVvGW+-VzV{w>54)eAH)q%TFT3AQKmI)4TyB<|`|Yx}FvVt~I*@7^4lNB)QInAZ
z<ox^B{oB@ulVwG&Dh7-_60w7#fb<LzwE#OEw|;Gw)ze_HX38@n0~{(aP%n}v*8Qb*
zwLko4*tVZmu`xhWN6SRfnxxt(x;T&4lhtF^-85`jshCMvL(-fT&|q*HJ)EJu8h-oB
zI&3aZ|NUYw3v()1xCjCgKo*yfQ3}_X>d}s=@82zJh?6>2LIq0I4NH`sV$KsRP0w%N
yzIm~_U?Lio-Iyv(<E$OsdL1D-;F$AqyBxT<HkLW3jQIs>Fa&TVrRoXOY&`>vR+#$$

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b
similarity index 86%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b
index 5320c6c58dbe6e391c4994185de1655b0e874b09..1b91b948d8df70a6137e860707da34bc8dfb702e 100644
GIT binary patch
delta 175
zcmbQBIYD!R3Cl(s9f64!GC*2)<-`bC5WRO|C73=xaTb_<GI2MU{yy;zh}Pqp_!mse
zPnKf@GYlr%Ll_Q|6BuO$fl2~d$`W%*Q;jV7q{2!zOzvaM;{;j6GKF{9*~zR-`@q&t
sKF&0U2PDkEz*q*-G&!F+jb&q*YQp53%<@1DKw~uY&E9QhWa$?G05nKA^#A|>

delta 175
zcmbQBIYD!R35(ZD%OevlWPr5Q+=&sgAbQcnN-(`_;w&(|a^h|<y>{XqFuiT!UogFE
zvK%9rv2U_HgmG|k0;8-T(1@cfWr;bZsYaH3m93g#llvI+I6)S%#5C*LO=e}<2ex+d
qai%#uAYldu#xjtm$@$D_KpC6r$v2thff|6uqzTP=wwaNoUjP7^+C!%R

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d
index cbd9f539b3fa44c3bda39f35407a12a5071cbdcf..951eb512ac0e59374112230c0c7a0e2bbd38c54b 100644
GIT binary patch
delta 175
zcmbQBIYD!R3Cl(so$C`VWPr5M@`(|$AbQ8dN-%w5;w&(IW8!Ww{e0pbF#TiVUl47~
zK3R?t%#fID4`FCdPGFQ31WH-5lqKerrdnF^9k{Z6>f}DgJWh~BEPLZ$=T2s2+6T6F
s@^Pj)JRo5P2F5awrpfutX)GH{jEyGWWR?eN02*^%s)l(pBTK&k0N)!u{{R30

delta 175
zcmbQBIYD!R35(ZDEAELFGC<mT%ftv-5WRh3C79kdaTb`~J8?IdJ}~hPm_9u5FPJ_y
zS&k9RI62uK!Z<TIfl*cvXvBGzvc#OyR7*>~a>v)VC-*Vtae^#jVS1e$I+>MeAK2Q-
q$C>8vfP@(s7|TGKCg(G!0cC8|C*Ne22WkKs^TkzX?`B4pegOb!dP2Sc

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b
index ee586c252e4a4f151be3c850154faf5f1b348647..dc89f02ec2300b7003ed249fab5a308281318d4f 100644
GIT binary patch
delta 175
zcmbQBIYD!R3Cl(s9qox0GC*1{WnzRZh%TL238vd8&H~djChi8)t0&$8(<dhW1=IH?
z%Q1o(A12#_8Tt&96BuO$H`?gSu#_d{l%^V6@;PtWS~|IpF^>~u5sT*bF3riTO#8sr
tPCm{whX*9gz`$4r(lj}rIgMpw`C_-pH<{&u8i2-BYgEN<W@PCX003yBIm`e6

delta 175
zcmbQBIYD!R35(ZD%kvX0WPr5Q$%zrNAo|S2N-%wX;w&(IY2t1$eRbj;Fnwd<Uod@V
zvK%9raeuNsgz<QC0;8-T(1@ojWr;bZsm7Lk8N25zP3~jN;{;j6vann@axyE^KCrcu
qk2B5T0SPlOFqVNdP0nXd1IpO2Prk`457YoOMoBN-c{3wRzW@MwGDNrl

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe
index 05a0cac912ce8c197302109dbb7b54431575ce14..c7fb208f72440508422bf2e7da849eab758bc7ab 100644
GIT binary patch
delta 175
zcmbQBIYD!R3Cl(sou-KvGC<m3@5BgM5Pf!HC78ZDaTb_<J#jaf{x$Iqh&JSz_!mse
zPL^W?Gqfk$Ll`!b6BuO$fl7Q?$`W%*Q_U>-O!f+_p4`Wn#|g5CMQQ)OO_N!f_JOUP
se4J?x4@j7Sfw2svX>vYu8q3Dg;{M4undN~RfW}yb$aZaJWa$?G00+N2mjD0&

delta 175
zcmbQBIYD!R35(ZD%Qq7(WPr4F?8FFJ5S=iw5=<vgoCT)SChi8)SrhMo>D-Bb!F1td
zIYuy}c(Of&Q8qb&QC1LWL?ugEVoqtQnI&J6@WgAA`xx^$K^C#Reyf)|nU!fD*xJd*
pndb0-gc%qZ%RrhY=QF1PWo&XM-(;2tY5*GZM|#!C&5SJl0s!2eLka)@

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d
index e8d9789f5e9c26ecc1eed6ed9e7061b6e0a9d461..7638f6eabba8ff4ffcb7c5752dbc5991db41cb51 100644
GIT binary patch
delta 175
zcmbQBIYD!R3Cl(sojnsRWPr5cw}}z5K-!3XVkMXspEwIlYfRh?rfnzQ0n`2y|AOh{
z$#RTfM%iS02%~*+0;8-T(1>X)Wr;bZsTP)eFBmflC-*Vtae^#jIbi;QXEH0(KCrcu
sk2B5T0SPlOFqVNdP0nXdW7$|9aAxvNW_h3npfMRw_&GN-vh)i80A<!Yy8r+H

delta 175
zcmbQBIYD!R35(ZDE9Qw7GC<mT`osuX5It*RC77N&aTb_fFmX4SUOe#*m|i;ZFPL62
zS&k9RSUuSu!dN#sfl*cvXv9XAvc#OyR0~VK>UlbqllvI+I6)S%oc{53^<-A2ePC-R
qA7`4w0}^InU@QY^nw-y^29&WenS7I39;g9mOe6b~vzr-N`UL<08A4nD

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b
index dc8da56c10141d296961f5567f3c5cd6649d72be..c6ba1f760bb956a3846658b41a1353c2e14946b8 100644
GIT binary patch
delta 175
zcmbQBIYD!R3Cl(sommqtWPr3`+{6f35S=%%5=_@loCT(PChi8)vnSpG)2k=`1=Bkw
z%Q1o(CnwuO7&j&-Fv<!7jd;#dmY7qTYHrDQb!G9%$$gA@oFI!>u4LXkJDHVfAK2Q-
r$C>8vfP@(s7|TGKCg(G!v1}|6P?~&`SsthXXpFpTp6zBvmVN;M#L7Pm

delta 175
zcmbQBIYD!R35(ZD%Wo4cWPr4F&BO><5Zy4b5==KwoCT&^C+-H*9TV?>>F$Yt!F2Cr
zIYuy}f3iJ<F==uFqpTp%h^Z`Pi8-aI=9YZZ1aAMH+{c*539^XAs;=_SWLBnqU~4BI
pXPUzU5@ujvECXqpoX?yFl(7+;e3Mxor~zn9lU$0$W=58N0RV*LLrMSu

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab
index e77357601a05ccd925e69ea4229087e12e5adac8..5a757daec80bf2711d38269c6bd78c015e8b68ed 100644
GIT binary patch
delta 175
zcmbQBIYD!R3Cl(soveu#GC<luc4CAqh}M}{38rl(&H~du6L*8@=!tj0boRu*V7hj)
z93z;~J=q?@m_0dxQC1LW#2S{e#GKMpQ%gSW@M)5h`xx^$K^C!C{EfAr%*wP6Z0+RZ
rOmlcZ!VC<IWgtzH^O@6FHkO_UpL~;99;g9m%nPyi8#gnu^a}t0$u2uJ

delta 175
zcmbQBIYD!R35(ZD%SRI}WPr4_&BO><5bZFr5=^^HoCT)cC+-H*UK8(tX}^ho!F1qc
zIYuxeWU@Vk5k5JAQC1LWL=;O|VoqtQsU_bQ1}WXieT;dWAd6TY+dJz{W@Xw3ws!Jy
pra3$yVFm`qGLWXp`OIlR85_UJH<{&u8i2;k{Ze~;Gb2mC0079FKhgjI

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2
similarity index 87%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2
index c119dd13ef4179dcde442da0461e0f1b50569ba1..d29619e3ecdd76e1442d92962be623e16ad62f76 100644
GIT binary patch
delta 175
zcmbQBIYD!R3Cl(s9hZp~GC*42cw&Ssh>n<838phA&H~eQ6L*8@$rJB@>5UWrg6X4^
z<ru+?E0gUZjJJ~$7-a>4M*L+dOUx-vHL>LTZ{{^~avx(JC&(g}=c_6|PG)7=2ex+d
rai%#uAYldu#xjtm$@$D_EE~(!O()-EmIrD88q-jlQ?!|prC$I57YaTS

delta 175
zcmbQBIYD!R35(ZD%bOD|WPr5Qi-{4kAo}&hN-+I?;w&)zY2t1${cYkMF#U7lUoib=
zvK%9r@qe;Cm|@L4Ie}4D(CekOGD}%vPHC!%B_IDzxz5RbjCq_Oi&)}X^G{A@W!eX}
scJgtiIXoa?1_s75kfzD`%xORw8^_5vndN~RfX2*LwQk(Z$kHzW0AsR4od5s;

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85
similarity index 86%
rename from bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9
rename to bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85
index d640fc8498e06935ecaf06a2714dda361af9ed7e..337b5f9280f2074041f28b211fc1c08e356fce63 100644
GIT binary patch
delta 232
zcmdn2xmk093Cl(s?W+?lWPr5J>xmI^K-$V|Vikzi`8#orERfdapSTxHt4_QJrp+e)
z2h;A86&S&c_{olpvcW)=1uSKWIi;zF<~$4x41z$+&cI}7!IYeq$iTo@22{`lRbau3
zq`(qM!4#+hOTI73e;!QU$e70oav=*3^S23;HJJ7R-J^YV@++n}JRk<dR4cQ|lbF+3
bHWt^Nn*4`Z9;iWg3X9>+xwV^BSo#G3wO~m{

delta 232
zcmdn2xmk0935(ZD%Z(E)WPr34|HKG6AZ>kVVikzC5}7zh7DP)-+zX~vCf)<n8WaD6
zX`RUmj9`Z0WJgBXV4zA<ma@d0(o{op9tH*mK_F&lU^28|N={2;U|=i*DwqgWV8M%|
zz!FKpOsE1&zR!Gpa+5bQ=5c~t$in^MW5Hw%rhP#7SZ<vBifIlHhygLx`qJb{%xORw
Zo5aa~nB{>QtY)$-y*rzKvkFVU00840OZNZ(

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/_SUCCESS b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/_SUCCESS
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data
new file mode 100644
index 0000000000000000000000000000000000000000..26334294df07a48144f34b9993aed0ed2746d4b3
GIT binary patch
literal 158
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000wj7j(*ryrSmUs)4D@e=eth>1tox(1tQBJa7DJ~W<SFwmX{5jGv54gssjMg
C963b*

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index
new file mode 100644
index 0000000000000000000000000000000000000000..f7ac1fc941eeeb049e2eb787522db7a2b6fdcf8b
GIT binary patch
literal 220
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1Yc)s8B1PJG`i)s
zZrb#U8p;1afG)<!SurP>k%57gft3fyN=QfmYGh#m2|ot1I2ky>!VJw;3=C}y^#HjI
BOz!{y

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data
new file mode 100644
index 0000000000000000000000000000000000000000..87b715673c072c3c938847bb34e3d87458df53d8
GIT binary patch
literal 534
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000wa``wmpw%clv)cX_ji8}9%LN&+z}5HlJA34zX!>vx*6xT?>7S8U^U`idJ+
zObUqEfSAe90Hi|R|8-=3z%)%i)l(f-K{A0rF=-%X2V!PJ1F(|S!B;A_voBv~r1#_6
z+00Yxfl>%Vz}B!hD@B`#ZEJaAC37UKTyPeO3L_v-;QG5o4Z#yUKP|mkA+yh`!5Bpa
zh$kT4ar4Ptw)W;r$G3g_p!@X`iV72uipNiOxSzdY`9osu9hV;;6;7b2Fa@bFw)xV-
z-pJcN!LrTrNYPp!6cuJ570TKy@1y2~8c$M}n0%A5Sqw#mIY>p^whJj4b2B>?9%g>J
z?3X8wqQU~Cg8i4h<&O&W%f0ul{_R(H=S5Lr2~uIMwWR#jw+D^qOAow>+x$TU0Kb`(
AeE<Le

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index
new file mode 100644
index 0000000000000000000000000000000000000000..4edcbd1ed54689f7fa7c0e7df1bf9a5e42e56056
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1XJF8_*14_a_(E`
zlJ`YF<i!620lF9?XT_XkRt5$(1~y(GD<L7lgoS~@g25Ol{upQmCj%!BNSvYBih-ex
Gp&kHrOH#T3

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data
new file mode 100644
index 0000000000000000000000000000000000000000..4b59b346f0deb2a4278648944a7270e90c6eb90d
GIT binary patch
literal 499
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000z^1yGsw1#Rgh03;jMvT6`{0P!fn)ftb+<NC*g?Q9SrtgtOvqLY{(#5w{Ic
zObUqEfSAe10Hnh9L#*cN@ClMat-9AAeGXPZQDF#DQBpml!@2Y9%6`H1oHs<nUZSV~
z+j7i9qds0D^2OJ!%^p%+e@~;RFb0_+<6A2bS5uej#H1E2us%-<MTH4Sh1x^mpw}hA
z@1_3pZ3_DC?Si7h6r`g2x>4KI$cwJ5&y`B{+xfgjQDFvB!DpCZC%c*T!I76TclZ|_
zx{IR19Hip)BeT64@eji}y1%?QyN5L!MTG@Og;mIjt0{RmgKw<a_R2_Jb|Q)jOOT2x
Uwx1L3Bwb0hxw+`<&xdI%0T0WF%m4rY

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index
new file mode 100644
index 0000000000000000000000000000000000000000..4169ee8258fe08cf71236bc8d39f98bb7b2349c3
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1c~Q=ExTK`ne9zl
zfBxObzUBXc09}lcvtmv%D+2=?0~;@pm5`8N!o<K}$zTE$e+)E(lYx^5B+k%m#lX<U
GP!9l8>r!9<

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data
new file mode 100644
index 0000000000000000000000000000000000000000..a05197b51daa805ee91c57d9363da0daa6ee2189
GIT binary patch
literal 499
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000!YIlFphgZyz19_}0q2tDG4qC<(-@K+I?iBm|<C-n9}{cAqW4#&mWS!;%d^
zF)1Ks17ap)1CR>q|K7DzxTTm~EqhLSz4nelQDF#DvC#E~+4@Uc)?L_pRj6rCLo$jA
zBan(3mpGB12h6`Y?`*K+^eVAMQ2};G80Uf_lg-P12R^H?o*wXS28s$3kQuGk8*}Vp
zw0T!Hh`i$e>nVYv!W5)p<(lk<gPz4az3ZH{o~dw!qNp$fsVJ;upAx~ex_!~a0Od62
z#XC?`n1fXG%@%u97(HRP>6NljRdz8k6crXA6@l-PB79GIIsOt%IN$N(YdeYxOOOiA
UzLlcW7`+Vd-SaYd^8KO?03}C*lmGw#

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index
new file mode 100644
index 0000000000000000000000000000000000000000..9228013bfa781e1fdfdd8b1ea55ca72769c171fd
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1QXR=R!%MBYIAzd
zwBGg9^lkrv09}lcvtmv%D+2=?0~;@pm5`8N!p6X0#b62)e+)E(lYx^5B+k%m#lX<U
GP!9kn%1$Hz

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data
new file mode 100644
index 0000000000000000000000000000000000000000..6e29b085e7ea76145e5c0d89a89d2643e40f6ad1
GIT binary patch
literal 499
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000#de(vI^z@=m^4Vi*_Cdpip#C<(-@K+I?YBn0&K|B2spuYC0m^DSa!$8G(9
zVp2fN2E<Gz1|Su#O$Na_Sz8%u%lp>0EaF{_qQVfQ;$;1Dt@b^Ehc~#~ZL7ccG7UwA
z5lF@PIpPs{>m`EuGS&)x4L)vxqQV%Y!o%ZCz}A;v+!vTU2$hX$+>W9G?9#tsF<V|)
zTn)|(yW$!8bBZ8}3R937bEcPMJv@B3VueV2^!5EYvM4IdKq@qn%W{~HYCN2j;X19^
zylM}M3UiQ(g9_hrcW;+4sl8z<vQ_NZ3ltR~cMJSq%Y19q%Dgk%F8M5PU!FY|MTI3u
Wh0m_aeFCR%7GAgBoE(yQ?=}FF@{h0p

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index
new file mode 100644
index 0000000000000000000000000000000000000000..245c2ceb3fbc406d156b185abd5c4ce709f2f58b
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1Q#|pG4)=y(U$qr
zQt^UG@AiKnKo?`=teBI`%D}+Jz{U$?B_t%6FfuS$GnfIz9|O(cWZ>ihi8C}?F)*|-
G)B^xx2Tsob

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data
new file mode 100644
index 0000000000000000000000000000000000000000..40cbf30418cb09231e837d21346d0da221394d1e
GIT binary patch
literal 499
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000#c<6^B>+J-t@zllQ_*|L{bhpd=8p0x_d0kPt9`@~nK@C+6IzOR5*;e%rwf
z6q5pCHXvp)H2|rY8yq_K(N@<5{N{W41d?5^pr|kesc`yezDDPxYLK$b6oba+-@c%z
zFaoJy$>@63(EhFOvyNBcucgH+P*fO$R8(F+Eb(7*qRITSMQ5DdV%DLkFafDJF1RS>
z;vJ2&BUbFWL1r_4qNo7-#V#O-d08H}Tu6LUfqTR$FBBDKATwf01gA!%W%azhC2zyW
z7<>>#g*iw?yVly}^^E?n`d2(%RIp!I0!4)dNX0q7Sw`<x?OMU!)Gx4hQCI<r3QLd*
V#`@h|+Q+^X%c)P8w|cq9djNpbilYDk

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index
new file mode 100644
index 0000000000000000000000000000000000000000..dbbacaf8f0b917747b1f8c86d15193806d1d97dd
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1n;i2zth?lz2=@_
zuKB5;g&F^W09}lcvtmv%D+2=?0~;@pm5`8N0_52+m;=Qh1I^%M;N$^`Gc;Q<Ftjn$
F0{{uwQ6~TZ

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data
new file mode 100644
index 0000000000000000000000000000000000000000..3f0e32269c2d5274dcebbcb4621ef8657bceed30
GIT binary patch
literal 499
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000y@mIoYQ4oK)3jUhr_=mk%p}f|5YY3dD?NKtiDVliHTeJ)x_Ob>qH1`1IES
zC?*BOY(UIpW&k7wuB({*IH<Q`%0;yW=c}iGazs&K2vQOCv>;wXT>jw4a)qgWx{F;<
zR2YF&yf@Ku;=R~ZAKN6nVQ)v%2^1B^AQkJL^vYlQ&6^stL-<3%BI(5_Doj8su9|$3
zx~Sz8^IbkUfGsQe8Hx&1kct=6E~<MTOK(#4Zht!^Anz}V3b1cewL}6>%>Kri&pvb7
z+=Z{IP*j+M%-E1wHpA%eN$pOx+$U_NJHMl-umGvZ;O}1X)*vDMV4$U0NT|yM6cv^r
X6>A>--*;l_Io3Xpx%CUuAFToa5vh*G

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index
new file mode 100644
index 0000000000000000000000000000000000000000..a0818358eb72b6aaf321eae16c134e76fa900a8b
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1PAY$73Xd}f9Arq
z4QE#F5@`Mp1n6RnoE3ADSs57E7}$7$tb~LF6J`bmTLufD_+y|MoD7^iAaRCfD+Y!(
GhI#;(6jO5m

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data
new file mode 100644
index 0000000000000000000000000000000000000000..effda57ece6b2d945b6af33fe25b9138a781b8ff
GIT binary patch
literal 499
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000x`?XIOSxf7JhZ)=)uYSso)$P!fn)ftb-8NC=#8F?;8+d+sBn_XZajx7S|;
zib(-68xS*@8-P@(UMz9=p(pWbl0!+%l`Y4=qo^<hsc^r4=h1_(sWlS?4)v}6B&Ud?
z!U&`ykl{@J*G7HbIJbb@nYvPcP*fO$REW#S%v0F@G+piOw)WXIj<zT&Oh76gSkK<i
z+qSL!o5)YMPi9GtC@M@rD%#}~qjp^T&UZj$DZ8M-q<ttV%s?tcdA?2F_+-UABjeR&
z2i>a+P*i}!V=uErT$bWr)iVh;Kjl_kXhl(B0Wu?;>&UK@xj(+%uG<<t^_BK26cv^r
W6@DN6@2q?>d!N$%H1$OV({llum5v(#

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index
new file mode 100644
index 0000000000000000000000000000000000000000..a8eb1a1748bad7bffacfaf7ce6d21a7d8118bb97
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1ka>5FS_F$<+18j
zR@#wQkM#co0lF9?XT_XkRt5$(1~y(GD<L7lgq?xGj=>Tr{upQmCj%!BNSvYBih-ex
Gp&kGrG*QI>

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data
new file mode 100644
index 0000000000000000000000000000000000000000..011b956c5f23f9af42f3247c61757e30323ee168
GIT binary patch
literal 499
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000zyiT?W5-c@Oa&Fge0;;M6^!pd=8p0x_cnkPs;7%i0>8C({t>``3KKTH6+&
zm=qAR0Wp(>0Z7I9<yU_4hn`ur<w2gRiRY}_C@Ks=Dqg?g-k!04ug;^xjwgIoW==v;
zVFXga@GJRC{G=Cm`DL#6c-)`aiK4<7q+)SK%()4Y_R3a*u9x3m-lmVD!UUvZ-`SaG
zmUu=+KYt;6ZneFpG>QsSkc$0UGBt+JPK)*}Tu|%E;EigF8AwI!+&OzD98b)XTcNl@
zH_?0{iW%l06>sjWoO>dkHDTM5M^jnmi^-s<0EgI$FDYN{2`qBovQpXY8e80D6cv^r
XGi(f7l_%Wzwdwbf+_}rSk7)t`Qs0j{

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index
new file mode 100644
index 0000000000000000000000000000000000000000..fada13a25649e97d10371af604bb0f353ab3ef6a
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1S@<E&Q>gD*z~Kh
z^ysO-QcC}U09}lcvtmv%D+2=?0~;@pm5`9I1*p@W!3rq;7|7ye;N$^`Gc;Q<Ftjn$
F0|3T0P^tg`

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data
new file mode 100644
index 0000000000000000000000000000000000000000..f55fa79aca13a12577fa3dc8758c209e6c698ac1
GIT binary patch
literal 499
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
zb4b_000vJc-gbFuYkT5IV2zouzl;M=P!fn)ftb+}NC-^u`{ns`qilPV`TZTc-|Pwk
zib(-68xS*D8h})+)ahF@H+5>B^_1R|xsRXMqNp$gshE+(oobgM_=kCi;GP%X&M!kz
zVFXeUlK!M<Yn1s`*$##Ivj1amp{OthsW8;hVYA!aQ+r~k`I_e%F^MQDOh78m_)ClO
z6n9;Hyg%4W`Dv{uiV9PZihI}hcs={9pXwYoC4|8!;Wdg1GmwfNL!AiUNRKe5lZ*VH
zOpdccQDF{J!7g^`46~z}b;j~u{rkq{l_)AKKq@+2O&Stsvd1^;Hh&J*7d(xk0vx6~
Uj~`!{EW(m;;9Z_iv76u%0D=IA_y7O^

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index
new file mode 100644
index 0000000000000000000000000000000000000000..8c8793cef8166e1551f1e435517d06eb765d43c7
GIT binary patch
literal 221
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+{B=cU6`FfwXkRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1i|Igk4IIux92q`
zYUuu1!~P!#(8U-zE9NA#GBB_)u<-&}2?+^XSQr=_7_5QfkAY@zGH~*M#2K2c7#P|Z
F>H#`*PJ93W

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest
new file mode 100644
index 0000000000..a95421d027
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest
@@ -0,0 +1,4 @@
+#Wed Dec 30 01:23:41 UTC 2020
+numHashFiles=10
+table=test
+targetBatchSize=10
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions
new file mode 100644
index 0000000000000000000000000000000000000000..1d447dd67a92849e26ffb1864ce2bc5135dfebf9
GIT binary patch
literal 342
zcmWG`4P-OSFG|--EJ#ewNY%?oOv%qL(91|lEKb$S%-8eG%`GiSOv*`hsw_z@4ll|C
za}+Uk0M+`H=H$RsFfwRiRg;{bTTql*T&(AknwD6aQ{tSTlA6o_1mV{-<yZBuv@U<c
z{;2TF#T=jzFA%c=F{5F{9A+TRzzD=}K_hH}#@GZ+unC%C6EwpnXpT+L0-K;EOb`H6
CPeqUb

literal 0
HcmV?d00001

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
index 62f1cdced2..e7f777f9bc 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
@@ -18,37 +18,54 @@
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import com.google.api.services.storage.model.Objects;
-import com.google.cloud.bigtable.beam.sequencefiles.testing.BigtableTableUtils;
+import com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot.ImportOptions;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
 import com.google.cloud.bigtable.hbase.BigtableConfiguration;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
 import java.util.UUID;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
 import org.apache.beam.runners.dataflow.DataflowRunner;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
+import org.apache.beam.sdk.PipelineResult;
 import org.apache.beam.sdk.PipelineResult.State;
 import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
 import org.apache.beam.sdk.extensions.gcp.util.GcsUtil;
 import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
 /*
- * End to end integration test for pipeline that import HBase snapshot data into Cloud Bigtable.
+ * End to end integration test for pipeline that import HBase snapshot data into Cloud Bigtable and
+ * validates the imported data with SyncTable.
  * Prepare test data with gsutil(https://cloud.google.com/storage/docs/quickstart-gsutil):
- * gsutil -m cp -r <PATH_TO_REPO>/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/ \
- *  gs://<test_bucket>/integration-test/
+ * gsutil -m cp -r <PATH_TO_REPO>/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test \
+ *  gs://<test_bucket>/
  *
  * Setup GCP credential: https://cloud.google.com/docs/authentication
  *  Ensure your credential have access to Bigtable and Dataflow
@@ -87,6 +104,8 @@ public class EndToEndIT {
 
   // Snapshot data setup
   private String hbaseSnapshotDir;
+  private String hashDir;
+  private String syncTableOutputDir;
 
   @Before
   public void setup() throws Exception {
@@ -101,6 +120,13 @@ public void setup() throws Exception {
 
     hbaseSnapshotDir = cloudTestDataFolder + "data/";
     UUID test_uuid = UUID.randomUUID();
+    hashDir = cloudTestDataFolder + "hashtable/";
+
+    syncTableOutputDir = dataflowStagingLocation;
+    if (!syncTableOutputDir.endsWith(File.separator)) {
+      syncTableOutputDir = syncTableOutputDir + File.separator;
+    }
+    syncTableOutputDir = syncTableOutputDir + "sync-table-output/" + test_uuid + "/";
 
     // Cloud Storage config
     GcpOptions gcpOptions = PipelineOptionsFactory.create().as(GcpOptions.class);
@@ -118,6 +144,12 @@ public void setup() throws Exception {
     for (int i = 0; i < keys.length; i++) {
       keySplits[i] = keys[i].getBytes();
     }
+
+    // Create table in Bigtable
+    TableName tableName = TableName.valueOf(tableId);
+    HTableDescriptor descriptor = new HTableDescriptor(tableName);
+    descriptor.addFamily(new HColumnDescriptor(CF));
+    connection.getAdmin().createTable(descriptor, SnapshotTestingUtils.getSplitKeys());
   }
 
   private static String getTestProperty(String name) {
@@ -126,6 +158,17 @@ private static String getTestProperty(String name) {
 
   @After
   public void teardown() throws IOException {
+    // setup() leaves syncTableOutputDir ending in "/", so only the wildcard is appended here;
+    // "/*" would produce a "//" segment, unlike the syncTableOutputDir + "*" used by the tests.
+    final List<GcsPath> paths = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
+    if (!paths.isEmpty()) {
+      final List<String> pathStrs = new ArrayList<>();
+      for (GcsPath path : paths) {
+        pathStrs.add(path.toString());
+      }
+      this.gcsUtil.remove(pathStrs);
+    }
+
     connection.close();
 
     // delete test table
@@ -134,18 +177,28 @@ public void teardown() throws IOException {
         .deleteTable(TableName.valueOf(tableId));
   }
 
-  @Test
-  public void testHBaseSnapshotImport() throws Exception {
-
-    // Crete table
-    TableName tableName = TableName.valueOf(tableId);
-    HTableDescriptor descriptor = new HTableDescriptor(tableName);
+  private SyncTableOptions createSyncTableOptions() {
+    DataflowPipelineOptions syncTableOpts =
+        PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+    syncTableOpts.setRunner(DataflowRunner.class);
+    syncTableOpts.setGcpTempLocation(dataflowStagingLocation);
+    syncTableOpts.setNumWorkers(1);
+    syncTableOpts.setProject(projectId);
 
-    descriptor.addFamily(new HColumnDescriptor(CF));
+    SyncTableOptions syncOpts = syncTableOpts.as(SyncTableOptions.class);
+    // Setup Bigtable params
+    syncOpts.setBigtableProject(StaticValueProvider.of(projectId));
+    syncOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId));
+    syncOpts.setBigtableTableId(StaticValueProvider.of(tableId));
+    syncOpts.setBigtableAppProfileId(null);
 
-    connection.getAdmin().createTable(descriptor, SnapshotTestingUtils.getSplitKeys());
+    // Setup Hashes
+    syncOpts.setHashTableOutputDir(StaticValueProvider.of(hashDir));
+    syncOpts.setOutputPrefix(StaticValueProvider.of(syncTableOutputDir));
+    return syncOpts;
+  }
 
-    // Start import
+  private ImportOptions createImportOptions() {
     DataflowPipelineOptions importPipelineOpts =
         PipelineOptionsFactory.as(DataflowPipelineOptions.class);
     importPipelineOpts.setRunner(DataflowRunner.class);
@@ -154,10 +207,9 @@ public void testHBaseSnapshotImport() throws Exception {
     importPipelineOpts.setProject(projectId);
     importPipelineOpts.setRegion(region);
 
-    ImportJobFromHbaseSnapshot.ImportOptions importOpts =
-        importPipelineOpts.as(ImportJobFromHbaseSnapshot.ImportOptions.class);
+    ImportOptions importOpts = importPipelineOpts.as(ImportOptions.class);
 
-    // setup GCP and bigtable
+    // setup Bigtable options
     importOpts.setBigtableProject(StaticValueProvider.of(projectId));
     importOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId));
     importOpts.setBigtableTableId(StaticValueProvider.of(tableId));
@@ -165,17 +217,25 @@ public void testHBaseSnapshotImport() throws Exception {
     // setup HBase snapshot info
     importOpts.setHbaseSnapshotSourceDir(hbaseSnapshotDir);
     importOpts.setSnapshotName(TEST_SNAPSHOT_NAME);
+    return importOpts;
+  }
+
+  private Map<String, Long> getCountMap(PipelineResult result) {
+    MetricQueryResults metrics = result.metrics().allMetrics();
+    return StreamSupport.stream(metrics.getCounters().spliterator(), false)
+        .collect(Collectors.toMap((m) -> m.getName().getName(), (m) -> m.getAttempted()));
+  }
+
+  @Test
+  public void testHBaseSnapshotImport() throws Exception {
+
+    // Start import
+    ImportOptions importOpts = createImportOptions();
 
     // run pipeline
     State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish();
     Assert.assertEquals(State.DONE, state);
 
-    // check data in bigtable
-    BigtableTableUtils destTable = new BigtableTableUtils(connection, tableId, CF);
-    Assert.assertEquals(
-        100 /* There are 100 rows in test snapshot*/,
-        destTable.readAllCellsFromTable().toArray().length);
-
     // check that the .restore dir used for temp files has been removed
     Objects objects =
         gcsUtil.listObjects(
@@ -185,6 +245,81 @@ public void testHBaseSnapshotImport() throws Exception {
             null);
     Assert.assertNull(objects.getItems());
 
-    // TODO(vermas2012): Add more validations after this.
+    SyncTableOptions syncOpts = createSyncTableOptions();
+
+    PipelineResult result = SyncTableJob.buildPipeline(syncOpts).run();
+    state = result.waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    List<GcsPath> outputs = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
+    // FileSink will write an empty file when there are no mismatches
+    Assert.assertEquals(1, outputs.size());
+    // TODO read the actual files and validate the ranges instead of size check
+    Assert.assertEquals(0, gcsUtil.fileSize(outputs.get(0)));
+
+    // Validate the counters.
+    Map<String, Long> counters = getCountMap(result);
+    Assert.assertEquals(counters.size(), 1);
+    Assert.assertEquals(counters.get("ranges_matched"), (Long) 101L);
+  }
+
+  /**
+   * Corrupts the imported table (extra rows, a changed cell, a deleted row) and validates that
+   * sync-table reports the mismatching ranges.
+   */
+  @Test
+  public void testHBaseSnapshotImportWithCorruptions() throws Exception {
+    // Import snapshot
+    ImportOptions importOpts = createImportOptions();
+    State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    // Introduce corruptions to the data in Bigtable. Delete data from Bigtable to simulate Bigtable
+    // missing data. Add data to Bigtable to simulate extra data in Bigtable. It is easier to update
+    // Bigtable than change the snapshots.
+    try (Table table = connection.getTable(TableName.valueOf(tableId))) {
+      Cell cellInMiddle = table.get(new Get("24".getBytes())).rawCells()[0];
+      List<Put> puts =
+          Arrays.asList(
+              // Add a row at the start
+              new Put(Bytes.toBytes("000"))
+                  .addColumn(CF.getBytes(), "random_col".getBytes(), 1L, "value000".getBytes())
+                  .addColumn(CF.getBytes(), "random_col".getBytes(), 2L, "value001".getBytes()),
+              // Change a cell in middle; CellUtil.clone* honors the cell's offset/length.
+              new Put(org.apache.hadoop.hbase.CellUtil.cloneRow(cellInMiddle))
+                  .addColumn(
+                      org.apache.hadoop.hbase.CellUtil.cloneFamily(cellInMiddle),
+                      org.apache.hadoop.hbase.CellUtil.cloneQualifier(cellInMiddle),
+                      cellInMiddle.getTimestamp(),
+                      "corrupted_val".getBytes()),
+              // add a new row in the end
+              new Put(Bytes.toBytes("9999"))
+                  .addColumn(CF.getBytes(), "random_col".getBytes(), 100L, "value999".getBytes()));
+
+      table.put(puts);
+      // Delete a random row in the middle. We should see 4 ranges mismatch as table is split on
+      // 1,2...9. We are splitting on 31, delete in 60s.
+      table.delete(new Delete("64".getBytes()));
+    }
+
+    // Run SyncTable job and expect 4 mismatches.
+    SyncTableOptions syncOpts = createSyncTableOptions();
+    PipelineResult result = SyncTableJob.buildPipeline(syncOpts).run();
+    state = result.waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    List<GcsPath> outputs = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
+
+    System.out.println("OUTPUTS: " + outputs);
+    // FileSink will shard the outputs and will create >1 files.
+    Assert.assertTrue(outputs.size() > 1);
+    // TODO read the files and validate that the ranges are there instead of size check.
+    Assert.assertTrue((gcsUtil.fileSize(outputs.get(0)) + gcsUtil.fileSize(outputs.get(1))) > 0);
+
+    Map<String, Long> counters = getCountMap(result);
+    Assert.assertEquals(2, counters.size());
+    Assert.assertEquals((Long) 97L, counters.get("ranges_matched"));
+    Assert.assertEquals((Long) 4L, counters.get("ranges_not_matched"));
   }
 }
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
new file mode 100644
index 0000000000..ee574a9c2d
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.SourceTestUtils;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class BufferedHadoopHashTableSourceTest {
+
+  private BufferedHadoopHashTableSource bufferedSource;
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  private static final String HASH_TABLE_OUTPUT_PATH_DIR = "gs://my-bucket/outputDir";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("AAAA".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW =
+      new ImmutableBytesWritable("ZZZZ".getBytes());
+  private static final ImmutableBytesWritable POST_STOP_ROW =
+      new ImmutableBytesWritable("z".getBytes()); // Lowercase z is lexicographically > uppercase Z
+  private static final ImmutableBytesWritable EMPTY_ROW =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+  private static final ImmutableBytesWritable START_HASH =
+      new ImmutableBytesWritable("START-HASH".getBytes());
+  private static final int BATCH_SIZE = 5;
+
+  @Before
+  public void setUp() throws Exception {
+    fakeTableHashWrapper =
+        new FakeTableHashWrapper(
+            START_ROW, STOP_ROW, new ArrayList<>(), new ArrayList<>(), new Scan());
+    bufferedSource =
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of("cbt-dev"),
+                StaticValueProvider.of(HASH_TABLE_OUTPUT_PATH_DIR),
+                START_ROW,
+                STOP_ROW,
+                new FakeTableHashWrapperFactory(fakeTableHashWrapper)),
+            BATCH_SIZE);
+  }
+
+  protected static ImmutableBytesWritable getKey(int keyIndex) {
+    return new ImmutableBytesWritable(("KEY-" + keyIndex).getBytes());
+  }
+
+  protected static ImmutableBytesWritable getHash(int hashIndex) {
+    return new ImmutableBytesWritable(("HASH-" + hashIndex).getBytes());
+  }
+
+  /**
+   * Populates the fakeTableHashWrapper with {@code numEntries} entries starting with startKey.
+   * Returns a List of expected RangeHashes for this data, for numEntries=1, single RangeHash is
+   * returned (startRow, stopRow, START_HASH).
+   */
+  protected List<KV<String, List<RangeHash>>> setupTestData(
+      ImmutableBytesWritable startRow, ImmutableBytesWritable stopRow, int numEntries) {
+    fakeTableHashWrapper.startRowInclusive = startRow;
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+    fakeTableHashWrapper.hashes.add(KV.of(startRow, START_HASH));
+    for (int i = 0; i < numEntries - 1; i++) {
+      fakeTableHashWrapper.hashes.add(KV.of(getKey(i), getHash(i)));
+    }
+
+    List<KV<String, List<RangeHash>>> out = new ArrayList<>();
+    // Setup RangeHashes to be returned
+    List<RangeHash> expectedRangeHashes = new ArrayList<>();
+    ImmutableBytesWritable key = startRow;
+    ImmutableBytesWritable hash = START_HASH;
+    for (int i = 0; i < numEntries - 1; i++) {
+      expectedRangeHashes.add(RangeHash.of(key, getKey(i), hash));
+      key = getKey(i);
+      hash = getHash(i);
+      if (expectedRangeHashes.size() % BATCH_SIZE == 0) {
+        out.add(
+            KV.of(
+                Bytes.toStringBinary(expectedRangeHashes.get(0).startInclusive.copyBytes()),
+                expectedRangeHashes));
+        expectedRangeHashes = new ArrayList<>();
+      }
+    }
+    // Process the last range
+    expectedRangeHashes.add(RangeHash.of(key, stopRow, hash));
+    // Finalize the last batch
+    out.add(
+        KV.of(
+            Bytes.toStringBinary(expectedRangeHashes.get(0).startInclusive.copyBytes()),
+            expectedRangeHashes));
+
+    return out;
+  }
+
+  @Test
+  public void testHashReaderEmpty() throws IOException {
+    // The tableHashWrapper has no hashes, this should result in empty source.
+    assertEquals(Arrays.asList(), SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderPartialBuffer() throws IOException {
+    // Setup 4 entries in this hashtable datafile.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 4);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleBatches() throws IOException {
+    // Setup 20 entries in this hashtable datafile, spanning several batches of BATCH_SIZE.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 20);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleBatchesWithPartialBatchAtEnd() throws IOException {
+    // Setup 23 entries in this hashtable datafile, which is not a multiple of BATCH_SIZE.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 23);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testSplitEqualsUnsplit() throws Exception {
+    fakeTableHashWrapper.partitions = Arrays.asList(getKey(4), getKey(9));
+    SourceTestUtils.assertSourcesEqualReferenceSource(
+        bufferedSource, bufferedSource.split(0, null), null);
+  }
+
+  @Test
+  public void testUnstartedReaderEqualsStarted() throws Exception {
+    setupTestData(START_ROW, STOP_ROW, 6);
+    SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(
+        bufferedSource.createReader(null), null);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
new file mode 100644
index 0000000000..ed725d8d37
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
@@ -0,0 +1,444 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.models.GCRules.GCRULES;
+
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient;
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings;
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.models.CreateTableRequest;
+import com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.emulator.v2.BigtableEmulatorRule;
+import com.google.cloud.bigtable.hbase.BigtableConfiguration;
+import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor.BigtableResultHasher;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@RunWith(JUnit4.class)
+public class ComputeAndValidateHashFromBigtableDoFnTest {
+
+  private static final byte[] EMPTY_ROW_KEY = HConstants.EMPTY_BYTE_ARRAY;
+  protected final Logger LOG = LoggerFactory.getLogger(getClass());
+
+  public static final String FAKE_TABLE = "fake-table";
+  private static final String ROW_KEY_PREFIX = "row-";
+  private static final String VALUE_PREFIX = "value-";
+  private static final byte[] EXTRA_VALUE = "add".getBytes();
+  private static final byte[] CF = "cf".getBytes();
+  private static final byte[] CF2 = "cf".getBytes(); // NOTE(review): same as CF; "cf2" meant?
+  private static final byte[] COL = "col".getBytes();
+  private static final long TS = 1000l;
+  private static final int FIRST_ROW_INDEX = 20;
+  private static final int LAST_ROW_INDEX = 31;
+
+  @Rule
+  public final BigtableEmulatorRule bigtableEmulator = BigtableEmulatorRule.create();
+
+  @Rule
+  public final transient TestPipeline p = TestPipeline.create();
+
+  private ComputeAndValidateHashFromBigtableDoFn doFn;
+
+  // Clients that will be connected to the emulator
+  private BigtableTableAdminClient tableAdminClient;
+  private Table table;
+  // Fake a TableHashWrapper.
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  private List<RangeHash> hashes;
+
+  @Before
+  public void setUp() throws IOException {
+    hashes = new ArrayList<>();
+    // Initialize the clients to connect to the emulator
+    tableAdminClient =
+        BigtableTableAdminClient.create(
+            BigtableTableAdminSettings.newBuilderForEmulator(bigtableEmulator.getPort())
+                .setProjectId("fake-project")
+                .setInstanceId("fake-instance")
+                .build());
+
+    CloudBigtableTableConfiguration config =
+        new CloudBigtableTableConfiguration.Builder()
+            .withProjectId("fake-project")
+            .withInstanceId("fake-instance")
+            .withTableId(FAKE_TABLE)
+            .withConfiguration(
+                BigtableOptionsFactory.BIGTABLE_EMULATOR_HOST_KEY,
+                "localhost:" + bigtableEmulator.getPort())
+            .build();
+
+    Connection connection = BigtableConfiguration.connect(config.toHBaseConfig());
+    table = connection.getTable(TableName.valueOf(FAKE_TABLE));
+    fakeTableHashWrapper = new FakeTableHashWrapper();
+    // Scan all the cells for the column, HBase scan fetches 1 cell/column by default
+    fakeTableHashWrapper.scan = new Scan().setMaxVersions();
+
+    FakeTableHashWrapperFactory fakeFactory = new FakeTableHashWrapperFactory(fakeTableHashWrapper);
+
+    doFn =
+        new ComputeAndValidateHashFromBigtableDoFn(
+            config,
+            StaticValueProvider.of(FAKE_TABLE),
+            StaticValueProvider.of("proj"),
+            StaticValueProvider.of("hash"),
+            fakeFactory);
+
+    // Create a test table that can be used in tests
+    tableAdminClient.createTable(
+        CreateTableRequest.of(FAKE_TABLE)
+            .addFamily(new String(CF), GCRULES.maxVersions(100))
+            .addFamily(new String(CF2), GCRULES.maxVersions(100)));
+
+    p.getCoderRegistry().registerCoderForClass(RangeHash.class, new RangeHashCoder());
+
+    // Fill CBT table with data.
+    writeDataToTable();
+  }
+
+  @After
+  public void tearDown() {
+    tableAdminClient.deleteTable(FAKE_TABLE); // setUp creates the table again.
+    tableAdminClient.close(); // Close the admin client created in setUp so it is not leaked.
+  }
+
+  private byte[] getRowKey(int i) {
+    return (ROW_KEY_PREFIX + Integer.toString(i)).getBytes();
+  }
+
+  private byte[] getValue(int rowIndex, int cellIndex) {
+    return String.join("-", VALUE_PREFIX + rowIndex, Integer.toString(cellIndex)).getBytes();
+  }
+
+  private void writeDataToTable() throws IOException {
+    // Tests use the rows 21-30. Rows 20 and 31 are extra data that simulates the real-world
+    // scenario where other work items operate on the same table in parallel.
+    List<Put> mutations = new ArrayList<>();
+    for (int row = 20; row < 32; row++) {
+      for (int cell = 0; cell < 2; cell++) {
+        // Each row gets 2 cells in the same column: one Put per cell, at timestamps TS and
+        // TS + 1.
+        Put cellPut = new Put(getRowKey(row)).addColumn(CF, COL, TS + cell, getValue(row, cell));
+        mutations.add(cellPut);
+      }
+    }
+    table.put(mutations);
+  }
+
+  /** Deletes the rows with indices in [startIndex, stopIndex), one Delete call per row. */
+  private void deleteRange(int startIndex, int stopIndex) throws IOException {
+    int rowIndex = startIndex;
+    while (rowIndex < stopIndex) {
+      table.delete(new Delete(getRowKey(rowIndex)));
+      rowIndex++;
+    }
+  }
+
+  // Creates a RangeHash for range [startRow, stopRow) from the rows currently in the table.
+  private RangeHash createHash(byte[] startRow, byte[] stopRow) throws IOException {
+    // Decode the keys for logging; concatenating a byte[] only prints its identity hash.
+    LOG.debug("Creating hash for rows " + new String(startRow) + " to " + new String(stopRow));
+    BigtableResultHasher hasher = new BigtableResultHasher();
+    hasher.startBatch(new ImmutableBytesWritable(startRow));
+    // Scan all the cells for a column; try-with-resources closes the scanner when done.
+    Scan scan = new Scan().setMaxVersions().withStartRow(startRow).withStopRow(stopRow, false);
+    try (org.apache.hadoop.hbase.client.ResultScanner scanner = table.getScanner(scan)) {
+      for (Result result : scanner) {
+        LOG.debug("Adding result to hash: " + result);
+        hasher.hashResult(result);
+      }
+    }
+    hasher.finishBatch();
+    return RangeHash.of(
+        new ImmutableBytesWritable(startRow),
+        new ImmutableBytesWritable(stopRow),
+        hasher.getBatchHash());
+  }
+
+  ////////// Happy case tests for various setups//////////////////////
+  @Test
+  public void testHashMatchesForMultipleRange() throws Exception {
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(28)));
+    String startKey = new String(getRowKey(21));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> hashBatches =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    // The hashes were computed from the table itself, so nothing may be reported.
+    PCollection<RangeHash> mismatches = hashBatches.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).empty();
+    p.run();
+  }
+
+  @Test
+  public void testHashMatchesForSingleRange() throws Exception {
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(Create.of(KV.of(new String(getRowKey(21)), Arrays.asList(hashes))));
+    // The only range was hashed from the table itself, so no mismatch may be emitted.
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    PAssert.that(output).empty();
+    p.run();
+  }
+
+  @Test
+  public void testHashMatchesForFullTableScanWithMultipleRange() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), EMPTY_ROW_KEY));
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> hashBatches =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    // Empty start and stop keys are used as the outer bounds; both ranges should match.
+    PCollection<RangeHash> mismatches = hashBatches.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).empty();
+    p.run();
+  }
+
+  @Test
+  public void testHashMatchesForMultipleSingleRowRange() throws Exception {
+    hashes.add(createHash(getRowKey(22), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(25)));
+    String startKey = new String(getRowKey(22));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> hashBatches =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    // Each range spans consecutive row indices; none of them should be reported.
+    PCollection<RangeHash> mismatches = hashBatches.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).empty();
+    p.run();
+  }
+
+  ///////////////// Test mismatches when Bigtable has extra rows ////////////////////
+  @Test
+  public void testAdditionalCellInMiddle() throws Exception {
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(30)));
+
+    // Write one more cell to row 25 after the hashes were computed.
+    table.put(new Put(getRowKey(25)).addColumn(CF, COL, EXTRA_VALUE));
+    String startKey = new String(getRowKey(21));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> hashBatches =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    // Only the range holding row 25, [24, 27), is expected back.
+    PCollection<RangeHash> mismatches = hashBatches.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).containsInAnyOrder(hashes.get(1));
+    p.run();
+  }
+
+  @Test
+  public void testAdditionalRowsAtEnds() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Extra row written at the beginning of the table after hashing.
+    table.put(new Put(getRowKey(1)).addColumn(CF, COL, EXTRA_VALUE));
+
+    // Extra row written at the end of the table after hashing.
+    table.put(new Put(getRowKey(5)).addColumn(CF, COL, EXTRA_VALUE));
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> hashBatches =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    // The first and last ranges are expected back; the middle range was not touched.
+    PCollection<RangeHash> mismatches = hashBatches.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).containsInAnyOrder(hashes.get(0), hashes.get(2));
+    p.run();
+  }
+
+  ///////////////////// Test different values ///////////////////////////
+  @Test
+  public void testDifferentValues() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Modify the CF: row 20 keeps its key, timestamp and value but the cell moves to CF2.
+    table.delete(new Delete(getRowKey(20)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(20)).addColumn(CF2, COL, TS, getValue(20, 0)));
+
+    // Modify the qualifier
+    table.delete(new Delete(getRowKey(22)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(22)).addColumn(CF, "random-col".getBytes(), TS, getValue(22, 0)));
+
+    // Modify the timestamp
+    table.delete(new Delete(getRowKey(24)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(24)).addColumn(CF, COL, 1, getValue(24, 0)));
+
+    // Modify the value: same family, qualifier and timestamp, different bytes.
+    table.delete(new Delete(getRowKey(26)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(26)).addColumn(CF, COL, TS, EXTRA_VALUE));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+    // Rows 20, 22, 24 and 26 sit in the first four ranges; the last range is left untouched.
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    PAssert.that(output)
+        .containsInAnyOrder(hashes.get(0), hashes.get(1), hashes.get(2), hashes.get(3));
+    p.run();
+  }
+
+  ////////////////// Tests with CBT missing data //////////////////////////////
+  @Test
+  public void testMissingRows() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Remove the first row.
+    table.delete(new Delete(getRowKey(FIRST_ROW_INDEX)));
+
+    // Remove a row from the middle.
+    table.delete(new Delete(getRowKey(24)));
+
+    // Remove the last row.
+    table.delete(new Delete(getRowKey(LAST_ROW_INDEX)));
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> hashBatches =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    // The three ranges that lost a row are expected back.
+    PCollection<RangeHash> mismatches = hashBatches.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4));
+    p.run();
+  }
+
+  @Test
+  public void testMissingRanges() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Remove the whole first range.
+    deleteRange(FIRST_ROW_INDEX, 21);
+
+    // Remove a whole range in the middle.
+    deleteRange(23, 25);
+
+    // Remove everything from row 27 on: the scanner ends while several ranges are pending.
+    deleteRange(27, LAST_ROW_INDEX + 1);
+    String startKey = new String(EMPTY_ROW_KEY);
+    PCollection<KV<String, Iterable<List<RangeHash>>>> hashBatches =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    // Every range whose rows were deleted is expected back.
+    PCollection<RangeHash> mismatches = hashBatches.apply(ParDo.of(doFn));
+    PAssert.that(mismatches)
+        .containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4), hashes.get(5));
+    p.run();
+  }
+
+  @Test
+  public void testCbtEmpty() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Delete all data from bigtable. deleteRange's stop index is exclusive, hence the + 1.
+    deleteRange(FIRST_ROW_INDEX, LAST_ROW_INDEX + 1);
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+    // With the table empty, every range is expected back as a mismatch.
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    PAssert.that(output).containsInAnyOrder(hashes);
+    p.run();
+  }
+
+  ////////////////////// Test that scan is used from TableHash.////////////////////////
+  @Test
+  public void testScanFromTableHash() throws Exception {
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(30)));
+
+    // Reset the TableHashWrapper Scan to the default. The Scan from HashTable.TableHash decides
+    // which cells are hashed, and CBT has to validate against exactly the same cells.
+    fakeTableHashWrapper.scan = new Scan();
+    String startKey = new String(getRowKey(21));
+    PCollection<KV<String, Iterable<List<RangeHash>>>> hashBatches =
+        p.apply(Create.of(KV.of(startKey, Arrays.asList(hashes))));
+    // The hashes above were built with setMaxVersions(), so every range is expected back.
+    PCollection<RangeHash> mismatches = hashBatches.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).containsInAnyOrder(hashes);
+    p.run();
+  }
+
+  ////////////////////// Combination of different cases //////////////////////////////////
+  @Test
+  public void testMismatchesComprehensive() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Delete a range at the beginning from CBT
+    deleteRange(FIRST_ROW_INDEX, 21);
+
+    // Delete a row in middle from CBT
+    table.delete(new Delete(getRowKey(23)));
+
+    // Update a value in CBT: same family, qualifier and timestamp, different bytes.
+    table.delete(new Delete(getRowKey(27)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(27)).addColumn(CF, COL, TS, EXTRA_VALUE));
+
+    // Add an extra row at the end.
+    table.put(new Put(getRowKey(5)).addColumn(CF, COL, EXTRA_VALUE));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+    // Ranges [21, 23) and [25, 27) were not modified; the other four are expected back.
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    PAssert.that(output)
+        .containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4), hashes.get(5));
+    p.run();
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
new file mode 100644
index 0000000000..6e3e5f004d
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.common.collect.ImmutableList;
+import com.google.gson.Gson;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * A fake for TableHashWrapper that lets tests mock the behavior of HBase's HashTable.TableHash.
+ */
+public class FakeTableHashWrapper implements TableHashWrapper {
+  private static final long serialVersionUID = 34876543L;
+  private static final java.lang.reflect.Type PARTITIONS_TYPE =
+      new com.google.gson.reflect.TypeToken<ArrayList<ImmutableBytesWritable>>() {}.getType();
+  private static final java.lang.reflect.Type HASHES_TYPE =
+      new com.google.gson.reflect.TypeToken<
+          ArrayList<KV<ImmutableBytesWritable, ImmutableBytesWritable>>>() {}.getType();
+
+  // Sorted list of partition keys splitting the key range.
+  public List<ImmutableBytesWritable> partitions;
+  // List of <Key,Hash> sorted by key.
+  public List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> hashes;
+  public ImmutableBytesWritable startRowInclusive;
+  public ImmutableBytesWritable stopRowExclusive;
+  public Scan scan;
+
+  public FakeTableHashWrapper() {
+    this(
+        new ImmutableBytesWritable(),
+        new ImmutableBytesWritable(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        new Scan());
+  }
+
+  public FakeTableHashWrapper(
+      ImmutableBytesWritable startRowInclusive,
+      ImmutableBytesWritable stopRowExclusive,
+      List<ImmutableBytesWritable> partitions,
+      List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> hashes,
+      Scan scan) {
+    this.startRowInclusive = startRowInclusive;
+    this.stopRowExclusive = stopRowExclusive;
+    this.partitions = partitions;
+    this.hashes = hashes;
+    this.scan = scan;
+  }
+
+  @Override
+  public int getNumHashFiles() {
+    return partitions.size() + 1;
+  }
+
+  @Override
+  public ImmutableList<ImmutableBytesWritable> getPartitions() {
+    return ImmutableList.copyOf(partitions);
+  }
+
+  @Override
+  public ImmutableBytesWritable getStartRow() {
+    return startRowInclusive;
+  }
+
+  @Override
+  public ImmutableBytesWritable getStopRow() {
+    return stopRowExclusive;
+  }
+
+  @Override
+  public Scan getScan() {
+    return scan;
+  }
+
+  @Override
+  public TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow) {
+    return new FakeTableHashReader(startRow);
+  }
+
+  // Custom serialization: writes each field as a Gson JSON string; readObject mirrors the order.
+  private void writeObject(ObjectOutputStream s) throws IOException {
+    Gson gson = new Gson();
+    s.writeObject(gson.toJson(scan));
+    s.writeObject(gson.toJson(startRowInclusive));
+    s.writeObject(gson.toJson(stopRowExclusive));
+    s.writeObject(gson.toJson(partitions));
+    s.writeObject(gson.toJson(hashes));
+  }
+
+  private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
+    Gson gson = new Gson();
+    scan = gson.fromJson((String) s.readObject(), Scan.class);
+    startRowInclusive = gson.fromJson((String) s.readObject(), ImmutableBytesWritable.class);
+    stopRowExclusive = gson.fromJson((String) s.readObject(), ImmutableBytesWritable.class);
+    // A raw ArrayList.class tells Gson nothing about the element type, so pass the full types.
+    partitions = gson.fromJson((String) s.readObject(), PARTITIONS_TYPE);
+    hashes = gson.fromJson((String) s.readObject(), HASHES_TYPE);
+  }
+
+  public class FakeTableHashReader implements TableHashReader {
+    // Copy of items to be read by this reader.
+    private final List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> entriesToRead;
+    // First next() will make index = 0, and compare it with the size of entriesToRead.
+    private int index = -1;
+
+    public FakeTableHashReader(ImmutableBytesWritable startRow) {
+      // Keep, in their original order, only the entries whose key is at or after startRow.
+      entriesToRead = new ArrayList<>(hashes);
+      entriesToRead.removeIf(hash -> hash.getKey().compareTo(startRow) < 0);
+    }
+
+    @Override
+    public boolean next() throws IOException {
+      return ++index < entriesToRead.size();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentKey() {
+      return entriesToRead.get(index).getKey();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentHash() {
+      return entriesToRead.get(index).getValue();
+    }
+
+    @Override
+    public void close() throws IOException {
+      // NOOP
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
new file mode 100644
index 0000000000..9a3acc19a9
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+/** Test factory that always hands out the FakeTableHashWrapper it was constructed with. */
+public class FakeTableHashWrapperFactory extends TableHashWrapperFactory {
+  private static final long serialVersionUID = 269854624L;
+  private final FakeTableHashWrapper fakeTableHashWrapper;
+
+  public FakeTableHashWrapperFactory(FakeTableHashWrapper wrapper) {
+    this.fakeTableHashWrapper = wrapper;
+  }
+
+  // projectId and sourceHashDir are ignored; the injected fake is returned unchanged.
+  @Override
+  public TableHashWrapper getTableHash(String projectId, String sourceHashDir) {
+    return fakeTableHashWrapper;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
new file mode 100644
index 0000000000..20abf02d06
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.SourceTestUtils;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HadoopHashBasedReaderTest {
+  private static final String HASH_TABLE_OUTPUT_PATH_DIR = "gs://my-bucket/outputDir";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("AAAA".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW =
+      new ImmutableBytesWritable("ZZZZ".getBytes());
+  // Lowercase z sorts after uppercase Z, so this key lies beyond STOP_ROW.
+  private static final ImmutableBytesWritable POST_STOP_ROW =
+      new ImmutableBytesWritable("z".getBytes());
+  private static final ImmutableBytesWritable EMPTY_ROW =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+  private static final ImmutableBytesWritable START_HASH =
+      new ImmutableBytesWritable("START-HASH".getBytes());
+
+  private HadoopHashTableSource hashTableSource;
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  @Before
+  public void setUp() throws Exception {
+    fakeTableHashWrapper =
+        new FakeTableHashWrapper(
+            START_ROW, STOP_ROW, new ArrayList<>(), new ArrayList<>(), new Scan());
+    FakeTableHashWrapperFactory factory = new FakeTableHashWrapperFactory(fakeTableHashWrapper);
+    hashTableSource =
+        new HadoopHashTableSource(
+            StaticValueProvider.of("cbt-dev"),
+            StaticValueProvider.of(HASH_TABLE_OUTPUT_PATH_DIR),
+            START_ROW,
+            STOP_ROW,
+            factory);
+  }
+
+  protected static ImmutableBytesWritable getKey(int keyIndex) {
+    return new ImmutableBytesWritable(("KEY-" + keyIndex).getBytes());
+  }
+
+  protected static ImmutableBytesWritable getHash(int hashIndex) {
+    return new ImmutableBytesWritable(("HASH-" + hashIndex).getBytes());
+  }
+
+  /**
+   * Fills the fakeTableHashWrapper with {@code numEntries} hash entries, the first keyed by
+   * {@code startRow}, and returns the RangeHashes expected for that data. For numEntries=1 a
+   * single RangeHash (startRow, stopRow, START_HASH) is returned.
+   */
+  protected List<RangeHash> setupTestData(
+      ImmutableBytesWritable startRow, ImmutableBytesWritable stopRow, int numEntries) {
+    fakeTableHashWrapper.startRowInclusive = startRow;
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+    fakeTableHashWrapper.hashes.add(KV.of(startRow, START_HASH));
+
+    // Walk the remaining entries once: each key both feeds the fake and closes the open range.
+    List<RangeHash> expectedRangeHashes = new ArrayList<>();
+    ImmutableBytesWritable rangeStart = startRow;
+    ImmutableBytesWritable rangeHash = START_HASH;
+    for (int i = 0; i < numEntries - 1; i++) {
+      fakeTableHashWrapper.hashes.add(KV.of(getKey(i), getHash(i)));
+      expectedRangeHashes.add(RangeHash.of(rangeStart, getKey(i), rangeHash));
+      rangeStart = getKey(i);
+      rangeHash = getHash(i);
+    }
+    // The final range ends at the stop row.
+    expectedRangeHashes.add(RangeHash.of(rangeStart, stopRow, rangeHash));
+    return expectedRangeHashes;
+  }
+
+  /////////////////////////////// Test the end of HashTable Output /////////////////////////
+
+  @Test
+  public void testHashReaderEmpty() throws IOException {
+    // The fake holds no hashes at all, so the source must yield nothing.
+    assertEquals(Arrays.asList(), SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderSingleHashBatch() throws IOException {
+    // A single entry in the HashTable data file: the whole [START_ROW, STOP_ROW) range is
+    // covered by one hash.
+    List<RangeHash> expectedRanges = setupTestData(START_ROW, STOP_ROW, 1);
+
+    assertEquals(expectedRanges, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleHashBatch() throws IOException {
+    // Four entries in the HashTable data file.
+    List<RangeHash> expectedRanges = setupTestData(START_ROW, STOP_ROW, 4);
+    assertEquals(expectedRanges, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  //////////////////// Test the end of HashTable output when end of range is ""/////////////////
+  @Test
+  public void testHashReaderWithEmptyEndRow() throws IOException {
+    // Four entries in the HashTable data file, with neither a start nor a stop key set.
+    List<RangeHash> expectedRanges = setupTestData(EMPTY_ROW, EMPTY_ROW, 4);
+    hashTableSource.startRowInclusive = EMPTY_ROW;
+    hashTableSource.stopRowExclusive = EMPTY_ROW;
+    assertEquals(expectedRanges, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  /////////////////////////////// Test reader.getCurrent() >= stopRow /////////////////////////
+
+  @Test
+  public void testHashReaderWorkItemEndedOnFirstBatch() throws IOException {
+    // The only entry in the HashTable data file lies outside of the work item's row range.
+    fakeTableHashWrapper.hashes.add(KV.of(POST_STOP_ROW, START_HASH));
+    // No hash falls within the source's bounds, so it reads as empty.
+    assertEquals(new ArrayList<RangeHash>(), SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderWorkItemEndedOnSecondEntry() throws IOException {
+    // A single entry in the HashTable data file: the whole [START_ROW, STOP_ROW) range is
+    // covered by one hash.
+    List<RangeHash> expectedRanges = setupTestData(START_ROW, STOP_ROW, 1);
+    // Append an entry exactly at the stop row. The reader should stop there and read 1 entry.
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, getHash(100)));
+
+    assertEquals(expectedRanges, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderWorkItemEndedAfterMultipleBatches() throws IOException {
+    // Four entries in the HashTable data file.
+    List<RangeHash> expectedRanges = setupTestData(START_ROW, STOP_ROW, 4);
+    // Append an entry past the stop row. The reader should stop there and read just 4 entries.
+    fakeTableHashWrapper.hashes.add(KV.of(POST_STOP_ROW, getHash(100)));
+    assertEquals(expectedRanges, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testSplitEqualsUnsplit() throws Exception {
+    setupTestData(START_ROW, STOP_ROW, 6);
+    fakeTableHashWrapper.partitions = Arrays.asList(getKey(2), getKey(4));
+    SourceTestUtils.assertSourcesEqualReferenceSource(
+        hashTableSource, hashTableSource.split(1, null), null);
+  }
+
+  @Test
+  public void testUnstartedReaderEqualsStarted() throws Exception {
+    setupTestData(START_ROW, STOP_ROW, 6);
+    SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(
+        hashTableSource.createReader(null), null);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
new file mode 100644
index 0000000000..bc79f4300b
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.HashBasedReader;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.util.List;
+import junit.framework.TestCase;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.BoundedSource.BoundedReader;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HadoopHashTableSourceTest extends TestCase {
+
+  HadoopHashTableSource source;
+  FakeTableHashWrapper fakeTableHashWrapper;
+
+  private static final ValueProvider<String> PROJECT_ID = StaticValueProvider.of("test-project");
+  private static final ValueProvider<String> HASH_TABLE_OUTPUT_PATH_DIR =
+      StaticValueProvider.of("gs://my-bucket/outputDir");
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("a".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW = new ImmutableBytesWritable("z".getBytes());
+  private static final ImmutableBytesWritable PARTITION1 =
+      new ImmutableBytesWritable("d".getBytes());
+  private static final ImmutableBytesWritable PARTITION2 =
+      new ImmutableBytesWritable("g".getBytes());
+  private static final ImmutableBytesWritable EMPTY_ROW_KEY =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    fakeTableHashWrapper = new FakeTableHashWrapper();
+  }
+
+  private List<BoundedSource<RangeHash>> getSplitSources(
+      List<ImmutableBytesWritable> partitions,
+      ImmutableBytesWritable startRow,
+      ImmutableBytesWritable stopRow)
+      throws IOException {
+    fakeTableHashWrapper.startRowInclusive = startRow;
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+    fakeTableHashWrapper.partitions = partitions;
+
+    source =
+        new HadoopHashTableSource(
+            PROJECT_ID,
+            HASH_TABLE_OUTPUT_PATH_DIR,
+            startRow,
+            stopRow,
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+    return (List<BoundedSource<RangeHash>>) source.split(0, null);
+  }
+
+  private void testSourceSplits(
+      List<ImmutableBytesWritable> partitions,
+      ImmutableBytesWritable startRow,
+      ImmutableBytesWritable stopRow,
+      List<BoundedSource<RangeHash>> expectedSources)
+      throws IOException {
+    assertEquals(expectedSources, getSplitSources(partitions, startRow, stopRow));
+  }
+
+  @Test
+  public void testSplitZeroPartitions() throws IOException {
+    // Row range [a-z) with no splits.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, STOP_ROW));
+    testSourceSplits(ImmutableList.of(), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitOnePartition() throws IOException {
+    // Row range [a-z) with 1 split.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testMultiplePartitons() throws IOException {
+    // Row range [a-z) with splits on {d,g}. The data files will be for {[a,d), [d,g), [g,z)}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitEmptyStartRow() throws IOException {
+    // Row range [""-z) with splits on {d,g}. The data files will be for {["",d), [d,g), [g,z)}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, EMPTY_ROW_KEY, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), EMPTY_ROW_KEY, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitEmptyStopRow() throws IOException {
+    // Row range [a-"") with splits on {d,g}. The data files will be for {[a,d), [d,g), [g,"")}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, EMPTY_ROW_KEY));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), START_ROW, EMPTY_ROW_KEY, expected);
+  }
+
+  @Test
+  public void testSplitFullTableScan() throws IOException {
+    // Row range [""-"") with splits on {d,g}. The data files will be for {["",d), [d,g), [g,"")}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, EMPTY_ROW_KEY, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, EMPTY_ROW_KEY));
+    testSourceSplits(
+        ImmutableList.of(PARTITION1, PARTITION2), EMPTY_ROW_KEY, EMPTY_ROW_KEY, expected);
+  }
+
+  @Test
+  public void testCreateReaderWithoutSplit() throws IOException {
+    source =
+        new HadoopHashTableSource(
+            PROJECT_ID,
+            HASH_TABLE_OUTPUT_PATH_DIR,
+            // When split is not called, start/stop are uninitialized. Start/stop are runtime params
+            // and are initialized in split/createReader.
+            null,
+            null,
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+    // Setup boundaries on the TableHashWrapper to be used in Source.
+    fakeTableHashWrapper.startRowInclusive = START_ROW;
+    fakeTableHashWrapper.stopRowExclusive = STOP_ROW;
+
+    // Create a new Reader
+    BoundedReader<RangeHash> reader = source.createReader(null);
+
+    // Validate that the reader was properly created.
+    assertEquals(HashBasedReader.class, reader.getClass());
+    assertEquals(source, reader.getCurrentSource());
+    HashBasedReader hashBasedReader = (HashBasedReader) reader;
+    assertEquals(START_ROW, hashBasedReader.startRowInclusive);
+    assertEquals(STOP_ROW, hashBasedReader.stopRowExclusive);
+  }
+
+  @Test
+  public void testCreateReaderAfterSplit() throws IOException {
+    // A single partition will return 2 sources.
+    List<BoundedSource<RangeHash>> splitSources =
+        getSplitSources(ImmutableList.of(PARTITION1), START_ROW, STOP_ROW);
+    BoundedSource<RangeHash> splitHashSource = splitSources.get(0);
+
+    // Create a new Reader
+    BoundedReader<RangeHash> reader = splitHashSource.createReader(null);
+
+    // Validate that the reader was properly created.
+    assertEquals(HashBasedReader.class, reader.getClass());
+    assertEquals(splitHashSource, reader.getCurrentSource());
+    HashBasedReader hashBasedReader = (HashBasedReader) reader;
+    assertEquals(START_ROW, hashBasedReader.startRowInclusive);
+    assertEquals(PARTITION1, hashBasedReader.stopRowExclusive);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
new file mode 100644
index 0000000000..216f5d219e
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.common.truth.Truth.assertWithMessage;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import junit.framework.TestCase;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HashBasedSourceSerializationTest extends TestCase {
+
+  public static final String SOURCE_HASH_DIR = "gs://my-bucket/outputDir";
+  public static final String PROJECT_ID = "test-project";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("a".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW = new ImmutableBytesWritable("y".getBytes());
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+  }
+
+  @Test
+  public void testSerializeDefaultConstructor() throws IOException {
+    checkSerialization(new HadoopHashTableSource());
+  }
+
+  @Test
+  public void testSerializeWithValueProviders() throws IOException {
+    checkSerialization(
+        new HadoopHashTableSource(
+            StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR)));
+  }
+
+  @Test
+  public void testSerializeWithStartStop() throws IOException {
+    checkSerialization(
+        new HadoopHashTableSource(
+            StaticValueProvider.of(PROJECT_ID),
+            StaticValueProvider.of(SOURCE_HASH_DIR),
+            new ImmutableBytesWritable(START_ROW),
+            new ImmutableBytesWritable(STOP_ROW)));
+  }
+
+  @Test
+  public void testBufferedSourceSerialize() {
+    checkSerialization(
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR))));
+  }
+
+  @Test
+  public void testBufferedSourceSerializeWithBatchSize() {
+    checkSerialization(
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR)),
+            5));
+  }
+
+  private static void checkSerialization(Object source) {
+    try {
+      Object deserialized = serializeDeserialize(source);
+      checkClassDeclaresSerialVersionUid(source.getClass());
+      assertEquals(source, deserialized);
+    } catch (IOException | ClassNotFoundException e) {
+      fail(e.toString());
+    }
+  }
+
+  /** Fails unless {@code cls} itself declares a private static final long serialVersionUID. */
+  private static void checkClassDeclaresSerialVersionUid(Class<?> cls) {
+    for (Field field : cls.getDeclaredFields()) {
+      if ("serialVersionUID".equals(field.getName())) {
+        int modifiers = field.getModifiers();
+        assertWithMessage(field + " is not static").that(Modifier.isStatic(modifiers)).isTrue();
+        assertWithMessage(field + " is not final").that(Modifier.isFinal(modifiers)).isTrue();
+        assertWithMessage(field + " is not private").that(Modifier.isPrivate(modifiers)).isTrue();
+        assertWithMessage(field + " must be long")
+            .that(field.getType().getSimpleName())
+            .isEqualTo("long");
+        return;
+      }
+    }
+    fail(cls + " does not declare serialVersionUID");
+  }
+
+  private static Object serializeDeserialize(Object obj)
+      throws IOException, ClassNotFoundException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    try (ObjectOutputStream outStream = new ObjectOutputStream(bos)) {
+      outStream.writeObject(obj);
+    }
+
+    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
+    try (ObjectInputStream inStream = new ObjectInputStream(bis)) {
+      return inStream.readObject();
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
new file mode 100644
index 0000000000..bad5cd8fff
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.testing.CoderProperties;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class RangeHashCoderTest {
+  private static final RangeHashCoder TEST_CODER = new RangeHashCoder();
+  private static final ImmutableBytesWritable START =
+      new ImmutableBytesWritable("Start".getBytes());
+  private static final ImmutableBytesWritable STOP = new ImmutableBytesWritable("Stop".getBytes());
+  private static final ImmutableBytesWritable HASH = new ImmutableBytesWritable("hash".getBytes());
+  private static final ImmutableBytesWritable EMPTY =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+
+  @Test
+  public void encodeRangeHash() throws Exception {
+    CoderProperties.coderDecodeEncodeEqual(TEST_CODER, RangeHash.of(START, STOP, HASH));
+  }
+
+  @Test(expected = CoderException.class)
+  public void encodeNullThrowsCoderException() throws Exception {
+    CoderUtils.encodeToByteArray(TEST_CODER, null);
+  }
+
+  @Test
+  public void testEncodedTypeDescriptor() throws Exception {
+    Assert.assertEquals(TypeDescriptor.of(RangeHash.class), TEST_CODER.getEncodedTypeDescriptor());
+  }
+}

From 40ea4b0c1b8038e4b7e0045353a9021fe6838005 Mon Sep 17 00:00:00 2001
From: shitanshu verma <shitanshu@google.com>
Date: Thu, 4 Feb 2021 16:36:55 -0500
Subject: [PATCH 2/8] Fix lint error.

---
 .../validation/BufferedHadoopHashTableSource.java   |  3 ++-
 .../ComputeAndValidateHashFromBigtableDoFnTest.java | 13 +++++--------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
index eb018832ce..3cf415be5d 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
@@ -111,7 +111,8 @@ public boolean equals(Object o) {
       return false;
     }
     BufferedHadoopHashTableSource that = (BufferedHadoopHashTableSource) o;
-    return maxBufferSize == that.maxBufferSize && Objects.equal(hashTableSource, that.hashTableSource);
+    return maxBufferSize == that.maxBufferSize
+        && Objects.equal(hashTableSource, that.hashTableSource);
   }
 
   @Override
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
index ed725d8d37..d1fa56ba44 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
@@ -72,11 +72,9 @@ public class ComputeAndValidateHashFromBigtableDoFnTest {
   private static final int FIRST_ROW_INDEX = 20;
   private static final int LAST_ROW_INDEX = 31;
 
-  @Rule
-  public final BigtableEmulatorRule bigtableEmulator = BigtableEmulatorRule.create();
+  @Rule public final BigtableEmulatorRule bigtableEmulator = BigtableEmulatorRule.create();
 
-  @Rule
-  public final transient TestPipeline p = TestPipeline.create();
+  @Rule public final transient TestPipeline p = TestPipeline.create();
 
   private ComputeAndValidateHashFromBigtableDoFn doFn;
 
@@ -166,9 +164,7 @@ private void writeDataToTable() throws IOException {
     table.put(puts);
   }
 
-  /**
-   * Deletes the row range [startIndex, stopIndex)
-   */
+  /** Deletes the row range [startIndex, stopIndex) */
   private void deleteRange(int startIndex, int stopIndex) throws IOException {
     for (int i = startIndex; i < stopIndex; i++) {
       table.delete(new Delete(getRowKey(i)));
@@ -362,7 +358,8 @@ public void testMissingRanges() throws Exception {
     // Delete a range in middle
     deleteRange(23, 25);
 
-    // Delete row ranges at the end, bigtable scanner will finish with multiple row-ranges to process.
+    // Delete row ranges at the end, bigtable scanner will finish with multiple row-ranges to
+    // process.
     deleteRange(27, LAST_ROW_INDEX + 1);
 
     PCollection<KV<String, Iterable<List<RangeHash>>>> input =

From b91459be62a7c170bd2b182387ee95c85b6bc3a5 Mon Sep 17 00:00:00 2001
From: shitanshu verma <shitanshu@google.com>
Date: Fri, 5 Feb 2021 13:08:03 -0500
Subject: [PATCH 3/8] Fixing maven dependency.

---
 .../bigtable-beam-import/pom.xml                 | 16 ++++++++++------
 .../src/test/generate_test_data.txt              |  2 +-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
index 8ee5ba861b..ffc95bdf03 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
+++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
@@ -77,6 +77,16 @@ limitations under the License.
         </exclusion>
       </exclusions>
     </dependency>
+    <dependency>
+      <groupId>com.google.api</groupId>
+      <artifactId>api-common</artifactId>
+      <version>1.10.0</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.gson</groupId>
+      <artifactId>gson</artifactId>
+      <version>2.2.4</version>
+    </dependency>
 
     <dependency>
       <groupId>org.apache.beam</groupId>
@@ -224,11 +234,6 @@ limitations under the License.
       <version>1.0.1</version>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>com.google.auto.service</groupId>
-      <artifactId>auto-service-annotations</artifactId>
-      <version>1.0-rc7</version>
-    </dependency>
     <dependency>
       <groupId>com.google.cloud</groupId>
       <artifactId>google-cloud-bigtable-emulator</artifactId>
@@ -360,7 +365,6 @@ limitations under the License.
         <configuration>
           <!-- Manually promote dependencies: This is necessary to avoid flattening hbase-shaded-client's dependency tree -->
           <usedDependencies>
-            <usedDependency>com.google.auto.value:auto-value</usedDependency>
             <usedDependency>commons-codec:commons-codec</usedDependency>
             <usedDependency>com.squareup.okhttp:okhttp</usedDependency>
             <usedDependency>org.apache.beam:beam-sdks-java-io-hadoop-common
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
index 921caf2d6d..6e66d3e096 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
@@ -110,7 +110,7 @@ hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot test-snapshot -c
 
 // Create the hashes for the table. Run the command from unix shell on an HBase
 // node.
-hbase org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=100 --numhashfiles=10 test /integration-test/hashtable
+hbase org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=10 --numhashfiles=10 test /integration-test/hashtable
 
 // Export the data into GCS
 hadoop fs -copyToLocal /integration-test /tmp/

From e7de6b662758ce1ebfb1dc1025b50120dae7569e Mon Sep 17 00:00:00 2001
From: shitanshu verma <shitanshu@google.com>
Date: Thu, 11 Feb 2021 13:30:01 -0500
Subject: [PATCH 4/8] Incorporating review feedback.

---
 .../bigtable-beam-import/pom.xml              | 11 ---
 .../BufferedHadoopHashTableSource.java        | 46 +++-------
 ...omputeAndValidateHashFromBigtableDoFn.java | 36 +++-----
 .../validation/HadoopHashTableSource.java     | 91 +++++++++----------
 .../beam/validation/RangeHashCoder.java       |  2 +-
 .../beam/validation/SyncTableJob.java         | 14 +--
 .../beam/validation/SyncTableUtils.java       |  4 +-
 .../beam/validation/TableHashWrapper.java     |  4 +-
 .../validation/TableHashWrapperFactory.java   |  2 +-
 .../beam/validation/TableHashWrapperImpl.java | 11 +--
 .../mapreduce/BigtableTableHashAccessor.java  |  4 +-
 .../beam/hbasesnapshots/EndToEndIT.java       |  4 +-
 .../BufferedHadoopHashTableSourceTest.java    |  2 +-
 ...teAndValidateHashFromBigtableDoFnTest.java | 81 ++++++++++++++---
 .../beam/validation/FakeTableHashWrapper.java |  2 +-
 .../FakeTableHashWrapperFactory.java          |  2 +-
 .../validation/HadoopHashBasedReaderTest.java | 10 +-
 .../validation/HadoopHashTableSourceTest.java |  2 +-
 .../HashBasedSourceSerializationTest.java     |  2 +-
 .../beam/validation/RangeHashCoderTest.java   |  2 +-
 20 files changed, 170 insertions(+), 162 deletions(-)

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
index ffc95bdf03..93a94e106d 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
+++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
@@ -77,17 +77,6 @@ limitations under the License.
         </exclusion>
       </exclusions>
     </dependency>
-    <dependency>
-      <groupId>com.google.api</groupId>
-      <artifactId>api-common</artifactId>
-      <version>1.10.0</version>
-    </dependency>
-    <dependency>
-      <groupId>com.google.code.gson</groupId>
-      <artifactId>gson</artifactId>
-      <version>2.2.4</version>
-    </dependency>
-
     <dependency>
       <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-java-core</artifactId>
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
index 3cf415be5d..a616441655 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,13 +17,10 @@
 
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
 
-import com.google.api.core.InternalApi;
 import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
 import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.beam.sdk.coders.Coder;
@@ -33,8 +30,6 @@
 import org.apache.beam.sdk.io.BoundedSource;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.values.KV;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.util.Bytes;
 
 /**
@@ -45,19 +40,20 @@
  * <p>Hadoop HashTable output is sorted by row-key and contains a row-range and hash. Beam
  * Pcollection do not guarantee any ordering. To fetch a batch of ranges in 1 ReadRows operation,
  * this source buffers then and outputs a List<RangeHash> guaranteeing the sorted order of ranges.
+ *
+ * <p>Emits a batch of sorted RangeHashes keyed by the start key of the first range.
  */
-@InternalApi
 class BufferedHadoopHashTableSource extends BoundedSource<KV<String, List<RangeHash>>> {
 
   private static final long serialVersionUID = 39842743L;
 
-  public static final Log LOG = LogFactory.getLog(BufferedHadoopHashTableSource.class);
   private static final int DEFAULT_BATCH_SIZE = 50;
+  private static final Coder<KV<String, List<RangeHash>>> CODER =
+      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(RangeHashCoder.of()));
 
   // Max number of RangeHashes to buffer.
-  private int maxBufferSize;
-  private HadoopHashTableSource hashTableSource;
-  private Coder<KV<String, List<RangeHash>>> coder;
+  private final int maxBufferSize;
+  private final HadoopHashTableSource hashTableSource;
 
   public BufferedHadoopHashTableSource(HadoopHashTableSource source) {
     this(source, DEFAULT_BATCH_SIZE);
@@ -65,7 +61,6 @@ public BufferedHadoopHashTableSource(HadoopHashTableSource source) {
 
   public BufferedHadoopHashTableSource(HadoopHashTableSource hashTableSource, int maxBufferSize) {
     this.hashTableSource = hashTableSource;
-    this.coder = KvCoder.of(StringUtf8Coder.of(), ListCoder.of(RangeHashCoder.of()));
     this.maxBufferSize = maxBufferSize;
   }
 
@@ -88,13 +83,13 @@ public List<? extends BoundedSource<KV<String, List<RangeHash>>>> split(
 
   @Override
   public Coder<KV<String, List<RangeHash>>> getOutputCoder() {
-    return coder;
+    return CODER;
   }
 
   @Override
   public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
     // HashTable data files don't expose a method to estimate size or lineCount.
-    return 0;
+    return hashTableSource.getEstimatedSizeBytes(options);
   }
 
   @Override
@@ -130,21 +125,10 @@ public String toString() {
         + maxBufferSize;
   }
 
-  private void writeObject(ObjectOutputStream s) throws IOException {
-    s.writeObject(hashTableSource);
-    s.writeInt(maxBufferSize);
-  }
-
-  private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
-    this.hashTableSource = (HadoopHashTableSource) s.readObject();
-    this.coder = KvCoder.of(StringUtf8Coder.of(), ListCoder.of(RangeHashCoder.of()));
-    this.maxBufferSize = s.readInt();
-  }
-
   private static class BufferedHashBasedReader extends BoundedReader<KV<String, List<RangeHash>>> {
 
-    private BoundedReader<RangeHash> hashReader;
-    private BufferedHadoopHashTableSource source;
+    private final BoundedReader<RangeHash> hashReader;
+    private final BufferedHadoopHashTableSource source;
 
     private List<RangeHash> buffer;
 
@@ -181,6 +165,9 @@ private boolean bufferRangeHashes() throws IOException {
 
     @Override
     public boolean advance() throws IOException {
+      // Reset the buffer for next batch.
+      buffer = new ArrayList<>(source.maxBufferSize);
+
       return bufferRangeHashes();
     }
 
@@ -188,15 +175,12 @@ public boolean advance() throws IOException {
     public KV<String, List<RangeHash>> getCurrent() {
       // getCurrent only gets called when buffer is not empty.
       Preconditions.checkArgument(!buffer.isEmpty(), "Can not get current on empty buffer.");
-      List<RangeHash> hashes = buffer;
-      // Reset the buffer for next batch.
-      buffer = new ArrayList<>(source.maxBufferSize);
       // GroupBy key is a string and not ImmutableBytesWritable because the WritableCoder is not
       // deterministic. The outputted PCollection is grouped by the K and needs a deterministic
       // coder. Having a String K leads to an unfortunate double encoding, ImmutableBytesWritable->
       // HEX string -> UTF8 encoded string. The number of batches are significantly smaller than
       // data fetched from Bigtable and should not have meaningful impact on the job performance.
-      return KV.of(Bytes.toStringBinary(hashes.get(0).startInclusive.copyBytes()), hashes);
+      return KV.of(Bytes.toStringBinary(buffer.get(0).startInclusive.copyBytes()), buffer);
     }
 
     @Override
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
index 3801465f2f..62984e8ce2 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
 
-import com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.common.base.Preconditions;
 import com.google.cloud.bigtable.beam.AbstractCloudBigtableTableDoFn;
 import com.google.cloud.bigtable.beam.CloudBigtableConfiguration;
 import com.google.cloud.bigtable.beam.TemplateUtils;
@@ -45,7 +45,6 @@
  * A {@link DoFn} that takes a row range and hash from HBase and validates the hash from rows read
  * from Cloud Bigtable.
  */
-@InternalApi
 class ComputeAndValidateHashFromBigtableDoFn
     extends AbstractCloudBigtableTableDoFn<KV<String, Iterable<List<RangeHash>>>, RangeHash> {
 
@@ -91,7 +90,7 @@ public void processElement(ProcessContext context) throws Exception {
     for (List<RangeHash> rangeHashes : context.element().getValue()) {
       if (rangeHashes.isEmpty()) {
         // No rows ranges found, return;
-        return;
+        continue;
       }
 
       ImmutableBytesWritable rangeStartInclusive = rangeHashes.get(0).startInclusive;
@@ -126,19 +125,16 @@ public void processElement(ProcessContext context) throws Exception {
         // rangeHashes until rowKey's range is found.
         while (!isWithinUpperBound(currentRangeHash.stopExclusive, rowKey)) {
           validateBatchHash(context, resultHasher, currentRangeHash);
-          if (!rangeHashIterator.hasNext()) {
-            // THIS SHOULD NEVER HAPPEN. Bigtable is being scanned till the last
-            // RangeHash.endKeyExclusive(), so bigtable's result should not outlast the
-            // rangeHashes.
-            throw new IllegalStateException(
-                "Buffer reached to end while scan is still active at row :"
-                    + immutableBytesToString(result.getRow())
-                    + ". Affected Range: ["
-                    + immutableBytesToString(rangeStartInclusive)
-                    + ", "
-                    + immutableBytesToString(rangeEndExclusive)
-                    + ").");
-          }
+          // THIS SHOULD NEVER HAPPEN. Bigtable is being scanned till the last
+          // RangeHash.endKeyExclusive(), so bigtable's result should not outlast the
+          // rangeHashes. Keys are template arguments so that they fill the %s placeholders.
+          Preconditions.checkState(
+              rangeHashIterator.hasNext(),
+              "Buffer reached to end while scan is still active at row : %s. "
+                  + "Affected Range: [%s, %s).",
+              immutableBytesToString(result.getRow()),
+              immutableBytesToString(rangeStartInclusive),
+              immutableBytesToString(rangeEndExclusive));
           currentRangeHash = rangeHashIterator.next();
         }
 
@@ -195,10 +191,6 @@ private ResultScanner createBigtableScan(byte[] startKeyInclusive, byte[] stopKe
   /**
    * Determines if row >= stopExclusive for a row range (start, stopExclusive). Empty stopExclusive
    * represents a range with no upper bound.
-   *
-   * @param stopExclusive
-   * @param row
-   * @return
    */
   private boolean isWithinUpperBound(
       ImmutableBytesWritable stopExclusive, ImmutableBytesWritable row) {
@@ -216,7 +208,7 @@ private void validateBatchHash(
       matches.inc();
     }
     // Start a new batch
-    resultHasher.startBatch(new ImmutableBytesWritable(currentRangeHash.stopExclusive));
+    resultHasher.startBatch(currentRangeHash.stopExclusive);
   }
 
   private void reportMismatch(ProcessContext context, RangeHash currentRangeHash) {
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
index 20b693963a..59095c8b54 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
@@ -18,11 +18,9 @@
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.createConfiguration;
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
 
-import autovalue.shaded.com.google$.common.annotations.$VisibleForTesting;
-import com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.common.annotations.VisibleForTesting;
 import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
 import com.google.cloud.bigtable.beam.validation.TableHashWrapper.TableHashReader;
-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -32,6 +30,7 @@
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
+import javax.annotation.Nullable;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.DefaultCoder;
 import org.apache.beam.sdk.io.BoundedSource;
@@ -46,7 +45,6 @@
  * A beam source to read output of Hadoop HashTable job. The source creates 1 workitem per HashTable
  * data file and emits a row-range/hash pair.
  */
-@InternalApi
 class HadoopHashTableSource extends BoundedSource<RangeHash> implements Serializable {
 
   private static final long serialVersionUID = 2383724L;
@@ -120,9 +118,9 @@ public int hashCode() {
   private RangeHashCoder coder;
 
   // Row range owned by this source.
-  @VisibleForTesting ImmutableBytesWritable startRowInclusive;
+  @VisibleForTesting @Nullable ImmutableBytesWritable startRowInclusive;
 
-  @VisibleForTesting ImmutableBytesWritable stopRowExclusive;
+  @VisibleForTesting @Nullable ImmutableBytesWritable stopRowExclusive;
 
   private TableHashWrapperFactory tableHashWrapperFactory;
 
@@ -143,12 +141,12 @@ public HadoopHashTableSource(
    * Constructor to initialize a HadoopHashTableSource for a given row-range. Used for creating
    * split sources.
    */
-  @$VisibleForTesting
+  @VisibleForTesting
   HadoopHashTableSource(
       ValueProvider<String> projectId,
       ValueProvider<String> sourceHashDir,
-      ImmutableBytesWritable startRowInclusive,
-      ImmutableBytesWritable stopRowExclusive) {
+      @Nullable ImmutableBytesWritable startRowInclusive,
+      @Nullable ImmutableBytesWritable stopRowExclusive) {
     this(
         projectId,
         sourceHashDir,
@@ -161,8 +159,8 @@ public HadoopHashTableSource(
   HadoopHashTableSource(
       ValueProvider<String> projectId,
       ValueProvider<String> hadoopHashTableOutputDir,
-      ImmutableBytesWritable startRowInclusive,
-      ImmutableBytesWritable stopRowExclusive,
+      @Nullable ImmutableBytesWritable startRowInclusive,
+      @Nullable ImmutableBytesWritable stopRowExclusive,
       TableHashWrapperFactory tableHashWrapperFactory) {
     this.coder = new RangeHashCoder();
     this.projectId = projectId;
@@ -192,15 +190,15 @@ public List<? extends BoundedSource<RangeHash>> split(
           new HadoopHashTableSource(
               projectId,
               sourceHashDir,
-              new ImmutableBytesWritable(hash.getStartRow()),
-              new ImmutableBytesWritable(hash.getStopRow()),
+              hash.getStartRow(),
+              hash.getStopRow(),
               tableHashWrapperFactory));
       return splitSources;
     }
 
     // Use the HashTable start key. The value is HConstants.EMPTY_START_ROW for full table scan.
-    ImmutableBytesWritable startRow = new ImmutableBytesWritable(hash.getStartRow());
-    ImmutableBytesWritable stopRow = new ImmutableBytesWritable(hash.getStopRow());
+    ImmutableBytesWritable startRow = hash.getStartRow();
+    ImmutableBytesWritable stopRow = hash.getStopRow();
 
     // The output of HashTable is organized as partition file and a set of datafiles.
     // Partition file contains a list of partitions, these partitions split the key-range of a table
@@ -238,7 +236,7 @@ public List<? extends BoundedSource<RangeHash>> split(
             projectId,
             sourceHashDir,
             partitions.get(numPartitions - 1),
-            new ImmutableBytesWritable(stopRow),
+            stopRow,
             tableHashWrapperFactory));
     LOG.info("Returning " + splitSources.size() + " sources from " + numPartitions + " partitions");
     return splitSources;
@@ -270,11 +268,11 @@ public BoundedReader createReader(PipelineOptions options) throws IOException {
 
     return new HashBasedReader(
         this,
-        new ImmutableBytesWritable(startRowInclusive),
-        new ImmutableBytesWritable(stopRowExclusive),
+        startRowInclusive,
+        stopRowExclusive,
         hash.newReader(
             createConfiguration(this.projectId.get(), this.sourceHashDir.get()),
-            new ImmutableBytesWritable(startRowInclusive)));
+            startRowInclusive));
   }
 
   @Override
@@ -307,7 +305,6 @@ public String toString() {
   }
 
   private void writeObject(ObjectOutputStream s) throws IOException {
-    // s.defaultWriteObject();
     s.writeObject(projectId);
     s.writeObject(sourceHashDir);
     s.writeObject(tableHashWrapperFactory);
@@ -328,50 +325,48 @@ private void writeObject(ObjectOutputStream s) throws IOException {
   }
 
   private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
-    // s.defaultReadObject();
-    this.projectId = (ValueProvider<String>) s.readObject();
-    this.sourceHashDir = (ValueProvider<String>) s.readObject();
-    this.tableHashWrapperFactory = (TableHashWrapperFactory) s.readObject();
+    projectId = (ValueProvider<String>) s.readObject();
+    sourceHashDir = (ValueProvider<String>) s.readObject();
+    tableHashWrapperFactory = (TableHashWrapperFactory) s.readObject();
     // start/stop can be null, they are preceded by a boolean indicating their presence.
     if (s.readBoolean() == true) {
-      this.startRowInclusive = new ImmutableBytesWritable((byte[]) s.readObject());
+      startRowInclusive = new ImmutableBytesWritable((byte[]) s.readObject());
     }
     if (s.readBoolean() == true) {
-      this.stopRowExclusive = new ImmutableBytesWritable((byte[]) s.readObject());
+      stopRowExclusive = new ImmutableBytesWritable((byte[]) s.readObject());
     }
   }
 
   @VisibleForTesting
   static class HashBasedReader extends BoundedReader<RangeHash> {
 
-    final HadoopHashTableSource source;
-    final TableHashReader reader;
+    private final HadoopHashTableSource source;
+    private final TableHashReader reader;
 
-    final ImmutableBytesWritable startRowInclusive;
-    final ImmutableBytesWritable stopRowExclusive;
+    @VisibleForTesting final ImmutableBytesWritable startRowInclusive;
+    @VisibleForTesting final ImmutableBytesWritable stopRowExclusive;
 
-    long numKeys = 0;
+    private long numKeys = 0;
     // Flag indicating that this workitem is finished.
-    boolean isDone = false;
-    ImmutableBytesWritable currentRangeStartKey;
+    private boolean isDone = false;
+    private ImmutableBytesWritable currentRangeStartKey;
     // Hash for the current range.
-    ImmutableBytesWritable currentHash;
-    RangeHash currentRangeHash;
+    private ImmutableBytesWritable currentHash;
+    private RangeHash currentRangeHash;
 
     public HashBasedReader(
         HadoopHashTableSource source,
         ImmutableBytesWritable startRowInclusive,
         ImmutableBytesWritable stopRowExclusive,
         TableHashReader reader) {
-      this.reader = reader;
       this.source = source;
       this.startRowInclusive = startRowInclusive;
       this.stopRowExclusive = stopRowExclusive;
+      this.reader = reader;
     }
 
     @Override
     public boolean start() throws IOException {
-      // NO CHECKED EXCEPTIONS HERE.
       LOG.debug(
           "Starting a new reader at key range ["
               + immutableBytesToString(startRowInclusive)
@@ -401,14 +396,9 @@ public boolean advance() throws IOException {
       ImmutableBytesWritable startKey = this.currentRangeStartKey;
       ImmutableBytesWritable hash = this.currentHash;
 
-      if (!readNextKey()) {
-        this.currentRangeHash = RangeHash.of(startKey, stopRowExclusive, hash);
-        // return true since we have lastBatchStartKey to emit. Set isDone=true to prevent reading
-        // from a potentially exhausted reader.
-        isDone = true;
-      } else {
-        this.currentRangeHash = RangeHash.of(startKey, reader.getCurrentKey(), hash);
-      }
+      // if there is nothing to read, we are done. readNextKey advances the currentRangeStartKey.
+      isDone = !readNextKey();
+      currentRangeHash = RangeHash.of(startKey, currentRangeStartKey, hash);
 
       return true;
     }
@@ -417,22 +407,23 @@ public boolean advance() throws IOException {
     private boolean readNextKey() throws IOException {
       if (reader.next()) {
         numKeys++;
-        this.currentRangeStartKey = reader.getCurrentKey();
+        currentRangeStartKey = reader.getCurrentKey();
         if ( // StopRow is not set, everything is in bounds.
         (stopRowExclusive.equals(HConstants.EMPTY_END_ROW)
             || currentRangeStartKey.compareTo(stopRowExclusive) < 0)) { // currentKey < stopKey
           // There is a key to read and the key is within the bounds of this workitem. Return true.
-          this.currentHash = reader.getCurrentHash();
+          currentHash = reader.getCurrentHash();
           return true;
         } else {
           // There is a key to read but its outside of the bounds of this workitem.
-          this.currentHash = null;
+          currentHash = null;
           return false;
         }
       }
 
-      // Nothing left to read for this workitem.
-      currentRangeStartKey = null;
+      // Nothing left to read for this workitem. Next range would have started from
+      // stopRowExclusive.
+      currentRangeStartKey = stopRowExclusive;
       currentHash = null;
       return false;
     }
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
index 6799d63872..d6341a08f2 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
index a664ea2602..56b38fc3cb 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,10 +16,10 @@
 package com.google.cloud.bigtable.beam.validation;
 
 import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly;
+import com.google.bigtable.repackaged.com.google.gson.Gson;
 import com.google.cloud.bigtable.beam.sequencefiles.Utils;
 import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
 import com.google.common.annotations.VisibleForTesting;
-import com.google.gson.Gson;
 import java.util.List;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.PipelineResult;
@@ -183,17 +183,11 @@ public static Pipeline buildPipeline(SyncTableOptions opts) {
   static class RangeHashToString extends SimpleFunction<RangeHash, String> {
     // TODO maybe explore a sequenceFile sink for RangeHash. Hadoop jobs using this output may be
     // easier to write for sequence file.
-
-    // GSON is not serializable, keep it transient. Member variable to avoid creating a Gson object
-    // per apply call.
-    private transient Gson gson = null;
+    private static final Gson GSON = new Gson();
 
     @Override
     public String apply(RangeHash input) {
-      if (gson == null) {
-        gson = new Gson();
-      }
-      return gson.toJson(input);
+      return GSON.toJson(input);
     }
   }
 }
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
index 2f0c5cc4cc..cc92bea6a4 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,12 +15,14 @@
  */
 package com.google.cloud.bigtable.beam.validation;
 
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
 
 /** Utility class for SyncTable job. */
+@InternalApi
 public class SyncTableUtils {
 
   private SyncTableUtils() {}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
index 2f75c5722a..55200570ed 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
  */
 package com.google.cloud.bigtable.beam.validation;
 
-import com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import com.google.common.collect.ImmutableList;
 import java.io.Closeable;
 import java.io.IOException;
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
index 262aadc7c5..67776299a4 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
index 71a0f6ddaa..b04bd538a6 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,11 +36,10 @@ static TableHashWrapper create(Configuration conf, String hashTableOutputDir) th
     TableHashWrapper tableHashWrapper = new TableHashWrapperImpl(tableHash);
     Preconditions.checkArgument(
         tableHashWrapper.getNumHashFiles() == (tableHashWrapper.getPartitions().size() + 1),
-        String.format(
-            "Corrupt hashtable output. %d hash files for %d partitions. Expected %d files.",
-            tableHashWrapper.getNumHashFiles(),
-            tableHashWrapper.getPartitions().size(),
-            tableHashWrapper.getPartitions().size() + 1));
+        "Corrupt hashtable output. %s hash files for %s partitions. Expected %s files.",
+        tableHashWrapper.getNumHashFiles(),
+        tableHashWrapper.getPartitions().size(),
+        tableHashWrapper.getPartitions().size() + 1);
     return tableHashWrapper;
   }
 
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
index a5312d6c52..a7db0add1c 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
  */
 package org.apache.hadoop.hbase.mapreduce;
 
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import com.google.common.collect.ImmutableList;
 import java.io.IOException;
 import org.apache.hadoop.hbase.client.Result;
@@ -24,6 +25,7 @@
 import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash;
 
 /** A helper class to access package private fields of HashTable.TableHash. */
+@InternalApi
 public class BigtableTableHashAccessor {
 
   // Restrict object creation. This class should only be used to access state from TableHash.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
index e7f777f9bc..3c8e26cbfa 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
@@ -166,6 +166,8 @@ public void teardown() throws IOException {
       for (GcsPath path : paths) {
         pathStrs.add(path.toString());
       }
+      // TODO: cleanup fails when tests time out. Add an orphan cleaner in the setup()
+      // https://github.com/googleapis/java-bigtable/blob/35588d89b9b243eb691a29d3aff16b9f5a08fbb8/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/test_helpers/env/AbstractTestEnv.java#L108-L119
       this.gcsUtil.remove(pathStrs);
     }
 
@@ -309,7 +311,7 @@ public void testHBaseSnapshotImportWithCorruptions() throws Exception {
 
     List<GcsPath> outputs = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
 
-    System.out.println("OUTPUTS: " + outputs);
+    LOG.warn("OUTPUTS: " + outputs);
     // FileSink will shard the outputs and will created >1 files.
     Assert.assertTrue(outputs.size() > 1);
     // TODO read the files and validate that the ranges are there instead of size check.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
index ee574a9c2d..96d5960423 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
index d1fa56ba44..2e9b6fd8ed 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,6 +29,11 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.TestPipeline;
@@ -47,6 +52,7 @@
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor.BigtableResultHasher;
 import org.junit.After;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -192,6 +198,20 @@ private RangeHash createHash(byte[] startRow, byte[] stopRow) throws IOException
         hasher.getBatchHash());
   }
 
+  private void validateCounters(
+      PipelineResult result, Long expectedMatches, Long expectedMismatches) {
+    MetricQueryResults metrics = result.metrics().allMetrics();
+    Map<String, Long> counters =
+        StreamSupport.stream(metrics.getCounters().spliterator(), false)
+            .collect(Collectors.toMap((m) -> m.getName().getName(), (m) -> m.getAttempted()));
+    // Presumably a counter that was never incremented is not reported at all; default it to 0
+    // so that zero expectations are verified instead of being silently skipped.
+    Long actualMatches = counters.getOrDefault("ranges_matched", 0L);
+    Long actualMismatches = counters.getOrDefault("ranges_not_matched", 0L);
+    Assert.assertEquals(expectedMatches, actualMatches);
+    Assert.assertEquals(expectedMismatches, actualMismatches);
+  }
+
   ////////// Happy case tests for various setups//////////////////////
   @Test
   public void testHashMatchesForMultipleRange() throws Exception {
@@ -203,7 +223,8 @@ public void testHashMatchesForMultipleRange() throws Exception {
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).empty();
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 2L, 0L);
   }
 
   @Test
@@ -215,7 +236,8 @@ public void testHashMatchesForSingleRange() throws Exception {
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).containsInAnyOrder();
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 1L, 0L);
   }
 
   @Test
@@ -228,7 +250,8 @@ public void testHashMatchesForFullTableScanWithMultipleRange() throws Exception
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).empty();
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 2L, 0L);
   }
 
   @Test
@@ -242,7 +265,31 @@ public void testHashMatchesForMultipleSingleRowRange() throws Exception {
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).empty();
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 3L, 0L);
+  }
+
+  ///////////////// Test mismatches with multiple ranges per Key in KV<> ////////////////////
+  @Test
+  public void testHashMisMatchesForMultipleRangeAcrossKV() throws Exception {
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(28)));
+
+    // Corrupt both the ranges
+    table.delete(new Delete(getRowKey(21)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(24)).addColumn(CF2, COL, TS, getValue(20, 0)));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(
+            Create.of(
+                KV.of(
+                    new String(getRowKey(21)),
+                    Arrays.asList(Arrays.asList(hashes.get(0)), Arrays.asList(hashes.get(1))))));
+
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    PAssert.that(output).containsInAnyOrder(hashes);
+    PipelineResult result = p.run();
+    validateCounters(result, 0L, 2L);
   }
 
   ///////////////// Test mismatches when Bigtable has extra rows ////////////////////
@@ -260,7 +307,8 @@ public void testAdditionalCellInMiddle() throws Exception {
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).containsInAnyOrder(hashes.get(1));
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 2L, 1L);
   }
 
   @Test
@@ -280,7 +328,8 @@ public void testAdditionalRowsAtEnds() throws Exception {
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).containsInAnyOrder(hashes.get(0), hashes.get(2));
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 1L, 2L);
   }
 
   ///////////////////// Test different values ///////////////////////////
@@ -314,7 +363,8 @@ public void testDifferentValues() throws Exception {
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output)
         .containsInAnyOrder(hashes.get(0), hashes.get(1), hashes.get(2), hashes.get(3));
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 1L, 4L);
   }
 
   ////////////////// Tests with CBT missing data //////////////////////////////
@@ -340,7 +390,8 @@ public void testMissingRows() throws Exception {
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4));
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 2L, 3L);
   }
 
   @Test
@@ -368,7 +419,8 @@ public void testMissingRanges() throws Exception {
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output)
         .containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4), hashes.get(5));
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 2L, 4L);
   }
 
   @Test
@@ -385,7 +437,8 @@ public void testCbtEmpty() throws Exception {
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).containsInAnyOrder(hashes);
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 0L, 3L);
   }
 
   ////////////////////// Test that scan is used from TableHash.////////////////////////
@@ -404,7 +457,8 @@ public void testScanFromTableHash() throws Exception {
 
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output).containsInAnyOrder(hashes);
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 0L, 3L);
   }
 
   ////////////////////// Combination of different cases //////////////////////////////////
@@ -436,6 +490,7 @@ public void testMismatchesComprehensive() throws Exception {
     PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
     PAssert.that(output)
         .containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4), hashes.get(5));
-    p.run();
+    PipelineResult result = p.run();
+    validateCounters(result, 2L, 4L);
   }
 }
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
index 6e3e5f004d..04cce0b1cd 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
index 9a3acc19a9..2e65e3b855 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
index 20abf02d06..fa88a56d14 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -44,8 +44,6 @@ public class HadoopHashBasedReaderTest {
       new ImmutableBytesWritable("AAAA".getBytes());
   private static final ImmutableBytesWritable STOP_ROW =
       new ImmutableBytesWritable("ZZZZ".getBytes());
-  private static final ImmutableBytesWritable POST_STOP_ROW =
-      new ImmutableBytesWritable("z".getBytes()); // Lowercase z is lexicographically > uppercase Z
   private static final ImmutableBytesWritable EMPTY_ROW =
       new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
   private static final ImmutableBytesWritable START_HASH =
@@ -139,7 +137,7 @@ public void testHashReaderWithEmptyEndRow() throws IOException {
   @Test
   public void testHashReaderWorkItemEndedOnFirstBatch() throws IOException {
     // Setup 1 entry in this hashtable datafile. This entry is outside of the workitem's row
-    fakeTableHashWrapper.hashes.add(KV.of(POST_STOP_ROW, START_HASH));
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, START_HASH));
     // Source will be empty as no hashes fall in its bounds.
     assertEquals(new ArrayList<RangeHash>(), SourceTestUtils.readFromSource(hashTableSource, null));
   }
@@ -159,8 +157,8 @@ public void testHashReaderWorkItemEndedOnSecondEntry() throws IOException {
   public void testHashReaderWorkItemEndedAfterMultipleBatches() throws IOException {
     // Setup 4 entries in this hashtable datafile.
     List<RangeHash> expected = setupTestData(START_ROW, STOP_ROW, 4);
-    // Add a next entry after the stop row. Reader should stop and read just 4 entry.
-    fakeTableHashWrapper.hashes.add(KV.of(POST_STOP_ROW, getHash(100)));
+    // Add a next entry at the stop row. Reader should stop and read just 4 entries.
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, getHash(100)));
     assertEquals(expected, SourceTestUtils.readFromSource(hashTableSource, null));
   }
 
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
index bc79f4300b..a3aba3f756 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
index 216f5d219e..8c7f6cc8c4 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
index bad5cd8fff..5f644e3b50 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

From 5093d87bb9d67e5617dcc0e7859dbe2d9dfad67b Mon Sep 17 00:00:00 2001
From: shitanshu verma <shitanshu@google.com>
Date: Thu, 11 Feb 2021 14:08:34 -0500
Subject: [PATCH 5/8] Fixing maven build issues.

---
 bigtable-dataflow-parent/bigtable-beam-import/pom.xml       | 6 ++++++
 .../bigtable/beam/validation/FakeTableHashWrapper.java      | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
index 93a94e106d..b1d909f25a 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
+++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
@@ -229,6 +229,11 @@ limitations under the License.
       <version>0.124.0</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.google.code.findbugs</groupId>
+      <artifactId>jsr305</artifactId>
+      <version>${jsr305.version}</version>
+    </dependency>
   </dependencies>
 
   <build>
@@ -354,6 +359,7 @@ limitations under the License.
         <configuration>
           <!-- Manually promote dependencies: This is necessary to avoid flattening hbase-shaded-client's dependency tree -->
           <usedDependencies>
+            <usedDependency>com.google.auto.value:auto-value</usedDependency>
             <usedDependency>commons-codec:commons-codec</usedDependency>
             <usedDependency>com.squareup.okhttp:okhttp</usedDependency>
             <usedDependency>org.apache.beam:beam-sdks-java-io-hadoop-common
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
index 04cce0b1cd..ee2b6814e2 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
@@ -15,8 +15,8 @@
  */
 package com.google.cloud.bigtable.beam.validation;
 
+import com.google.bigtable.repackaged.com.google.gson.Gson;
 import com.google.common.collect.ImmutableList;
-import com.google.gson.Gson;
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;

From 4073bf63124a87b47e443769292992ebdb3cc4af Mon Sep 17 00:00:00 2001
From: shitanshu verma <shitanshu@google.com>
Date: Fri, 12 Feb 2021 10:50:25 -0500
Subject: [PATCH 6/8] Adding validation of mismatches in integration tests.

---
 .../bigtable-beam-import/pom.xml              |   1 -
 .../validation/HadoopHashTableSource.java     |   4 +-
 .../beam/hbasesnapshots/EndToEndIT.java       | 111 ++++++++++++++----
 3 files changed, 89 insertions(+), 27 deletions(-)

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
index b1d909f25a..778083f0b9 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
+++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
@@ -26,7 +26,6 @@ limitations under the License.
 
   <properties>
     <mainClass>com.google.cloud.bigtable.beam.Main</mainClass>
-    <skipITs>false</skipITs>
   </properties>
 
   <!-- Adding this to resolve version conflict within beam sdk-->
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
index 59095c8b54..138ba3f860 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
@@ -18,6 +18,7 @@
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.createConfiguration;
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
 
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import com.google.bigtable.repackaged.com.google.common.annotations.VisibleForTesting;
 import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
 import com.google.cloud.bigtable.beam.validation.TableHashWrapper.TableHashReader;
@@ -45,7 +46,8 @@
  * A beam source to read output of Hadoop HashTable job. The source creates 1 workitem per HashTable
  * data file and emits a row-range/hash pair.
  */
-class HadoopHashTableSource extends BoundedSource<RangeHash> implements Serializable {
+@InternalApi
+public class HadoopHashTableSource extends BoundedSource<RangeHash> implements Serializable {
 
   private static final long serialVersionUID = 2383724L;
 
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
index 3c8e26cbfa..0320dd1a61 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
@@ -18,13 +18,20 @@
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import com.google.api.services.storage.model.Objects;
+import com.google.bigtable.repackaged.com.google.gson.Gson;
 import com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot.ImportOptions;
+import com.google.cloud.bigtable.beam.sequencefiles.HBaseResultToMutationFn;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
 import com.google.cloud.bigtable.beam.validation.SyncTableJob;
 import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
 import com.google.cloud.bigtable.hbase.BigtableConfiguration;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -42,10 +49,9 @@
 import org.apache.beam.sdk.metrics.MetricQueryResults;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Connection;
@@ -54,11 +60,12 @@
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /*
  * End to end integration test for pipeline that import HBase snapshot data into Cloud Bigtable and
@@ -79,7 +86,7 @@
  */
 public class EndToEndIT {
 
-  private final Log LOG = LogFactory.getLog(getClass());
+  private static final Logger LOG = LoggerFactory.getLogger(EndToEndIT.class);
   private static final String TEST_SNAPSHOT_NAME = "test-snapshot";
   // Location of test data hosted on Google Cloud Storage, for on-cloud dataflow tests.
   private static final String CLOUD_TEST_DATA_FOLDER = "cloud.test.data.folder";
@@ -228,6 +235,60 @@ private Map<String, Long> getCountMap(PipelineResult result) {
         .collect(Collectors.toMap((m) -> m.getName().getName(), (m) -> m.getAttempted()));
   }
 
+  /**
+   * Reads the output of SyncTable job and returns a list of mismatched RangeHashes.
+   *
+   * @throws IOException if the output files can not be listed or read.
+   */
+  private List<RangeHash> readMismatchesFromOutputFiles() throws IOException {
+    Gson gson = new Gson();
+    // Find output files
+    List<GcsPath> outputFiles = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
+    List<RangeHash> rangeHashes = new ArrayList<>();
+
+    // Read each file line by line and create a RangeHash from it. Stream from the channel (a
+    // single read() may not fill a buffer) and let try-with-resources close it on any exit.
+    for (GcsPath outputFile : outputFiles) {
+      try (BufferedReader reader =
+          new BufferedReader(
+              java.nio.channels.Channels.newReader(gcsUtil.open(outputFile), "UTF-8"))) {
+        String serializedRangeHash;
+        while ((serializedRangeHash = reader.readLine()) != null) {
+          try {
+            rangeHashes.add(gson.fromJson(serializedRangeHash.trim(), RangeHash.class));
+          } catch (Exception e) {
+            LOG.error("Failed to parse JSON: [" + serializedRangeHash + "]", e);
+            throw e;
+          }
+        }
+      }
+    }
+    return rangeHashes;
+  }
+
+  // Asserts that every row key in rowKeys falls inside at least one range in mismatches.
+  // Fails with an AssertionError (via Assert.assertTrue) on the first row key that does not.
+  private void validateRowInRangeHashes(List<byte[]> rowKeys, Iterable<RangeHash> mismatches) {
+    for (byte[] mismatchedRowKey : rowKeys) {
+      Assert.assertTrue(containsRow(mismatchedRowKey, mismatches));
+    }
+  }
+
+  // Returns true if rowKey falls in [startInclusive, stopExclusive) of any range in rangeHashes.
+  private boolean containsRow(byte[] rowKey, Iterable<RangeHash> rangeHashes) {
+    for (RangeHash mismatchedRange : rangeHashes) {
+      // TODO: There may be a better Range.belongs() utility function somewhere?
+      // An empty start/stop key is treated as "no bound" on that side of the range.
+      if ((mismatchedRange.startInclusive.equals(HConstants.EMPTY_BYTE_ARRAY)
+              || mismatchedRange.startInclusive.compareTo(rowKey) <= 0)
+          && (mismatchedRange.stopExclusive.equals(HConstants.EMPTY_BYTE_ARRAY)
+              || mismatchedRange.stopExclusive.compareTo(rowKey) > 0)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   @Test
   public void testHBaseSnapshotImport() throws Exception {
 
@@ -253,16 +314,13 @@ public void testHBaseSnapshotImport() throws Exception {
     state = result.waitUntilFinish();
     Assert.assertEquals(State.DONE, state);
 
-    List<GcsPath> outputs = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
-    // FileSink will write an empty file when there are no mismatches
-    Assert.assertEquals(1, outputs.size());
-    // TODO read the actual files and validate the ranges instead of size check
-    Assert.assertEquals(0, gcsUtil.fileSize(outputs.get(0)));
+    // Read the output files and validate that there are no mismatches.
+    Assert.assertEquals(0, readMismatchesFromOutputFiles().size());
 
     // Validate the counters.
     Map<String, Long> counters = getCountMap(result);
-    Assert.assertEquals(counters.size(), 1);
     Assert.assertEquals(counters.get("ranges_matched"), (Long) 101L);
+    Assert.assertNull(counters.get("ranges_not_matched"));
   }
 
   /**
@@ -276,15 +334,21 @@ public void testHBaseSnapshotImportWithCorruptions() throws Exception {
     State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish();
     Assert.assertEquals(State.DONE, state);
 
+    // Rows where corruptions will be added.
+    byte[] mismatchRowAtStart = "000".getBytes();
+    byte[] mismatchRowInMiddle = "24".getBytes();
+    byte[] mismatchRowDeleted = "64".getBytes();
+    byte[] mismatchRowAtTheEnd = "999".getBytes();
+
     // Introduce corruptions to the data in Bigtable. Delete data from Bigtable to simulate Bigtable
     // missing data. Add data to Bigtable to simulate extra data in Bigtable. It is easier to update
     // Bigtable than change the snapshots.
     Table table = connection.getTable(TableName.valueOf(tableId));
-    Cell cellInMiddle = table.get(new Get("24".getBytes())).rawCells()[0];
+    Cell cellInMiddle = table.get(new Get(mismatchRowInMiddle)).rawCells()[0];
     List<Put> puts =
         Arrays.asList(
             // Add a row at the start
-            new Put(Bytes.toBytes("000"))
+            new Put(mismatchRowAtStart)
                 .addColumn(CF.getBytes(), "random_col".getBytes(), 1L, "value000".getBytes())
                 .addColumn(CF.getBytes(), "random_col".getBytes(), 2L, "value001".getBytes()),
             // change a cell in middle
@@ -295,13 +359,13 @@ public void testHBaseSnapshotImportWithCorruptions() throws Exception {
                     cellInMiddle.getTimestamp(),
                     "corrupted_val".getBytes()),
             // add a new row in the end
-            new Put(Bytes.toBytes("9999"))
+            new Put(mismatchRowAtTheEnd)
                 .addColumn(CF.getBytes(), "random_col".getBytes(), 100L, "value999".getBytes()));
 
     table.put(puts);
     // Delete a random row in the middle. We should see 4 ranges mismatch as table is split on
-    // 1,2...9. We are splitting on 31, delete in 60s.
-    table.delete(new Delete("64".getBytes()));
+    // 1,2...9. All the updates are happening on a different split.
+    table.delete(new Delete(mismatchRowDeleted));
 
     // Run SyncTable job and expect 4 mismatches.
     SyncTableOptions syncOpts = createSyncTableOptions();
@@ -309,18 +373,15 @@ public void testHBaseSnapshotImportWithCorruptions() throws Exception {
     state = result.waitUntilFinish();
     Assert.assertEquals(State.DONE, state);
 
-    List<GcsPath> outputs = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
-
-    LOG.warn("OUTPUTS: " + outputs);
-    // FileSink will shard the outputs and will created >1 files.
-    Assert.assertTrue(outputs.size() > 1);
-    // TODO read the files and validate that the ranges are there instead of size check.
-    Assert.assertTrue((gcsUtil.fileSize(outputs.get(0)) + gcsUtil.fileSize(outputs.get(1))) > 0);
-
-    // gcsUtil.getObject(outputs.get(0));
+    List<RangeHash> syncTableOutputMismatches = readMismatchesFromOutputFiles();
+    Assert.assertEquals(4, syncTableOutputMismatches.size());
+    validateRowInRangeHashes(
+        Arrays.asList(
+            mismatchRowAtStart, mismatchRowAtTheEnd, mismatchRowDeleted, mismatchRowInMiddle),
+        syncTableOutputMismatches);
 
+    // Validate the counters.
     Map<String, Long> counters = getCountMap(result);
-    Assert.assertEquals(counters.size(), 2);
     Assert.assertEquals(counters.get("ranges_matched"), (Long) 97L);
     Assert.assertEquals(counters.get("ranges_not_matched"), (Long) 4L);
   }

From 443e2d16a36e16ad0b0241b620f289725baf2f4d Mon Sep 17 00:00:00 2001
From: shitanshu verma <shitanshu@google.com>
Date: Fri, 12 Feb 2021 12:02:09 -0500
Subject: [PATCH 7/8] Incorporating code review feedback.

---
 .../BufferedHadoopHashTableSource.java        |   7 +-
 ...omputeAndValidateHashFromBigtableDoFn.java | 151 +++++++++---------
 .../validation/HadoopHashTableSource.java     |  71 ++++----
 ...teAndValidateHashFromBigtableDoFnTest.java |  23 ---
 .../HashBasedSourceSerializationTest.java     |   5 -
 5 files changed, 104 insertions(+), 153 deletions(-)

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
index a616441655..e62b3c8215 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
@@ -68,6 +68,7 @@ public BufferedHadoopHashTableSource(HadoopHashTableSource hashTableSource, int
   public List<? extends BoundedSource<KV<String, List<RangeHash>>>> split(
       long desiredBundleSizeBytes, PipelineOptions options) throws IOException {
 
+    @SuppressWarnings("unchecked")
     List<HadoopHashTableSource> splitHashTableSources =
         (List<HadoopHashTableSource>) hashTableSource.split(desiredBundleSizeBytes, options);
 
@@ -93,7 +94,8 @@ public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
   }
 
   @Override
-  public BoundedReader createReader(PipelineOptions options) throws IOException {
+  public BoundedReader<KV<String, List<RangeHash>>> createReader(PipelineOptions options)
+      throws IOException {
     return new BufferedHashBasedReader(this, hashTableSource.createReader(options));
   }
 
@@ -174,7 +176,8 @@ public boolean advance() throws IOException {
     @Override
     public KV<String, List<RangeHash>> getCurrent() {
       // getCurrent only gets called when buffer is not empty.
-      Preconditions.checkArgument(!buffer.isEmpty(), "Can not get current on empty buffer.");
+      Preconditions.checkState(
+          !buffer.isEmpty(), "getCurrent() should only be called when start/advance return true.");
       // GroupBy key is a string and not ImmutableBytesWritable because the WritableCoder is not
       // deterministic. The outputted PCollection is grouped by the K and needs a deterministic
       // coder. Having a String K leads to an unfortunate double encoding, ImmutableBytesWritable->
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
index 62984e8ce2..a19eb9d218 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
@@ -18,6 +18,7 @@
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
 
 import com.google.bigtable.repackaged.com.google.common.base.Preconditions;
+import com.google.bigtable.repackaged.com.google.common.collect.Lists;
 import com.google.cloud.bigtable.beam.AbstractCloudBigtableTableDoFn;
 import com.google.cloud.bigtable.beam.CloudBigtableConfiguration;
 import com.google.cloud.bigtable.beam.TemplateUtils;
@@ -85,87 +86,81 @@ public ComputeAndValidateHashFromBigtableDoFn(SyncTableOptions options) {
 
   @ProcessElement
   public void processElement(ProcessContext context) throws Exception {
-    // BufferedHadoopHashTableSource generates only 1 item per groupby key, but iterate just in
-    // case.
-    for (List<RangeHash> rangeHashes : context.element().getValue()) {
-      if (rangeHashes.isEmpty()) {
-        // No rows ranges found, return;
-        continue;
+    List<List<RangeHash>> wrappedRangeHashes = Lists.newArrayList(context.element().getValue());
+    // BufferedHadoopHashTableSource generates only 1 item per groupby key; the key is the start
+    // key of the sorted ranges.
+    Preconditions.checkState(
+        wrappedRangeHashes.size() == 1, "Can not have multiple entries for a key");
+    List<RangeHash> rangeHashes = wrappedRangeHashes.get(0);
+    Preconditions.checkState(!rangeHashes.isEmpty(), "Can not have empty ranges in DO_FN");
+
+    ImmutableBytesWritable rangeStartInclusive = rangeHashes.get(0).startInclusive;
+    ImmutableBytesWritable rangeEndExclusive =
+        rangeHashes.get(rangeHashes.size() - 1).stopExclusive;
+
+    BigtableResultHasher resultHasher = new BigtableResultHasher();
+    resultHasher.startBatch(rangeStartInclusive);
+
+    // Since all the row-ranges are sorted in HashTable's data files, 1 big scan can be used
+    // to read all the row ranges. Parallelism is achieved by splitting the HashTable's data
+    // files into smaller bundle of row-ranges in GroupBy.
+    ResultScanner scanner =
+        createBigtableScan(rangeStartInclusive.copyBytes(), rangeEndExclusive.copyBytes());
+
+    Iterator<RangeHash> rangeHashIterator = rangeHashes.iterator();
+    long numRows = 0;
+
+    RangeHash currentRangeHash = rangeHashIterator.next();
+
+    // Process each row and validate hashes
+    for (Result result : scanner) {
+      numRows++;
+      if (numRows % 10_000 == 0) {
+        // Heartbeat in logs in case a large scan gets hung.
+        DOFN_LOG.debug("Processed " + numRows + " rows ");
       }
 
-      ImmutableBytesWritable rangeStartInclusive = rangeHashes.get(0).startInclusive;
-      ImmutableBytesWritable rangeEndExclusive =
-          rangeHashes.get(rangeHashes.size() - 1).stopExclusive;
-
-      BigtableResultHasher resultHasher = new BigtableResultHasher();
-      resultHasher.startBatch(rangeStartInclusive);
-
-      // Since all the row-ranges are sorted in HashTable's data files, 1 big scan can be used
-      // to read all the row ranges. Parallelism is achieved by splitting the HashTable's data
-      // files into smaller bundle of row-ranges in GroupBy.
-      ResultScanner scanner =
-          createBigtableScan(rangeStartInclusive.copyBytes(), rangeEndExclusive.copyBytes());
-
-      Iterator<RangeHash> rangeHashIterator = rangeHashes.iterator();
-      long numRows = 0;
-
-      RangeHash currentRangeHash = rangeHashIterator.next();
-
-      // Process each row and validate hashes
-      for (Result result : scanner) {
-        numRows++;
-        if (numRows % 10_000 == 0) {
-          // Heartbeat in logs in case a large scan gets hung.
-          DOFN_LOG.debug("Processed " + numRows + " rows ");
-        }
-
-        ImmutableBytesWritable rowKey = new ImmutableBytesWritable(result.getRow());
-
-        // Check if the rowKey belongs to current range, if not keep iterating through the
-        // rangeHashes until rowKey's range is found.
-        while (!isWithinUpperBound(currentRangeHash.stopExclusive, rowKey)) {
-          validateBatchHash(context, resultHasher, currentRangeHash);
-          // THIS SHOULD NEVER HAPPEN. Bigtable is being scanned till the last
-          // RangeHash.endKeyExclusive(), so bigtable's result should not outlast the
-          // rangeHashes.
-          Preconditions.checkState(
-              rangeHashIterator.hasNext(),
-              "Buffer reached to end while scan is still active at row : %s. "
-                  + "Affected Range: [%s, %s)."
-                  + immutableBytesToString(result.getRow())
-                  + immutableBytesToString(rangeStartInclusive)
-                  + immutableBytesToString(rangeEndExclusive));
-          currentRangeHash = rangeHashIterator.next();
-        }
-
-        // Always Hash the current row.
-        resultHasher.hashResult(result);
+      ImmutableBytesWritable rowKey = new ImmutableBytesWritable(result.getRow());
+
+      // Check if the rowKey belongs to current range, if not keep iterating through the
+      // rangeHashes until rowKey's range is found.
+      while (!isWithinUpperBound(currentRangeHash.stopExclusive, rowKey)) {
+        validateBatchHash(context, resultHasher, currentRangeHash);
+        // THIS SHOULD NEVER HAPPEN. Bigtable is being scanned till the last
+        // RangeHash.endKeyExclusive(), so bigtable's result should not outlast the
+        // rangeHashes.
+        Preconditions.checkState(
+            rangeHashIterator.hasNext(),
+            "Buffer reached to end while scan is still active at row : %s. "
+                + "Affected Range: [%s, %s).",
+            immutableBytesToString(result.getRow()),
+            immutableBytesToString(rangeStartInclusive),
+            immutableBytesToString(rangeEndExclusive));
+        currentRangeHash = rangeHashIterator.next();
       }
 
-      // Bigtable scan is finished at this point and rangeHashes may contain additional row ranges.
-      // Last range will always be unverified as the range end is exclusive and
-      // currentRow > rangeEndExclusive will never by true. Verify the last range.
-      validateBatchHash(context, resultHasher, currentRangeHash);
+      // Always Hash the current row.
+      resultHasher.hashResult(result);
+    }
 
-      // If there are remaining ranges in the rangeHashes they all need to reported as mismatched as
-      // there is nothing in Cloud Bigtable for those row ranges.
-      // for (int i = bufferIndex; i < rangeHashes.size(); i++) {
-      while (rangeHashIterator.hasNext()) {
-        currentRangeHash = rangeHashIterator.next();
-        reportMismatch(context, currentRangeHash);
-      }
+    // Bigtable scan is finished at this point and rangeHashes may contain additional row ranges.
+    // Last range will always be unverified as the range end is exclusive and
+    // currentRow > rangeEndExclusive will never be true. Verify the last range.
+    validateBatchHash(context, resultHasher, currentRangeHash);
 
-      DOFN_LOG.debug(
-          "Finishing context by outputting "
-              + rangeHashes.size()
-              + " keys in range ["
-              + ((!rangeHashes.isEmpty())
-                  ? immutableBytesToString(rangeStartInclusive)
-                      + ", "
-                      + immutableBytesToString(rangeEndExclusive)
-                      + ")."
-                  : ", )."));
+    // The Bigtable scan is exhausted at this point. Any ranges still left in rangeHashes must
+    // all be reported as mismatched, because there is nothing in Cloud Bigtable for those row
+    // ranges.
+    while (rangeHashIterator.hasNext()) {
+      currentRangeHash = rangeHashIterator.next();
+      reportMismatch(context, currentRangeHash);
     }
+
+    DOFN_LOG.debug(
+        "Finishing context by outputting {}  keys in range [{}, {}).",
+        rangeHashes.size(),
+        immutableBytesToString(rangeStartInclusive),
+        immutableBytesToString(rangeEndExclusive));
   }
 
   private ResultScanner createBigtableScan(byte[] startKeyInclusive, byte[] stopKeyExclusive)
@@ -214,11 +209,9 @@ private void validateBatchHash(
   private void reportMismatch(ProcessContext context, RangeHash currentRangeHash) {
     mismatches.inc();
     DOFN_LOG.info(
-        "MISMATCH ON RANGE ["
-            + immutableBytesToString(currentRangeHash.startInclusive)
-            + ", "
-            + immutableBytesToString(currentRangeHash.stopExclusive)
-            + ").");
+        "MISMATCH ON RANGE [{}, {}).",
+        immutableBytesToString(currentRangeHash.startInclusive),
+        immutableBytesToString(currentRangeHash.stopExclusive));
     context.output(currentRangeHash);
   }
 }
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
index 138ba3f860..f6ecf21e24 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2020 Google Inc. All Rights Reserved.
+ * Copyright 2021 Google Inc. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,7 +15,6 @@
  */
 package com.google.cloud.bigtable.beam.validation;
 
-import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.createConfiguration;
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
 
 import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
@@ -51,8 +50,11 @@ public class HadoopHashTableSource extends BoundedSource<RangeHash> implements S
 
   private static final long serialVersionUID = 2383724L;
 
+  private static final Coder<RangeHash> CODER = RangeHashCoder.of();
+
   /**
    * A simple POJO encapsulating a row range and the corresponding hash generated by HashTable job.
+   * TODO Evaluate if we can use AutoValue for this class.
    */
   @DefaultCoder(RangeHashCoder.class)
   public static class RangeHash {
@@ -111,24 +113,18 @@ public int hashCode() {
 
   public static final Log LOG = LogFactory.getLog(HadoopHashTableSource.class);
 
-  private ValueProvider<String> projectId;
+  private final ValueProvider<String> projectId;
 
   // Path to the output of HashTable job. Usually in GCS.
-  private ValueProvider<String> sourceHashDir;
-
-  // Coder to encode/decode the RangeHash
-  private RangeHashCoder coder;
+  private final ValueProvider<String> sourceHashDir;
 
   // Row range owned by this source.
-  @VisibleForTesting @Nullable ImmutableBytesWritable startRowInclusive;
+  // The Start and Stop row are serialized in a custom way.
+  @VisibleForTesting @Nullable transient ImmutableBytesWritable startRowInclusive;
 
-  @VisibleForTesting @Nullable ImmutableBytesWritable stopRowExclusive;
+  @VisibleForTesting @Nullable transient ImmutableBytesWritable stopRowExclusive;
 
-  private TableHashWrapperFactory tableHashWrapperFactory;
-
-  public HadoopHashTableSource() {
-    this.coder = new RangeHashCoder();
-  }
+  private final TableHashWrapperFactory tableHashWrapperFactory;
 
   /**
    * Creates a HadoopHashTableSource that reads HashTable data from hashTableOutputDir in GCS bucket
@@ -164,7 +160,6 @@ public HadoopHashTableSource(
       @Nullable ImmutableBytesWritable startRowInclusive,
       @Nullable ImmutableBytesWritable stopRowExclusive,
       TableHashWrapperFactory tableHashWrapperFactory) {
-    this.coder = new RangeHashCoder();
     this.projectId = projectId;
     this.sourceHashDir = hadoopHashTableOutputDir;
     // startRow and stopRow will be null when the template is initialized. startRow and stopRow are
@@ -199,7 +194,7 @@ public List<? extends BoundedSource<RangeHash>> split(
     }
 
     // Use the HashTable start key. The value is HConstants.EMPTY_START_ROW for full table scan.
-    ImmutableBytesWritable startRow = hash.getStartRow();
+    ImmutableBytesWritable nextStartRow = hash.getStartRow();
     ImmutableBytesWritable stopRow = hash.getStopRow();
 
     // The output of HashTable is organized as partition file and a set of datafiles.
@@ -211,42 +206,39 @@ public List<? extends BoundedSource<RangeHash>> split(
     // partition{i}).
     // So a partition file containing entries [b,f] for a table with row range [a,z] will have 3
     // data files containing hashes.
-    // file0 will contain [a(startRow), b), file1 will contain [b,f), and file3 will contain
+    // file0 will contain [a(nextStartRow), b), file1 will contain [b,f), and file2 will contain
     // [f,z(stopRow))
     for (int i = 0; i < numPartitions; i++) {
+      // TODO make a utility function that generates [start, end) format from start/end.
       LOG.debug(
           "Adding: ["
-              + immutableBytesToString(startRow.get())
+              + immutableBytesToString(nextStartRow.get())
               + ", "
               + immutableBytesToString(partitions.get(i).get())
-              + "]");
+              + ")");
       splitSources.add(
           new HadoopHashTableSource(
-              projectId, sourceHashDir, startRow, partitions.get(i), tableHashWrapperFactory));
-      startRow = partitions.get(i);
+              projectId, sourceHashDir, nextStartRow, partitions.get(i), tableHashWrapperFactory));
+      nextStartRow = partitions.get(i);
     }
     // Add the last range for [lastPartition, stopRow).
     LOG.debug(
         "Adding: ["
-            + immutableBytesToString(startRow.get())
+            + immutableBytesToString(nextStartRow.get())
             + ", "
             + immutableBytesToString(stopRow.get())
-            + "]");
+            + ")");
     // Add the last range for [lastPartition, stopRow).
     splitSources.add(
         new HadoopHashTableSource(
-            projectId,
-            sourceHashDir,
-            partitions.get(numPartitions - 1),
-            stopRow,
-            tableHashWrapperFactory));
+            projectId, sourceHashDir, nextStartRow, stopRow, tableHashWrapperFactory));
     LOG.info("Returning " + splitSources.size() + " sources from " + numPartitions + " partitions");
     return splitSources;
   }
 
   @Override
   public Coder<RangeHash> getOutputCoder() {
-    return coder;
+    return CODER;
   }
 
   @Override
@@ -256,7 +248,7 @@ public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
   }
 
   @Override
-  public BoundedReader createReader(PipelineOptions options) throws IOException {
+  public BoundedReader<RangeHash> createReader(PipelineOptions options) throws IOException {
     TableHashWrapper hash =
         tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
 
@@ -273,7 +265,7 @@ public BoundedReader createReader(PipelineOptions options) throws IOException {
         startRowInclusive,
         stopRowExclusive,
         hash.newReader(
-            createConfiguration(this.projectId.get(), this.sourceHashDir.get()),
+            SyncTableUtils.createConfiguration(this.projectId.get(), this.sourceHashDir.get()),
             startRowInclusive));
   }
 
@@ -294,7 +286,7 @@ public boolean equals(Object o) {
 
   @Override
   public int hashCode() {
-    return Objects.hashCode(projectId, sourceHashDir, coder, startRowInclusive, stopRowExclusive);
+    return Objects.hashCode(projectId, sourceHashDir, startRowInclusive, stopRowExclusive);
   }
 
   @Override
@@ -307,9 +299,7 @@ public String toString() {
   }
 
   private void writeObject(ObjectOutputStream s) throws IOException {
-    s.writeObject(projectId);
-    s.writeObject(sourceHashDir);
-    s.writeObject(tableHashWrapperFactory);
+    s.defaultWriteObject();
     // Start and Stop can be null, write a boolean to indicate if start/stop is expected.
     if (startRowInclusive == null) {
       s.writeBoolean(false);
@@ -327,9 +317,7 @@ private void writeObject(ObjectOutputStream s) throws IOException {
   }
 
   private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
-    projectId = (ValueProvider<String>) s.readObject();
-    sourceHashDir = (ValueProvider<String>) s.readObject();
-    tableHashWrapperFactory = (TableHashWrapperFactory) s.readObject();
+    s.defaultReadObject();
     // start/stop can be null, they are preceded by a boolean indicating their presence.
     if (s.readBoolean() == true) {
       startRowInclusive = new ImmutableBytesWritable((byte[]) s.readObject());
@@ -348,7 +336,6 @@ static class HashBasedReader extends BoundedReader<RangeHash> {
     @VisibleForTesting final ImmutableBytesWritable startRowInclusive;
     @VisibleForTesting final ImmutableBytesWritable stopRowExclusive;
 
-    private long numKeys = 0;
     // Flag indicating that this workitem is finished.
     private boolean isDone = false;
     private ImmutableBytesWritable currentRangeStartKey;
@@ -375,7 +362,6 @@ public boolean start() throws IOException {
               + " ,"
               + immutableBytesToString(stopRowExclusive)
               + ").");
-      numKeys = 0;
 
       if (readNextKey()) {
         // Dataflow calls start, followed by getCurrent. HashBased reader needs to read on TableHash
@@ -408,7 +394,6 @@ public boolean advance() throws IOException {
     // Returns true if a key can be read for this workitem.
     private boolean readNextKey() throws IOException {
       if (reader.next()) {
-        numKeys++;
         currentRangeStartKey = reader.getCurrentKey();
         if ( // StopRow is not set, everything is in bounds.
         (stopRowExclusive.equals(HConstants.EMPTY_END_ROW)
@@ -442,9 +427,7 @@ public void close() throws IOException {
               + immutableBytesToString(startRowInclusive)
               + " ,"
               + immutableBytesToString(stopRowExclusive)
-              + ") after reading "
-              + numKeys
-              + " keys. Ending at "
+              + "). Ending at "
               + immutableBytesToString(currentRangeStartKey));
       reader.close();
     }
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
index 2e9b6fd8ed..8c608b74db 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
@@ -269,29 +269,6 @@ public void testHashMatchesForMultipleSingleRowRange() throws Exception {
     validateCounters(result, 3L, 0L);
   }
 
-  ///////////////// Test mismatches with multiple ranges per Key in KV<> ////////////////////
-  @Test
-  public void testHashMisMatchesForMultipleRangeAcrossKV() throws Exception {
-    hashes.add(createHash(getRowKey(21), getRowKey(24)));
-    hashes.add(createHash(getRowKey(24), getRowKey(28)));
-
-    // Corrupt both the ranges
-    table.delete(new Delete(getRowKey(21)).addColumns(CF, COL, TS));
-    table.put(new Put(getRowKey(24)).addColumn(CF2, COL, TS, getValue(20, 0)));
-
-    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
-        p.apply(
-            Create.of(
-                KV.of(
-                    new String(getRowKey(21)),
-                    Arrays.asList(Arrays.asList(hashes.get(0)), Arrays.asList(hashes.get(1))))));
-
-    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
-    PAssert.that(output).containsInAnyOrder(hashes);
-    PipelineResult result = p.run();
-    validateCounters(result, 0L, 2L);
-  }
-
   ///////////////// Test mismatches when Bigtable has extra rows ////////////////////
   @Test
   public void testAdditionalCellInMiddle() throws Exception {
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
index 8c7f6cc8c4..f58becf3cb 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
@@ -46,11 +46,6 @@ public void setUp() throws Exception {
     super.setUp();
   }
 
-  @Test
-  public void testSerializeDefaultConstructor() throws IOException {
-    checkSerialization(new HadoopHashTableSource());
-  }
-
   @Test
   public void testSerializeWithValueProviders() throws IOException {
     checkSerialization(

From 0c3fd6786d0cfe366c64c115152516da34336d74 Mon Sep 17 00:00:00 2001
From: shitanshu verma <shitanshu@google.com>
Date: Fri, 12 Feb 2021 14:16:05 -0500
Subject: [PATCH 8/8] Incorporating code review feedback.

---
 .../beam/validation/ComputeAndValidateHashFromBigtableDoFn.java | 2 +-
 .../cloud/bigtable/beam/validation/TableHashWrapperFactory.java | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
index a19eb9d218..a75833b022 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
@@ -187,7 +187,7 @@ private ResultScanner createBigtableScan(byte[] startKeyInclusive, byte[] stopKe
    * Determines if row >= stopExclusive for a row range (start, stopExclusive). Empty stopExclusive
    * represents a range with no upper bound.
    */
-  private boolean isWithinUpperBound(
+  private static boolean isWithinUpperBound(
       ImmutableBytesWritable stopExclusive, ImmutableBytesWritable row) {
     return stopExclusive.equals(HConstants.EMPTY_END_ROW) || row.compareTo(stopExclusive) < 0;
   }
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
index 67776299a4..a4e3544519 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
@@ -18,10 +18,12 @@
 
 import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.createConfiguration;
 
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import java.io.IOException;
 import java.io.Serializable;
 
 /** Factory to create a TableHashWrapper. */
+@InternalApi
 public class TableHashWrapperFactory implements Serializable {
 
   private static final long serialVersionUID = 265433454L;