diff --git a/.kokoro/nightly/integration-beam.cfg b/.kokoro/nightly/integration-beam.cfg
new file mode 100644
index 0000000000..f91f157259
--- /dev/null
+++ b/.kokoro/nightly/integration-beam.cfg
@@ -0,0 +1,38 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Docker image that kokoro-trampoline runs this job in.
+env_vars: {
+  key: "TRAMPOLINE_IMAGE"
+  value: "gcr.io/cloud-devrel-kokoro-resources/java8"
+}
+
+env_vars: {
+  key: "INTEGRATION_TEST_ARGS"
+  value: "-PbeamIntegrationTest -Dgoogle.bigtable.project.id=gcloud-devel -Dgoogle.bigtable.instance.id=google-cloud-bigtable -Dgoogle.dataflow.stagingLocation=gs://java-bigtable-hbase-testing/staging -Dcloud.test.data.folder=gs://java-bigtable-hbase-testing/hbase-snapshot-import-integration-tests -Dregion=us-central1"
+}
+
+env_vars: {
+  key: "JOB_TYPE"
+  value: "integration"
+}
+
+# TODO: remove GCLOUD_PROJECT after we've migrated all tests and scripts
+env_vars: {
+  key: "GCLOUD_PROJECT"
+  value: "gcloud-devel"
+}
+
+env_vars: {
+  key: "GOOGLE_CLOUD_PROJECT"
+  value: "gcloud-devel"
+}
+
+env_vars: {
+  key: "GOOGLE_APPLICATION_CREDENTIALS"
+  value: "secret_manager/java-it-service-account"
+}
+
+env_vars: {
+  key: "SECRET_MANAGER_KEYS"
+  value: "java-it-service-account"
+}
diff --git a/.kokoro/presubmit/integration-beam.cfg b/.kokoro/presubmit/integration-beam.cfg
new file mode 100644
index 0000000000..f91f157259
--- /dev/null
+++ b/.kokoro/presubmit/integration-beam.cfg
@@ -0,0 +1,38 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Docker image that kokoro-trampoline runs this job in.
+env_vars: {
+  key: "TRAMPOLINE_IMAGE"
+  value: "gcr.io/cloud-devrel-kokoro-resources/java8"
+}
+
+env_vars: {
+  key: "INTEGRATION_TEST_ARGS"
+  value: "-PbeamIntegrationTest -Dgoogle.bigtable.project.id=gcloud-devel -Dgoogle.bigtable.instance.id=google-cloud-bigtable -Dgoogle.dataflow.stagingLocation=gs://java-bigtable-hbase-testing/staging -Dcloud.test.data.folder=gs://java-bigtable-hbase-testing/hbase-snapshot-import-integration-tests -Dregion=us-central1"
+}
+
+env_vars: {
+  key: "JOB_TYPE"
+  value: "integration"
+}
+
+# TODO: remove GCLOUD_PROJECT after we've migrated all tests and scripts
+env_vars: {
+  key: "GCLOUD_PROJECT"
+  value: "gcloud-devel"
+}
+
+env_vars: {
+  key: "GOOGLE_CLOUD_PROJECT"
+  value: "gcloud-devel"
+}
+
+env_vars: {
+  key: "GOOGLE_APPLICATION_CREDENTIALS"
+  value: "secret_manager/java-it-service-account"
+}
+
+env_vars: {
+  key: "SECRET_MANAGER_KEYS"
+  value: "java-it-service-account"
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/README.md b/bigtable-dataflow-parent/bigtable-beam-import/README.md
index 7d7b4025ec..783de14443 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/README.md
+++ b/bigtable-dataflow-parent/bigtable-beam-import/README.md
@@ -4,8 +4,8 @@ This project supports importing and exporting HBase Sequence Files to Google Clo
 Cloud Dataflow.
 
 ## Instructions
-
-Download [the import/export jar](http://search.maven.org/remotecontent?filepath=com/google/cloud/bigtable/bigtable-beam-import/1.1.0/bigtable-beam-import-1.1.0-shaded.jar), which is an aggregation of all required jars.
+[//]: # ({x-version-update-start:bigtable-dataflow-parent:released})
+Download [the import/export jar](http://search.maven.org/remotecontent?filepath=com/google/cloud/bigtable/bigtable-beam-import/1.19.3/bigtable-beam-import-1.19.3-shaded.jar), which is an aggregation of all required jars.
 
 Please pay attention to the Cluster CPU usage and adjust the number of Dataflow workers accordingly.
 
@@ -14,7 +14,7 @@ Please pay attention to the Cluster CPU usage and adjust the number of Dataflow
 On the command line:
 
 ```
-java -jar bigtable-beam-import-1.1.0-shaded.jar export \
+java -jar bigtable-beam-import-1.19.3-shaded.jar export \
     --runner=dataflow \
     --project=[your_project_id] \
     --bigtableInstanceId=[your_instance_id] \
@@ -32,7 +32,7 @@ Create the table in your cluster.
 On the command line:
 
 ```
-java -jar bigtable-beam-import-1.1.0-shaded.jar import \
+java -jar bigtable-beam-import-1.19.3-shaded.jar import \
     --runner=dataflow \
     --project=[your_project_id] \
     --bigtableInstanceId=[your_instance_id] \
@@ -42,3 +42,4 @@ java -jar bigtable-beam-import-1.1.0-shaded.jar import \
     --maxNumWorkers=[3x number of nodes] \
     --zone=[zone of your cluster]
 ```
+[//]: # ({x-version-update-end})
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
index ed488f86c4..2448fdea12 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
+++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml
@@ -25,7 +25,7 @@ limitations under the License.
   <artifactId>bigtable-beam-import</artifactId>
 
   <properties>
-    <mainClass>com.google.cloud.bigtable.beam.sequencefiles.Main</mainClass>
+    <mainClass>com.google.cloud.bigtable.beam.Main</mainClass>
   </properties>
 
   <!-- Adding this to resolve version conflict within beam sdk-->
@@ -46,6 +46,12 @@ limitations under the License.
       <groupId>${project.groupId}</groupId>
       <artifactId>bigtable-hbase-beam</artifactId>
       <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hbase</groupId>
+          <artifactId>hbase-shaded-client</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>com.google.cloud.bigtable</groupId>
@@ -64,9 +70,12 @@ limitations under the License.
           <groupId>io.opencensus</groupId>
           <artifactId>*</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.apache.hbase</groupId>
+          <artifactId>hbase-shaded-client</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
-
     <dependency>
       <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-java-core</artifactId>
@@ -84,10 +93,18 @@ limitations under the License.
       <artifactId>beam-sdks-java-io-hadoop-common</artifactId>
       <version>${beam.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-io-hadoop-format</artifactId>
+      <version>${beam.version}</version>
+    </dependency>
 
+    <!-- For HBase 2.x, this should be hbase-mapreduce
+    https://hbase.apache.org/2.1/book.html#export
+    -->
     <dependency>
       <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-shaded-client</artifactId>
+      <artifactId>hbase-shaded-server</artifactId>
       <version>${hbase.version}</version>
     </dependency>
 
@@ -101,7 +118,7 @@ limitations under the License.
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
-      <version>${beam-guava.version}</version>
+      <version>${gcs-guava.version}</version>
     </dependency>
 
     <!-- TODO: check if commons-codec was transitively updated to 1.13 and okhttp was updated to 2.7.5 when upgrading-->
@@ -121,6 +138,12 @@ limitations under the License.
       </exclusions>
     </dependency>
 
+    <dependency>
+      <groupId>com.google.code.findbugs</groupId>
+      <artifactId>jsr305</artifactId>
+      <version>${jsr305.version}</version>
+    </dependency>
+
     <!-- TODO: remove these dependencies when upgraded through transitive dependency (beam-runners-google-cloud-dataflow-java)
         these are not used directly, but upgrading due to transitive vulnerabilities in older versions-->
     <dependency>
@@ -146,6 +169,21 @@ limitations under the License.
       <artifactId>slf4j-api</artifactId>
       <version>${slf4j.version}</version>
     </dependency>
+    <!-- https://mvnrepository.com/artifact/com.google.cloud.bigdataoss/gcs-connector -->
+    <dependency>
+      <groupId>com.google.cloud.bigdataoss</groupId>
+      <artifactId>gcs-connector</artifactId>
+      <version>hadoop2-2.1.4</version>
+      <classifier>shaded</classifier>
+    </dependency>
+
+    <!-- https://mvnrepository.com/artifact/com.google.apis/google-api-services-storage -->
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-storage</artifactId>
+      <version>v1-rev171-1.25.0</version>
+    </dependency>
+
 
     <!-- Test -->
     <dependency>
@@ -178,6 +216,24 @@ limitations under the License.
       <version>${junit.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-shaded-testing-util</artifactId>
+      <version>${hbase.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.truth</groupId>
+      <artifactId>truth</artifactId>
+      <version>1.0.1</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-bigtable-emulator</artifactId>
+      <version>0.124.0</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>
@@ -213,7 +269,7 @@ limitations under the License.
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>exec-maven-plugin</artifactId>
-        <version>1.6.0</version>
+        <version>3.0.0</version>
         <executions>
           <execution>
             <goals>
@@ -229,7 +285,7 @@ limitations under the License.
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>build-helper-maven-plugin</artifactId>
-        <version>3.0.0</version>
+        <version>3.2.0</version>
         <executions>
           <execution>
             <id>add-source</id>
@@ -262,6 +318,16 @@ limitations under the License.
           <transformers>
             <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
           </transformers>
+          <filters>
+            <filter>
+              <artifact>*:*</artifact>
+              <excludes>
+                <exclude>META-INF/*.SF</exclude>
+                <exclude>META-INF/*.DSA</exclude>
+                <exclude>META-INF/*.RSA</exclude>
+              </excludes>
+            </filter>
+          </filters>
         </configuration>
       </plugin>
 
@@ -332,7 +398,7 @@ limitations under the License.
                 <configuration>
                   <forkCount>1</forkCount>
                   <includes>
-                    <include>**/*IT.java</include>
+                    <include>**/sequencefiles/*IT.java</include>
                   </includes>
                   <!-- Use Isolated Classloader so that dataflow can find all files
                        that must be staged.
@@ -346,6 +412,7 @@ limitations under the License.
       </build>
     </profile>
 
+    <!-- profile to run all integration tests -->
     <profile>
       <id>beamIntegrationTest</id>
       <build>
@@ -364,6 +431,7 @@ limitations under the License.
                   <forkCount>1</forkCount>
                   <includes>
                     <include>**/CloudBigtableBeamITTest.java</include>
+                    <include>**/*IT.java</include>
                   </includes>
                   <reportNameSuffix>bigtable-beam</reportNameSuffix>
                 </configuration>
@@ -373,5 +441,38 @@ limitations under the License.
         </plugins>
       </build>
     </profile>
+
+    <profile>
+      <id>hbasesnapshotsIntegrationTest</id>
+      <properties>
+        <!-- Explicitly un-skip integration tests for this profile until the -->
+        <!-- possible misconfiguration of the Failsafe plugin is figured out. -->
+        <skipITs>false</skipITs>
+      </properties>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-failsafe-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>hbasesnapshots-integration-test</id>
+                <goals>
+                  <goal>integration-test</goal>
+                </goals>
+                <phase>integration-test</phase>
+                <configuration>
+                  <forkCount>1</forkCount>
+                  <includes>
+                    <include>**/hbasesnapshots/*IT.java</include>
+                  </includes>
+                  <useSystemClassLoader>false</useSystemClassLoader>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
   </profiles>
 </project>
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java
new file mode 100644
index 0000000000..1f52f5125a
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly;
+import com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot;
+import com.google.cloud.bigtable.beam.sequencefiles.CreateTableHelper;
+import com.google.cloud.bigtable.beam.sequencefiles.ExportJob;
+import com.google.cloud.bigtable.beam.sequencefiles.ImportJob;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob;
+import java.io.File;
+import java.net.URISyntaxException;
+import java.util.Arrays;
+
+/** Entry point for create-table/import/importsnapshot/export/sync-table job submission. */
+@InternalExtensionOnly
+final class Main {
+  /** For internal use only - public for technical reasons. */
+  @InternalApi("For internal usage only")
+  public Main() {}
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 1) {
+      usage();
+      System.exit(1);
+    }
+
+    // args[0] selects the job; all remaining arguments are passed through to that job.
+    String[] subArgs = Arrays.copyOfRange(args, 1, args.length);
+    switch (args[0]) {
+      case "export":
+        ExportJob.main(subArgs);
+        break;
+      case "import":
+        ImportJob.main(subArgs);
+        break;
+      case "importsnapshot":
+        ImportJobFromHbaseSnapshot.main(subArgs);
+        break;
+      case "create-table":
+        CreateTableHelper.main(subArgs);
+        break;
+      case "sync-table":
+        SyncTableJob.main(subArgs);
+        break;
+      default:
+        usage();
+        System.exit(1);
+    }
+  }
+
+  /** Prints the supported actions, using the name of the jar this class was loaded from. */
+  private static void usage() {
+    String jarName;
+    try {
+      jarName =
+          new File(Main.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
+              .getName();
+    } catch (URISyntaxException e) {
+      jarName = "<jar>";
+    }
+    System.out.printf(
+        "java -jar %s <action> <action_params>\n"
+            + "Where <action> can be 'export', 'import', 'importsnapshot', 'create-table' or "
+            + "'sync-table'. To get further help, run: \n"
+            + "java -jar %s <action> --help\n",
+        jarName, jarName);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
index e64507317b..f839a50b23 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java
@@ -26,6 +26,7 @@
 import com.google.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.Query;
 import com.google.cloud.bigtable.beam.sequencefiles.ExportJob.ExportOptions;
 import com.google.cloud.bigtable.beam.sequencefiles.ImportJob.ImportOptions;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
 import com.google.cloud.bigtable.hbase.adapters.Adapters;
 import com.google.cloud.bigtable.hbase.adapters.read.DefaultReadHooks;
@@ -72,6 +73,19 @@ public static CloudBigtableTableConfiguration BuildImportConfig(ImportOptions op
     return builder.build();
   }
 
+  /** Builds the CloudBigtableTableConfiguration for a sync-table job from its runtime options. */
+  public static CloudBigtableTableConfiguration BuildSyncTableConfig(SyncTableOptions opts) {
+    CloudBigtableTableConfiguration.Builder builder =
+        new CloudBigtableTableConfiguration.Builder()
+            .withProjectId(opts.getBigtableProject())
+            .withInstanceId(opts.getBigtableInstanceId())
+            .withTableId(opts.getBigtableTableId());
+    if (opts.getBigtableAppProfileId() != null) {
+      builder.withAppProfileId(opts.getBigtableAppProfileId());
+    }
+    return builder.build();
+  }
+
   /** Provides a request that is constructed with some attributes. */
   private static class RequestValueProvider
       implements ValueProvider<ReadRowsRequest>, Serializable {
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFn.java
new file mode 100644
index 0000000000..e0bdca69d5
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFn.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import com.google.api.services.storage.model.Objects;
+import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
+import org.apache.beam.sdk.extensions.gcp.util.GcsUtil;
+import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.values.KV;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * A {@link DoFn} that deletes the temp files generated during HBase snapshot scans from a Google
+ * Cloud Storage (GCS) bucket. Input is KV(snapshot source dir, restore path); emits {@code true}.
+ */
+class CleanupHBaseSnapshotRestoreFilesFn extends DoFn<KV<String, String>, Boolean> {
+  private static final Log LOG = LogFactory.getLog(CleanupHBaseSnapshotRestoreFilesFn.class);
+
+  @ProcessElement
+  public void processElement(ProcessContext context) throws IOException {
+    KV<String, String> elem = context.element();
+    String hbaseSnapshotDir = elem.getKey();
+    String restorePath = elem.getValue();
+    String prefix = getListPrefix(restorePath);
+    String bucketName = getWorkingBucketName(hbaseSnapshotDir);
+    Preconditions.checkState(
+        !prefix.isEmpty() && !hbaseSnapshotDir.contains(String.format("%s/%s", bucketName, prefix)),
+        "restore folder should not be empty or a subfolder of hbaseSnapshotSourceDir");
+    // List with a trailing '/' so that only objects inside the restore folder match, and not a
+    // sibling folder whose name merely starts with the same characters.
+    String listPrefix = prefix.endsWith("/") ? prefix : prefix + "/";
+    GcpOptions gcpOptions = context.getPipelineOptions().as(GcpOptions.class);
+    GcsUtil gcsUtil = new GcsUtil.GcsUtilFactory().create(gcpOptions);
+
+    String pageToken = null;
+    List<String> results = new ArrayList<>();
+    do {
+      Objects objects = gcsUtil.listObjects(bucketName, listPrefix, pageToken);
+      if (objects.getItems() == null) {
+        break;
+      }
+      objects.getItems().stream()
+          .map(storageObject -> GcsPath.fromObject(storageObject).toString())
+          .forEach(results::add);
+      pageToken = objects.getNextPageToken();
+    } while (pageToken != null);
+    gcsUtil.remove(results);
+    context.output(true);
+  }
+
+  public static String getWorkingBucketName(String hbaseSnapshotDir) {
+    Preconditions.checkArgument(
+        hbaseSnapshotDir.startsWith(GcsPath.SCHEME),
+        "snapshot folder must be hosted in a GCS bucket ");
+    return GcsPath.fromUri(hbaseSnapshotDir).getBucket();
+  }
+
+  /** Converts an absolute restorePath in a Hadoop filesystem to a match prefix in a GCS bucket. */
+  public static String getListPrefix(String restorePath) {
+    Preconditions.checkArgument(
+        restorePath.startsWith("/"),
+        "restore folder must be an absolute path in current filesystem");
+    return restorePath.substring(1);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilder.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilder.java
new file mode 100644
index 0000000000..63ebddb20a
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilder.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import com.google.common.base.Preconditions;
+import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Builds a {@link Configuration} that could be used in {@code HadoopFormatIO} for reading an HBase
+ * snapshot hosted in a Google Cloud Storage (GCS) bucket via the GCS connector. It uses {@link
+ * TableSnapshotInputFormat} for reading HBase snapshots.
+ */
+class HBaseSnapshotInputConfigBuilder {
+
+  private static final Log LOG = LogFactory.getLog(HBaseSnapshotInputConfigBuilder.class);
+  // Batch size used for HBase snapshot scans
+  private static final int BATCH_SIZE = 1000;
+
+  // a temp location to store metadata extracted from snapshot
+  public static final String RESTORE_DIR = "/.restore";
+
+  private String projectId;
+  private String hbaseSnapshotSourceDir;
+  private String snapshotName;
+  private String restoreDirSuffix;
+
+  public HBaseSnapshotInputConfigBuilder() {}
+
+  /**
+   * Set the project id used to access the GCS bucket with HBase snapshot data to be imported
+   */
+  public HBaseSnapshotInputConfigBuilder setProjectId(String projectId) {
+    this.projectId = projectId;
+    return this;
+  }
+
+  /**
+   * Set the GCS path where the HBase snapshot data is located
+   */
+  public HBaseSnapshotInputConfigBuilder setHbaseSnapshotSourceDir(String hbaseSnapshotSourceDir) {
+    this.hbaseSnapshotSourceDir = hbaseSnapshotSourceDir;
+    return this;
+  }
+
+  /**
+   * Set the name of the snapshot to be imported
+   * e.g when importing snapshot 'gs://<your-gcs-path>/hbase-export/table_snapshot'
+   * put 'table_snapshot' as the {@code snapshotName}
+   * and 'gs://<your-gcs-path>/hbase-export' as {@code hbaseSnapshotSourceDir}
+   */
+  public HBaseSnapshotInputConfigBuilder setSnapshotName(String snapshotName) {
+    this.snapshotName = snapshotName;
+    return this;
+  }
+
+  /**
+   * Set the unique suffix to be appended to RESTORE_DIR for the restore folder to avoid conflicts
+   */
+  public HBaseSnapshotInputConfigBuilder setRestoreDirSuffix(String suffix) {
+    this.restoreDirSuffix = suffix;
+    return this;
+  }
+
+  public String getRestoreDir() {
+    return RESTORE_DIR + this.restoreDirSuffix;
+  }
+
+  public Configuration build() throws Exception {
+    Preconditions.checkNotNull(projectId, "Required value projectId must be set");
+    Preconditions.checkNotNull(
+        hbaseSnapshotSourceDir, "Required value hbaseSnapshotSourceDir must be set");
+    Preconditions.checkNotNull(snapshotName, "Required value snapshotName must be set");
+    Preconditions.checkState(
+        hbaseSnapshotSourceDir.startsWith(GcsPath.SCHEME),
+        "snapshot folder must be hosted in a GCS bucket ");
+
+    Configuration conf = createHBaseConfiguration();
+
+    // Configure a MapReduce Job based on the HBaseConfiguration
+    // and return the job Configuration
+    ClientProtos.Scan proto = ProtobufUtil.toScan(new Scan().setBatch(BATCH_SIZE));
+    conf.set(TableInputFormat.SCAN, Base64.encodeBytes(proto.toByteArray()));
+    Job job = Job.getInstance(conf); // creates internal clone of hbaseConf
+    // the restore folder needs to be under the current bucket root so that it is considered
+    // to be within the same filesystem as the hbaseSnapshotSourceDir
+    TableSnapshotInputFormat.setInput(job, snapshotName, new Path(getRestoreDir()));
+    return job.getConfiguration(); // extract the modified clone
+  }
+
+  // kept separate from build() for unit testing
+  public Configuration createHBaseConfiguration() {
+    Configuration conf = HBaseConfiguration.create();
+
+    // Setup the input data location for HBase snapshot import
+    // hbaseSnapshotSourceDir should be a GCS Bucket path.
+    conf.set("hbase.rootdir", hbaseSnapshotSourceDir);
+    conf.set("fs.defaultFS", hbaseSnapshotSourceDir);
+
+    // Setup GCS connector to use GCS as Hadoop filesystem
+    conf.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS");
+    conf.set("fs.gs.project.id", projectId);
+    conf.setBoolean("google.cloud.auth.service.account.enable", true);
+
+    // Setup MapReduce config for TableSnapshotInputFormat
+    conf.setClass(
+        "mapreduce.job.inputformat.class", TableSnapshotInputFormat.class, InputFormat.class);
+    conf.setClass("key.class", ImmutableBytesWritable.class, Writable.class);
+    conf.setClass("value.class", Result.class, Object.class);
+    return conf;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java
new file mode 100644
index 0000000000..2d8ce7c31f
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly;
+import com.google.cloud.bigtable.beam.CloudBigtableIO;
+import com.google.cloud.bigtable.beam.TemplateUtils;
+import com.google.cloud.bigtable.beam.sequencefiles.HBaseResultToMutationFn;
+import com.google.cloud.bigtable.beam.sequencefiles.ImportJob;
+import com.google.cloud.bigtable.beam.sequencefiles.Utils;
+import com.google.common.annotations.VisibleForTesting;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.Wait;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * A job that imports data from HBase snapshot exports hosted in Cloud Storage bucket into Cloud
+ * Bigtable.
+ *
+ * <p>Example: If you have exported your HBase Snapshot to GCS bucket gs://$HBASE_EXPORT_ROOT_PATH
+ * and want to import snapshot gs://$HBASE_EXPORT_ROOT_PATH/.hbase-snapshot/$SNAPSHOT_NAME into
+ * Cloud Bigtable $TABLE in $INSTANCE, execute the following command to run the job directly:
+ *
+ * <pre>
+ * mvn compile exec:java \
+ *   -DmainClass=com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot \
+ *   -Dexec.args="--runner=DataflowRunner \
+ *                --stagingLocation=gs://$STAGING_PATH \
+ *                --project=$PROJECT \
+ *                --bigtableInstanceId=$INSTANCE \
+ *                --bigtableTableId=$TABLE \
+ *                --hbaseSnapshotSourceDir=gs://$HBASE_EXPORT_ROOT_PATH \
+ *                --snapshotName=$SNAPSHOT_NAME"
+ * </pre>
+ *
+ * Note that in the case of job failures, the temp files generated in the .restore$JOB_NAME
+ * directory under the snapshot export bucket will not get deleted. Hence one needs to either launch
+ * a replacement job with the same jobName to re-run the job or manually delete this directory.
+ */
+@InternalExtensionOnly
+public class ImportJobFromHbaseSnapshot {
+  private static final Log LOG = LogFactory.getLog(ImportJobFromHbaseSnapshot.class);
+
+  public interface ImportOptions extends ImportJob.ImportOptions {
+    @Description("The HBase root dir where HBase snapshot files resides.")
+    String getHbaseSnapshotSourceDir();
+
+    @SuppressWarnings("unused")
+    void setHbaseSnapshotSourceDir(String hbaseSnapshotSourceDir);
+
+    @Description("Snapshot name")
+    String getSnapshotName();
+
+    @SuppressWarnings("unused")
+    void setSnapshotName(String snapshotName);
+  }
+
+  public static void main(String[] args) throws Exception {
+    PipelineOptionsFactory.register(ImportOptions.class);
+
+    ImportOptions opts =
+        PipelineOptionsFactory.fromArgs(args).withValidation().as(ImportOptions.class);
+
+    LOG.info("Building Pipeline");
+    Pipeline pipeline = buildPipeline(opts);
+    LOG.info("Running Pipeline");
+    PipelineResult result = pipeline.run();
+
+    if (opts.getWait()) {
+      Utils.waitForPipelineToFinish(result);
+    }
+  }
+
+  @VisibleForTesting
+  static Pipeline buildPipeline(ImportOptions opts) throws Exception {
+
+    Pipeline pipeline = Pipeline.create(Utils.tweakOptions(opts));
+    HBaseSnapshotInputConfigBuilder configurationBuilder =
+        new HBaseSnapshotInputConfigBuilder()
+            .setProjectId(opts.getProject())
+            .setHbaseSnapshotSourceDir(opts.getHbaseSnapshotSourceDir())
+            .setSnapshotName(opts.getSnapshotName())
+            .setRestoreDirSuffix(opts.getJobName());
+    PCollection<KV<ImmutableBytesWritable, Result>> readResult =
+        pipeline.apply(
+            "Read from HBase Snapshot",
+            HadoopFormatIO.<ImmutableBytesWritable, Result>read()
+                .withConfiguration(configurationBuilder.build()));
+
+    readResult
+        .apply("Create Mutations", ParDo.of(new HBaseResultToMutationFn()))
+        .apply(
+            "Write to Bigtable",
+            CloudBigtableIO.writeToTable(TemplateUtils.BuildImportConfig(opts)));
+
+    final List<KV<String, String>> sourceAndRestoreFolders =
+        Arrays.asList(
+            KV.of(opts.getHbaseSnapshotSourceDir(), configurationBuilder.getRestoreDir()));
+    // Clean up the restore folder only after the snapshot read has finished.
+    pipeline
+        .apply(Create.of(sourceAndRestoreFolders))
+        .apply(Wait.on(readResult))
+        .apply(ParDo.of(new CleanupHBaseSnapshotRestoreFilesFn()));
+
+    return pipeline;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
index b4b3862817..4c794ed7eb 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
@@ -57,7 +57,7 @@
  * intended to be a preparation step before running an {@link ImportJob}.
  */
 @InternalApi
-class CreateTableHelper {
+public class CreateTableHelper {
   private static final Log LOG = LogFactory.getLog(CreateTableHelper.class);
 
   @InternalApi
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
index 6b2e628a5d..45954c7762 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
@@ -15,6 +15,7 @@
  */
 package com.google.cloud.bigtable.beam.sequencefiles;
 
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
 import com.google.common.base.Predicate;
@@ -43,7 +44,8 @@
  * A {@link DoFn} function that converts a {@link Result} in the pipeline input to a {@link
  * Mutation} for output.
  */
-class HBaseResultToMutationFn extends DoFn<KV<ImmutableBytesWritable, Result>, Mutation> {
+@InternalApi
+public class HBaseResultToMutationFn extends DoFn<KV<ImmutableBytesWritable, Result>, Mutation> {
   private static Logger logger = LoggerFactory.getLogger(HBaseResultToMutationFn.class);
 
   private static final long serialVersionUID = 1L;
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
index 62bad8d92b..7098a239d8 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
@@ -15,6 +15,7 @@
  */
 package com.google.cloud.bigtable.beam.sequencefiles;
 
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import org.apache.beam.runners.dataflow.DataflowRunner;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
@@ -29,7 +30,8 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
-class Utils {
+@InternalApi
+public class Utils {
   private static final Log LOG = LogFactory.getLog(Utils.class);
 
   /**
@@ -98,7 +100,7 @@ public ResourceId apply(String input) {
    *
    * @param result
    */
-  static void waitForPipelineToFinish(PipelineResult result) {
+  public static void waitForPipelineToFinish(PipelineResult result) {
     try {
       // Check to see if we are creating a template.
       // This should throw {@link UnsupportedOperationException} when creating a template.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
new file mode 100644
index 0000000000..e62b3c8215
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSource.java
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.base.Objects;
+import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Buffers the RangeHashes generated by {@link HadoopHashTableSource}. This is an optimization that
+ * allows {@link ComputeAndValidateHashFromBigtableDoFn} to issue fewer ReadRows calls with larger
+ * row ranges.
+ *
+ * <p>Hadoop HashTable output is sorted by row-key and contains a row-range and hash. Beam
+ * PCollections do not guarantee any ordering. To fetch a batch of ranges in 1 ReadRows operation,
+ * this source buffers them and outputs a {@code List<RangeHash>} guaranteeing the sorted order of
+ * ranges.
+ *
+ * <p>Emits a batch of sorted RangeHashes keyed by the start key of the first range.
+ */
+class BufferedHadoopHashTableSource extends BoundedSource<KV<String, List<RangeHash>>> {
+
+  private static final long serialVersionUID = 39842743L;
+
+  private static final int DEFAULT_BATCH_SIZE = 50;
+  private static final Coder<KV<String, List<RangeHash>>> CODER =
+      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(RangeHashCoder.of()));
+
+  // Max number of RangeHashes to buffer.
+  private final int maxBufferSize;
+  private final HadoopHashTableSource hashTableSource;
+
+  /** Wraps {@code source}, emitting batches of at most {@code DEFAULT_BATCH_SIZE} RangeHashes. */
+  public BufferedHadoopHashTableSource(HadoopHashTableSource source) {
+    this(source, DEFAULT_BATCH_SIZE);
+  }
+
+  /**
+   * Wraps {@code hashTableSource}, emitting batches of at most {@code maxBufferSize} RangeHashes.
+   *
+   * @param hashTableSource the source whose RangeHashes are batched
+   * @param maxBufferSize maximum number of RangeHashes per emitted batch; must be positive
+   * @throws IllegalArgumentException if {@code maxBufferSize} is not positive
+   */
+  public BufferedHadoopHashTableSource(HadoopHashTableSource hashTableSource, int maxBufferSize) {
+    // With a non-positive size the reader could never refill its buffer after the first element
+    // and would silently drop the remaining RangeHashes, so reject it up front.
+    Preconditions.checkArgument(
+        maxBufferSize > 0, "maxBufferSize must be positive, got %s", maxBufferSize);
+    this.hashTableSource = hashTableSource;
+    this.maxBufferSize = maxBufferSize;
+  }
+
+  /**
+   * Splits exactly like the wrapped {@link HadoopHashTableSource} and wraps every resulting split
+   * in a buffered source that keeps this source's {@code maxBufferSize}.
+   */
+  @Override
+  public List<? extends BoundedSource<KV<String, List<RangeHash>>>> split(
+      long desiredBundleSizeBytes, PipelineOptions options) throws IOException {
+
+    @SuppressWarnings("unchecked")
+    List<HadoopHashTableSource> splitHashTableSources =
+        (List<HadoopHashTableSource>) hashTableSource.split(desiredBundleSizeBytes, options);
+
+    List<BufferedHadoopHashTableSource> splitSources =
+        new ArrayList<>(splitHashTableSources.size());
+    // Keep the splits same as HashTableSource.
+    for (HadoopHashTableSource splitHashTableSource : splitHashTableSources) {
+      // Propagate the configured buffer size; the one-arg constructor would reset it to default.
+      splitSources.add(new BufferedHadoopHashTableSource(splitHashTableSource, maxBufferSize));
+    }
+    return splitSources;
+  }
+
+  @Override
+  public Coder<KV<String, List<RangeHash>>> getOutputCoder() {
+    return CODER;
+  }
+
+  @Override
+  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
+    // Buffering only regroups the elements, so report whatever the wrapped source estimates.
+    return hashTableSource.getEstimatedSizeBytes(options);
+  }
+
+  @Override
+  public BoundedReader<KV<String, List<RangeHash>>> createReader(PipelineOptions options)
+      throws IOException {
+    return new BufferedHashBasedReader(this, hashTableSource.createReader(options));
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof BufferedHadoopHashTableSource)) {
+      return false;
+    }
+    BufferedHadoopHashTableSource that = (BufferedHadoopHashTableSource) o;
+    return maxBufferSize == that.maxBufferSize
+        && Objects.equal(hashTableSource, that.hashTableSource);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(maxBufferSize, hashTableSource);
+  }
+
+  @Override
+  public String toString() {
+    return "BufferedHadoopHashTableSource ["
+        + immutableBytesToString(hashTableSource.startRowInclusive)
+        + ", "
+        + immutableBytesToString(hashTableSource.stopRowExclusive)
+        + "), maxBufferSize="
+        + maxBufferSize;
+  }
+
+  /**
+   * Reader that drains the wrapped RangeHash reader in batches of at most {@code maxBufferSize}
+   * and exposes each batch as a single element.
+   */
+  private static class BufferedHashBasedReader extends BoundedReader<KV<String, List<RangeHash>>> {
+
+    private final BoundedReader<RangeHash> hashReader;
+    private final BufferedHadoopHashTableSource source;
+
+    // The batch currently exposed through getCurrent(). Replaced, never cleared, on advance() so
+    // a list that was already handed out is not mutated afterwards.
+    private List<RangeHash> buffer;
+
+    public BufferedHashBasedReader(
+        BufferedHadoopHashTableSource source, BoundedReader<RangeHash> hashReader) {
+      this.source = source;
+      this.hashReader = hashReader;
+      this.buffer = new ArrayList<>(source.maxBufferSize);
+    }
+
+    @Override
+    public boolean start() throws IOException {
+      if (!hashReader.start()) {
+        // HashReader does not have any hashes, return empty reader.
+        return false;
+      }
+      // Start returned true, consume the current RangeHash.
+      buffer.add(hashReader.getCurrent());
+      bufferRangeHashes();
+      // Buffer is not empty, return true to consume the current buffer.
+      return true;
+    }
+
+    // Reads from hashReader and buffers the RangeHashes until the buffer is full or the reader is
+    // exhausted. Returns true if any RangeHashes were read from hashReader.
+    private boolean bufferRangeHashes() throws IOException {
+      boolean readRangeHashes = false;
+      while (buffer.size() < source.maxBufferSize && hashReader.advance()) {
+        readRangeHashes = true;
+        buffer.add(hashReader.getCurrent());
+      }
+      return readRangeHashes;
+    }
+
+    @Override
+    public boolean advance() throws IOException {
+      // Reset the buffer for next batch.
+      buffer = new ArrayList<>(source.maxBufferSize);
+
+      return bufferRangeHashes();
+    }
+
+    @Override
+    public KV<String, List<RangeHash>> getCurrent() {
+      // getCurrent only gets called when buffer is not empty.
+      Preconditions.checkState(
+          !buffer.isEmpty(), "getCurrent() should only be called when start/advance return true.");
+      // GroupBy key is a string and not ImmutableBytesWritable because the WritableCoder is not
+      // deterministic. The outputted PCollection is grouped by the K and needs a deterministic
+      // coder. Having a String K leads to an unfortunate double encoding, ImmutableBytesWritable->
+      // binary-escaped string -> UTF8 encoded string. The number of batches are significantly
+      // smaller than data fetched from Bigtable and should not have meaningful impact on the job
+      // performance.
+      return KV.of(Bytes.toStringBinary(buffer.get(0).startInclusive.copyBytes()), buffer);
+    }
+
+    @Override
+    public void close() throws IOException {
+      hashReader.close();
+    }
+
+    @Override
+    public BoundedSource<KV<String, List<RangeHash>>> getCurrentSource() {
+      return source;
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
new file mode 100644
index 0000000000..82e24b55ef
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFn.java
@@ -0,0 +1,222 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.bigtable.repackaged.com.google.common.base.Preconditions;
+import com.google.bigtable.repackaged.com.google.common.collect.Lists;
+import com.google.cloud.bigtable.beam.AbstractCloudBigtableTableDoFn;
+import com.google.cloud.bigtable.beam.CloudBigtableConfiguration;
+import com.google.cloud.bigtable.beam.TemplateUtils;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
+import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Metrics;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor.BigtableResultHasher;
+
+/**
+ * A {@link DoFn} that takes a row range and hash from HBase and validates the hash from rows read
+ * from Cloud Bigtable.
+ */
+class ComputeAndValidateHashFromBigtableDoFn
+    extends AbstractCloudBigtableTableDoFn<KV<String, Iterable<List<RangeHash>>>, RangeHash> {
+
+  private static final long serialVersionUID = 2349094L;
+  private final ValueProvider<String> tableName;
+  private final ValueProvider<String> projectId;
+  private final ValueProvider<String> sourceHashDir;
+
+  private final TableHashWrapperFactory tableHashWrapperFactory;
+
+  // Counter for reporting matching and mismatching ranges. Names are similar to HBase sync-table
+  // job.
+  private final Counter matches = Metrics.counter("cbt-dataflow-validate", "ranges_matched");
+  private final Counter mismatches = Metrics.counter("cbt-dataflow-validate", "ranges_not_matched");
+
+  public ComputeAndValidateHashFromBigtableDoFn(SyncTableOptions options) {
+    super(TemplateUtils.BuildSyncTableConfig(options));
+    this.tableName = options.getBigtableTableId();
+    // Create a local copy of ValueProviders, PipelineOptions are not serializable.
+    projectId = options.getBigtableProject();
+    sourceHashDir = options.getHashTableOutputDir();
+    tableHashWrapperFactory = new TableHashWrapperFactory();
+  }
+
+  @VisibleForTesting
+  ComputeAndValidateHashFromBigtableDoFn(
+      CloudBigtableConfiguration config,
+      ValueProvider<String> tableName,
+      ValueProvider<String> projectId,
+      ValueProvider<String> sourceHashDir,
+      TableHashWrapperFactory factory) {
+    super(config);
+    this.tableName = tableName;
+    this.tableHashWrapperFactory = factory;
+    this.sourceHashDir = projectId;
+    this.projectId = sourceHashDir;
+  }
+
+  @ProcessElement
+  public void processElement(ProcessContext context) throws Exception {
+    List<List<RangeHash>> wrapperdRangeHashes = Lists.newArrayList(context.element().getValue());
+    // BufferedHadoopHashTableSource generates only 1 item per groupby key, key is startKey for the
+    // Sorted ranges.
+    Preconditions.checkState(
+        wrapperdRangeHashes.size() == 1, "Can not have multiple entries for a key");
+    List<RangeHash> rangeHashes = wrapperdRangeHashes.get(0);
+    Preconditions.checkState(!rangeHashes.isEmpty(), "Can not have empty ranges in DO_FN");
+
+    // If a metric is not logged, it is absent from all the metrics (as opposed to being
+    // 0). By logging a 0 value for the metrics we guarantee that they shows up on Dataflow UIs.
+    mismatches.inc(0);
+    matches.inc(0);
+
+    ImmutableBytesWritable rangeStartInclusive = rangeHashes.get(0).startInclusive;
+    ImmutableBytesWritable rangeEndExclusive =
+        rangeHashes.get(rangeHashes.size() - 1).stopExclusive;
+
+    BigtableResultHasher resultHasher = new BigtableResultHasher();
+    resultHasher.startBatch(rangeStartInclusive);
+
+    // Since all the row-ranges are sorted in HashTable's data files, 1 big scan can be used
+    // to read all the row ranges. Parallelism is achieved by splitting the HashTable's data
+    // files into smaller bundle of row-ranges in GroupBy.
+    ResultScanner scanner =
+        createBigtableScan(rangeStartInclusive.copyBytes(), rangeEndExclusive.copyBytes());
+
+    Iterator<RangeHash> rangeHashIterator = rangeHashes.iterator();
+    long numRows = 0;
+
+    RangeHash currentRangeHash = rangeHashIterator.next();
+
+    // Process each row and validate hashes
+    for (Result result : scanner) {
+      numRows++;
+      if (numRows % 10_000 == 0) {
+        // Heartbeat in logs in case a large scan gets hung.
+        DOFN_LOG.debug("Processed " + numRows + " rows ");
+      }
+
+      ImmutableBytesWritable rowKey = new ImmutableBytesWritable(result.getRow());
+
+      // Check if the rowKey belongs to current range, if not keep iterating through the
+      // rangeHashes until rowKey's range is found.
+      while (!isWithinUpperBound(currentRangeHash.stopExclusive, rowKey)) {
+        validateBatchHash(context, resultHasher, currentRangeHash);
+        // THIS SHOULD NEVER HAPPEN. Bigtable is being scanned till the last
+        // RangeHash.endKeyExclusive(), so bigtable's result should not outlast the
+        // rangeHashes.
+        Preconditions.checkState(
+            rangeHashIterator.hasNext(),
+            "Buffer reached to end while scan is still active at row : %s. "
+                + "Affected Range: [%s, %s)."
+                + immutableBytesToString(result.getRow())
+                + immutableBytesToString(rangeStartInclusive)
+                + immutableBytesToString(rangeEndExclusive));
+        currentRangeHash = rangeHashIterator.next();
+      }
+
+      // Always Hash the current row.
+      resultHasher.hashResult(result);
+    }
+
+    // Bigtable scan is finished at this point and rangeHashes may contain additional row ranges.
+    // Last range will always be unverified as the range end is exclusive and
+    // currentRow > rangeEndExclusive will never by true. Verify the last range.
+    validateBatchHash(context, resultHasher, currentRangeHash);
+
+    // If there are remaining ranges in the rangeHashes they all need to reported as mismatched as
+    // there is nothing in Cloud Bigtable for those row ranges.
+    // for (int i = bufferIndex; i < rangeHashes.size(); i++) {
+    while (rangeHashIterator.hasNext()) {
+      currentRangeHash = rangeHashIterator.next();
+      reportMismatch(context, currentRangeHash);
+    }
+
+    DOFN_LOG.debug(
+        "Finishing context by outputting {}  keys in range [{}, {}).",
+        rangeHashes.size(),
+        immutableBytesToString(rangeStartInclusive),
+        immutableBytesToString(rangeEndExclusive));
+  }
+
+  private ResultScanner createBigtableScan(byte[] startKeyInclusive, byte[] stopKeyExclusive)
+      throws IOException {
+    Table table = getConnection().getTable(TableName.valueOf(tableName.get()));
+    // Get the scan from TableHash, HashTable can be run to hash a small part of data (selected
+    // column families, timestamp range, maxVersions etc), this scan allows us to fetch the same
+    // data from Cloud Bigtable to match.
+    TableHashWrapper tableHash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+    Scan scan = tableHash.getScan();
+    // Set the workitem boundaries on the scan.
+    if (startKeyInclusive.length > 0) {
+      scan.withStartRow(startKeyInclusive, true);
+    }
+    if (stopKeyExclusive.length > 0) {
+      scan.withStopRow(stopKeyExclusive, false);
+    }
+
+    return table.getScanner(scan);
+  }
+
+  /**
+   * Determines if row >= stopExclusive for a row range (start, stopExclusive). Empty stopExclusive
+   * represents a range with no upper bound.
+   */
+  private static boolean isWithinUpperBound(
+      ImmutableBytesWritable stopExclusive, ImmutableBytesWritable row) {
+    return stopExclusive.equals(HConstants.EMPTY_END_ROW) || row.compareTo(stopExclusive) < 0;
+  }
+
+  private void validateBatchHash(
+      ProcessContext context, BigtableResultHasher resultHasher, RangeHash currentRangeHash) {
+    // The batch is always started, so its safe to finish the batch. If there were no rows, we will
+    // get a hash for empty batch.
+    resultHasher.finishBatch();
+    if (!resultHasher.getBatchHash().equals(currentRangeHash.hash)) {
+      reportMismatch(context, currentRangeHash);
+    } else {
+      matches.inc();
+    }
+    // Start a new batch
+    resultHasher.startBatch(currentRangeHash.stopExclusive);
+  }
+
+  private void reportMismatch(ProcessContext context, RangeHash currentRangeHash) {
+    mismatches.inc();
+    DOFN_LOG.info(
+        "MISMATCH ON RANGE [{}, {}).",
+        immutableBytesToString(currentRangeHash.startInclusive),
+        immutableBytesToString(currentRangeHash.stopExclusive));
+    context.output(currentRangeHash);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
new file mode 100644
index 0000000000..f6ecf21e24
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSource.java
@@ -0,0 +1,440 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.common.annotations.VisibleForTesting;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.beam.validation.TableHashWrapper.TableHashReader;
+import com.google.common.base.Objects;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.DefaultCoder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * A beam source to read output of Hadoop HashTable job. The source creates 1 workitem per HashTable
+ * data file and emits a row-range/hash pair.
+ */
+@InternalApi
+public class HadoopHashTableSource extends BoundedSource<RangeHash> implements Serializable {
+
+  private static final long serialVersionUID = 2383724L;
+
+  private static final Coder<RangeHash> CODER = RangeHashCoder.of();
+
+  /**
+   * Plain holder pairing a row range with the hash that the HashTable job generated for it. All
+   * three fields are final and non-null when the instance is created through {@link #of}.
+   * TODO Evaluate if we can use AutoValue for this class.
+   */
+  @DefaultCoder(RangeHashCoder.class)
+  public static class RangeHash {
+
+    public final ImmutableBytesWritable startInclusive;
+    public final ImmutableBytesWritable stopExclusive;
+    public final ImmutableBytesWritable hash;
+
+    private RangeHash(
+        ImmutableBytesWritable rangeStart,
+        ImmutableBytesWritable rangeStop,
+        ImmutableBytesWritable rangeHash) {
+      startInclusive = rangeStart;
+      stopExclusive = rangeStop;
+      hash = rangeHash;
+    }
+
+    /** Creates a RangeHash; throws {@link NullPointerException} if any argument is null. */
+    static RangeHash of(
+        ImmutableBytesWritable startInclusive,
+        ImmutableBytesWritable stopExclusive,
+        ImmutableBytesWritable hash) {
+      // checkNotNull returns its argument, so the checks run in declaration order inline.
+      return new RangeHash(
+          Preconditions.checkNotNull(startInclusive),
+          Preconditions.checkNotNull(stopExclusive),
+          Preconditions.checkNotNull(hash));
+    }
+
+    @Override
+    public String toString() {
+      return "RangeHash{ range = [ "
+          + immutableBytesToString(startInclusive)
+          + ", "
+          + immutableBytesToString(stopExclusive)
+          + "), hash: "
+          + immutableBytesToString(hash)
+          + " }";
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      if (o instanceof RangeHash) {
+        RangeHash other = (RangeHash) o;
+        return Objects.equal(startInclusive, other.startInclusive)
+            && Objects.equal(stopExclusive, other.stopExclusive)
+            && Objects.equal(hash, other.hash);
+      }
+      return false;
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(startInclusive, stopExclusive, hash);
+    }
+  }
+
+  public static final Log LOG = LogFactory.getLog(HadoopHashTableSource.class);
+
+  private final ValueProvider<String> projectId;
+
+  // Path to the output of HashTable job. Usually in GCS.
+  private final ValueProvider<String> sourceHashDir;
+
+  // Row range owned by this source.
+  // The Start and Stop row are serialized in a custom way.
+  @VisibleForTesting @Nullable transient ImmutableBytesWritable startRowInclusive;
+
+  @VisibleForTesting @Nullable transient ImmutableBytesWritable stopRowExclusive;
+
+  private final TableHashWrapperFactory tableHashWrapperFactory;
+
+  /**
+   * Creates an un-split HadoopHashTableSource over the HashTable output stored in {@code
+   * sourceHashDir}, accessed under project {@code projectId}. The row range is left null here.
+   */
+  public HadoopHashTableSource(
+      ValueProvider<String> projectId, ValueProvider<String> sourceHashDir) {
+    this(projectId, sourceHashDir, null /* startRowInclusive */, null /* stopRowExclusive */);
+  }
+
+  /**
+   * Creates a HadoopHashTableSource restricted to the given row range, backed by a freshly
+   * created {@link TableHashWrapperFactory}. Used for creating split sources.
+   */
+  @VisibleForTesting
+  HadoopHashTableSource(
+      ValueProvider<String> projectId,
+      ValueProvider<String> sourceHashDir,
+      @Nullable ImmutableBytesWritable startRowInclusive,
+      @Nullable ImmutableBytesWritable stopRowExclusive) {
+    this(
+        projectId, sourceHashDir, startRowInclusive, stopRowExclusive, new TableHashWrapperFactory());
+  }
+
+  /** Full constructor; additionally lets the caller supply the {@link TableHashWrapperFactory}. */
+  @VisibleForTesting
+  HadoopHashTableSource(
+      ValueProvider<String> projectId,
+      ValueProvider<String> hadoopHashTableOutputDir,
+      @Nullable ImmutableBytesWritable startRowInclusive,
+      @Nullable ImmutableBytesWritable stopRowExclusive,
+      TableHashWrapperFactory tableHashWrapperFactory) {
+    this.tableHashWrapperFactory = tableHashWrapperFactory;
+    this.projectId = projectId;
+    this.sourceHashDir = hadoopHashTableOutputDir;
+    // Both bounds are null when the template is initialized: they are read from the
+    // hashTableOutputDir, which is only available at pipeline runtime.
+    this.startRowInclusive = startRowInclusive;
+    this.stopRowExclusive = stopRowExclusive;
+  }
+
+  /**
+   * Splits this source along the partitions recorded by the HBase HashTable job, producing one
+   * source per HashTable data file.
+   *
+   * <p>The output of HashTable is a partition file plus a set of data files. The partition file
+   * lists keys that cut the table's key range into row ranges, and the hashes of each row range
+   * live in one data file, so there are always numPartitions + 1 data files; data file i covers
+   * [partition{i-1}, partition{i}). For example, partitions [b, f] on a table with row range
+   * [a, z) give 3 data files: file0 holds [a, b), file1 holds [b, f) and file2 holds [f, z).
+   *
+   * <p>This relies entirely on the partitioning done by HashTable, so stragglers are possible;
+   * SyncTable handles that with a group-by that further splits the work items.
+   */
+  @Override
+  public List<? extends BoundedSource<RangeHash>> split(
+      long desiredBundleSizeBytes, PipelineOptions options) throws IOException {
+    TableHashWrapper tableHash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+
+    ImmutableList<ImmutableBytesWritable> boundaries = tableHash.getPartitions();
+    final int boundaryCount = boundaries.size();
+    List<HadoopHashTableSource> sources = new ArrayList<>(boundaryCount + 1);
+
+    // HashTable start key; the value is HConstants.EMPTY_START_ROW for a full table scan.
+    ImmutableBytesWritable rangeStart = tableHash.getStartRow();
+    ImmutableBytesWritable finalStop = tableHash.getStopRow();
+
+    if (boundaryCount == 0) {
+      // No partitions means a single hash file: one source spanning the full key range.
+      sources.add(
+          new HadoopHashTableSource(
+              projectId, sourceHashDir, rangeStart, finalStop, tableHashWrapperFactory));
+      return sources;
+    }
+
+    // One source per [previous boundary, boundary) range.
+    for (ImmutableBytesWritable boundary : boundaries) {
+      // TODO make a utility function that generates [start, end) format from start/end.
+      LOG.debug(
+          "Adding: ["
+              + immutableBytesToString(rangeStart.get())
+              + ", "
+              + immutableBytesToString(boundary.get())
+              + ")");
+      sources.add(
+          new HadoopHashTableSource(
+              projectId, sourceHashDir, rangeStart, boundary, tableHashWrapperFactory));
+      rangeStart = boundary;
+    }
+
+    // Trailing source for [lastPartition, stopRow).
+    LOG.debug(
+        "Adding: ["
+            + immutableBytesToString(rangeStart.get())
+            + ", "
+            + immutableBytesToString(finalStop.get())
+            + ")");
+    sources.add(
+        new HadoopHashTableSource(
+            projectId, sourceHashDir, rangeStart, finalStop, tableHashWrapperFactory));
+    LOG.info("Returning " + sources.size() + " sources from " + boundaryCount + " partitions");
+    return sources;
+  }
+
+  @Override
+  public Coder<RangeHash> getOutputCoder() {
+    return CODER; // the class-level coder obtained from RangeHashCoder.of()
+  }
+
+  @Override
+  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
+    // No size or line-count estimate can be obtained from HashTable data files, so report 0.
+    return 0L;
+  }
+
+  @Override
+  public BoundedReader<RangeHash> createReader(PipelineOptions options) throws IOException {
+    TableHashWrapper hash =
+        tableHashWrapperFactory.getTableHash(projectId.get(), sourceHashDir.get());
+
+    // The row range for an un-split source is determined from the output of HashTable job.
+    // HashTableOutputDir is a runtime parameter and hence not available at construction time, so
+    // populate the start and stop here.
+    if (startRowInclusive == null || stopRowExclusive == null) {
+      startRowInclusive = hash.getStartRow();
+      stopRowExclusive = hash.getStopRow();
+    }
+
+    return new HashBasedReader(
+        this,
+        startRowInclusive,
+        stopRowExclusive,
+        hash.newReader(
+            SyncTableUtils.createConfiguration(this.projectId.get(), this.sourceHashDir.get()),
+            startRowInclusive));
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o == this) {
+      return true;
+    }
+    if (o instanceof HadoopHashTableSource) {
+      HadoopHashTableSource other = (HadoopHashTableSource) o;
+      return Objects.equal(projectId, other.projectId)
+          && Objects.equal(sourceHashDir, other.sourceHashDir)
+          && Objects.equal(startRowInclusive, other.startRowInclusive)
+          && Objects.equal(stopRowExclusive, other.stopRowExclusive);
+    }
+    return false; // Null or an object of some other type.
+  }
+
+  @Override
+  public int hashCode() { // Combines the same four fields that equals() compares.
+    return Objects.hashCode(projectId, sourceHashDir, startRowInclusive, stopRowExclusive);
+  }
+
+  @Override
+  public String toString() {
+    return "HadoopHashTableSource ["
+        + immutableBytesToString(startRowInclusive)
+        + ", "
+        + immutableBytesToString(stopRowExclusive)
+        + ')';
+  }
+
+  private void writeObject(ObjectOutputStream s) throws IOException {
+    s.defaultWriteObject();
+    // Either bound may be null, so each one is written as a presence flag followed, only when
+    // present, by a copy of its bytes. readObject reads them back in this order.
+    boolean hasStart = startRowInclusive != null;
+    s.writeBoolean(hasStart);
+    if (hasStart) {
+      s.writeObject(startRowInclusive.copyBytes());
+    }
+
+    // Same encoding for the exclusive stop row.
+    boolean hasStop = stopRowExclusive != null;
+    s.writeBoolean(hasStop);
+    if (hasStop) {
+      s.writeObject(stopRowExclusive.copyBytes());
+    }
+  }
+
+  private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
+    s.defaultReadObject();
+    // Mirrors writeObject: a presence flag precedes each optional bound (start, then stop).
+    if (s.readBoolean()) {
+      startRowInclusive = new ImmutableBytesWritable((byte[]) s.readObject());
+    }
+    if (s.readBoolean()) {
+      stopRowExclusive = new ImmutableBytesWritable((byte[]) s.readObject());
+    }
+  }
+
+  @VisibleForTesting
+  static class HashBasedReader extends BoundedReader<RangeHash> {
+
+    private final HadoopHashTableSource source;
+    private final TableHashReader reader;
+
+    @VisibleForTesting final ImmutableBytesWritable startRowInclusive;
+    @VisibleForTesting final ImmutableBytesWritable stopRowExclusive;
+
+    // Flag indicating that this workitem is finished.
+    private boolean isDone = false;
+    private ImmutableBytesWritable currentRangeStartKey;
+    // Hash for the current range.
+    private ImmutableBytesWritable currentHash;
+    private RangeHash currentRangeHash;
+
+    public HashBasedReader(
+        HadoopHashTableSource source,
+        ImmutableBytesWritable startRowInclusive,
+        ImmutableBytesWritable stopRowExclusive,
+        TableHashReader reader) {
+      this.source = source;
+      this.startRowInclusive = startRowInclusive;
+      this.stopRowExclusive = stopRowExclusive;
+      this.reader = reader;
+    }
+
+    @Override
+    public boolean start() throws IOException {
+      LOG.debug(
+          "Starting a new reader at key range ["
+              + immutableBytesToString(startRowInclusive)
+              + " ,"
+              + immutableBytesToString(stopRowExclusive)
+              + ").");
+
+      if (readNextKey()) {
+        // Dataflow calls start, followed by getCurrent. HashBased reader needs to read on TableHash
+        // twice to return a RangeHash since it specifies both range-start and range-end.
+        advance();
+        return true;
+      }
+
+      isDone = true;
+      return false;
+    }
+
+    @Override
+    public boolean advance() throws IOException {
+      if (isDone) {
+        LOG.debug("Ending workitem at key " + immutableBytesToString(currentRangeStartKey) + " .");
+        return false;
+      }
+
+      ImmutableBytesWritable startKey = this.currentRangeStartKey;
+      ImmutableBytesWritable hash = this.currentHash;
+
+      // if there is nothing to read, we are done. readNextKey advances the currentRangeStartKey.
+      isDone = !readNextKey();
+      currentRangeHash = RangeHash.of(startKey, currentRangeStartKey, hash);
+
+      return true;
+    }
+
+    // Returns true if a key can be read for this workitem.
+    private boolean readNextKey() throws IOException {
+      if (reader.next()) {
+        currentRangeStartKey = reader.getCurrentKey();
+        if ( // StopRow is not set, everything is in bounds.
+        (stopRowExclusive.equals(HConstants.EMPTY_END_ROW)
+            || currentRangeStartKey.compareTo(stopRowExclusive) < 0)) { // currentKey < stopKey
+          // There is a key to read and the key is within the bounds of this workitem. Return true.
+          currentHash = reader.getCurrentHash();
+          return true;
+        } else {
+          // There is a key to read but its outside of the bounds of this workitem.
+          currentHash = null;
+          return false;
+        }
+      }
+
+      // Nothing left to read for this workitem. Next range would have started from
+      // stopRowExclusive.
+      currentRangeStartKey = stopRowExclusive;
+      currentHash = null;
+      return false;
+    }
+
+    @Override
+    public RangeHash getCurrent() {
+      return currentRangeHash;
+    }
+
+    @Override
+    public void close() throws IOException {
+      LOG.info(
+          "Finishing a reader for key range ["
+              + immutableBytesToString(startRowInclusive)
+              + " ,"
+              + immutableBytesToString(stopRowExclusive)
+              + "). Ending at "
+              + immutableBytesToString(currentRangeStartKey));
+      reader.close();
+    }
+
+    @Override
+    public BoundedSource<RangeHash> getCurrentSource() {
+      return source;
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
new file mode 100644
index 0000000000..d6341a08f2
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/RangeHashCoder.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InvalidObjectException;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.List;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/** Coder used by Beam to encode and decode {@link RangeHash} objects. */
+public class RangeHashCoder extends Coder<RangeHash> {
+
+  public static Coder<RangeHash> of() {
+    return new RangeHashCoder();
+  }
+
+  @Override
+  public void encode(RangeHash value, OutputStream outStream) throws IOException {
+    if (value == null) {
+      throw new CoderException("Can not encode null objects.");
+    }
+    // RangeHash fields are never null; they are written in the order decode() reads them.
+    DataOutputStream sink = new DataOutputStream(outStream);
+    value.startInclusive.write(sink);
+    value.stopExclusive.write(sink);
+    value.hash.write(sink);
+  }
+
+  @Override
+  public RangeHash decode(InputStream inStream) throws IOException {
+    DataInputStream source = new DataInputStream(inStream);
+    ImmutableBytesWritable startInclusive = readField(source);
+    ImmutableBytesWritable stopExclusive = readField(source);
+    ImmutableBytesWritable hash = readField(source);
+    return RangeHash.of(startInclusive, stopExclusive, hash);
+  }
+
+  // Reads a single ImmutableBytesWritable from the stream through its readFields method.
+  private static ImmutableBytesWritable readField(DataInputStream source) throws IOException {
+    ImmutableBytesWritable field = new ImmutableBytesWritable();
+    field.readFields(source);
+    return field;
+  }
+
+  @Override
+  public List<? extends Coder<?>> getCoderArguments() {
+    return Collections.emptyList();
+  }
+
+  @Override
+  public void verifyDeterministic() throws NonDeterministicException {
+    // Deterministic: the three byte[] fields are always written in the same fixed order.
+  }
+
+  /**
+   * !!! DO NOT DELETE !!!
+   *
+   * <p>Serialization hook; see the readObjectNoData method described in:
+   * https://docs.oracle.com/javase/7/docs/platform/serialization/spec/input.html#6053.
+   *
+   * <p>Always fails, which disables backwards compatibility with previously serialized versions.
+   *
+   * @throws InvalidObjectException always
+   */
+  @SuppressWarnings("unused")
+  private void readObjectNoData() throws InvalidObjectException {
+    throw new InvalidObjectException("Hash data required");
+  }
+
+  @Override
+  protected Object clone() throws CloneNotSupportedException {
+    return super.clone();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return other instanceof RangeHashCoder; // The coder has no fields to compare.
+  }
+
+  @Override
+  public int hashCode() {
+    return RangeHashCoder.class.hashCode();
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
new file mode 100644
index 0000000000..56b38fc3cb
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableJob.java
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly;
+import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.cloud.bigtable.beam.sequencefiles.Utils;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.annotations.VisibleForTesting;
+import java.util.List;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
+import org.apache.beam.sdk.metrics.MetricResult;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * A job that takes HBase HashTable output and compares the hashes from Cloud Bigtable table.
+ *
+ * <p>Execute the following command to run the job directly:
+ *
+ * <pre>
+ *   mvn compile exec:java \
+ *      -DmainClass=com.google.cloud.bigtable.beam.validation.SyncTableJob \
+ *      -Dexec.args="--runner=DataflowRunner \
+ *            --project=$PROJECT \
+ *            --bigtableInstanceId=$INSTANCE \
+ *            --bigtableTableId=$TABLE \
+ *            --hashTableOutputDir=$SOURCE_HASH_DIR \
+ *            --outputPrefix=$OUTPUT_PREFIX \
+ *            --stagingLocation=$STAGING_LOC \
+ *            --tempLocation=$TMP_LOC \
+ *            --region=$REGION \
+ *            --workerZone=$WORKER_ZONE"
+ * </pre>
+ *
+ * <p>Execute the following command to create the Dataflow template:
+ *
+ * <pre>
+ * mvn compile exec:java \
+ *   -DmainClass=com.google.cloud.bigtable.beam.validation.SyncTableJob \
+ *   -Dexec.args="--runner=DataflowRunner \
+ *                --project=$PROJECT \
+ *                --stagingLocation=gs://$STAGING_PATH \
+ *                --templateLocation=gs://$TEMPLATE_PATH \
+ *                --wait=false"
+ * </pre>
+ *
+ * <p>There are a few ways to run the pipeline using the template. See Dataflow doc for details:
+ * https://cloud.google.com/dataflow/docs/templates/executing-templates. Optionally, you can upload
+ * a metadata file that contains information about the runtime parameters that can be used for
+ * parameter validation purpose and more. A sample metadata file can be found at
+ * "src/main/resources/SyncTableJob_metadata".
+ *
+ * <p>An example using gcloud command line:
+ *
+ * <pre>
+ * gcloud beta dataflow jobs run $JOB_NAME \
+ *   --gcs-location gs://$TEMPLATE_PATH \
+ *   --parameters bigtableProject=$PROJECT,bigtableInstanceId=$INSTANCE,bigtableTableId=$TABLE,hashTableOutputDir=gs://$SOURCE_HASH_DIR,outputPrefix=$OUTPUT_PREFIX
+ * </pre>
+ */
+@InternalExtensionOnly
+public class SyncTableJob {
+
+  private static final Log LOG = LogFactory.getLog(SyncTableJob.class);
+
+  public interface SyncTableOptions extends GcpOptions {
+
+    @Description("This Bigtable App Profile id.")
+    ValueProvider<String> getBigtableAppProfileId();
+
+    @SuppressWarnings("unused")
+    void setBigtableAppProfileId(ValueProvider<String> appProfileId);
+
+    @Description("The project that contains the table to export. Defaults to --project.")
+    @Default.InstanceFactory(Utils.DefaultBigtableProjectFactory.class)
+    ValueProvider<String> getBigtableProject();
+
+    @SuppressWarnings("unused")
+    void setBigtableProject(ValueProvider<String> projectId);
+
+    @Description("The Bigtable instance id that contains the table to export.")
+    ValueProvider<String> getBigtableInstanceId();
+
+    @SuppressWarnings("unused")
+    void setBigtableInstanceId(ValueProvider<String> instanceId);
+
+    @Description("The Bigtable table id to export.")
+    ValueProvider<String> getBigtableTableId();
+
+    @SuppressWarnings("unused")
+    void setBigtableTableId(ValueProvider<String> tableId);
+
+    @Description("HBase HashTable job output dir.")
+    ValueProvider<String> getHashTableOutputDir();
+
+    @SuppressWarnings("unused")
+    // TODO: rename this option to sourceHashDir, as in the HBase sync table job.
+    void setHashTableOutputDir(ValueProvider<String> hashTableOutputDir);
+
+    @Description("File pattern for files containing mismatched row ranges.")
+    ValueProvider<String> getOutputPrefix();
+
+    @SuppressWarnings("unused")
+    void setOutputPrefix(ValueProvider<String> outputPrefix);
+
+    // When creating a template, this flag must be set to false; main() then skips the counters.
+    @Description("Wait for pipeline to finish.")
+    @Default.Boolean(true)
+    boolean getWait();
+
+    @SuppressWarnings("unused")
+    void setWait(boolean wait);
+  }
+
+  public static void main(String[] args) {
+    PipelineOptionsFactory.register(SyncTableOptions.class);
+
+    SyncTableOptions opts =
+        PipelineOptionsFactory.fromArgs(args).withValidation().as(SyncTableOptions.class);
+
+    LOG.info("===> Building Pipeline");
+    Pipeline pipeline = buildPipeline(opts);
+
+    LOG.info("===> Running Pipeline");
+    PipelineResult result = pipeline.run();
+    // Counters are read only when waiting: with --wait=false (for example template creation)
+    // the job has not run to completion, so its result cannot supply final counter values.
+    if (opts.getWait()) {
+      Utils.waitForPipelineToFinish(result);
+      // Log all the counters for number of matches and number of mismatches.
+      MetricQueryResults metrics = result.metrics().allMetrics();
+      for (MetricResult<Long> counter : metrics.getCounters()) {
+        LOG.warn(counter.getName() + ":" + counter.getAttempted());
+      }
+    }
+  }
+
+  @VisibleForTesting
+  public static Pipeline buildPipeline(SyncTableOptions opts) {
+    Pipeline pipeline = Pipeline.create(Utils.tweakOptions(opts));
+    pipeline
+        .apply(
+            "Read HBase HashTable output",
+            Read.from(
+                new BufferedHadoopHashTableSource(
+                    new HadoopHashTableSource(
+                        opts.getBigtableProject(), opts.getHashTableOutputDir()))))
+        .apply(
+            "group by and create granular workitems", GroupByKey.<String, List<RangeHash>>create())
+        .apply("validate hash", ParDo.of(new ComputeAndValidateHashFromBigtableDoFn(opts)))
+        .apply("Serialize the ranges", MapElements.via(new RangeHashToString()))
+        .apply("Write to file", TextIO.write().to(opts.getOutputPrefix()).withSuffix(".txt"));
+    return pipeline;
+  }
+
+  static class RangeHashToString extends SimpleFunction<RangeHash, String> {
+    // TODO maybe explore a sequenceFile sink for RangeHash. Hadoop jobs using this output may be
+    // easier to write for sequence file.
+    private static final Gson GSON = new Gson();
+
+    @Override
+    public String apply(RangeHash input) {
+      return GSON.toJson(input);
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
new file mode 100644
index 0000000000..cc92bea6a4
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/SyncTableUtils.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/** Static helpers shared by the SyncTable job classes. */
+@InternalApi
+public class SyncTableUtils {
+
+  private SyncTableUtils() {} // Static helpers only; never instantiated.
+
+  /**
+   * Renders the bytes of {@code bytes} via {@link Bytes#toStringBinary}; null yields "".
+   */
+  public static String immutableBytesToString(ImmutableBytesWritable bytes) {
+    return bytes == null ? "" : immutableBytesToString(bytes.get());
+  }
+
+  public static String immutableBytesToString(byte[] bytes) {
+    return Bytes.toStringBinary(bytes);
+  }
+
+  /**
+   * Builds an HBase configuration for reading HashTable output from a GCS bucket located in
+   * projectId.
+   *
+   * @param projectId project containing the GCS bucket holding hashtable output.
+   * @param sourceHashDir location of hashtable output from HBase; also set as fs.defaultFS.
+   * @return a new configuration with the GCS file system and service-account auth enabled.
+   */
+  public static Configuration createConfiguration(String projectId, String sourceHashDir) {
+    Configuration conf = HBaseConfiguration.create();
+    conf.set("fs.defaultFS", sourceHashDir);
+    conf.set("fs.gs.project.id", projectId);
+    conf.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS");
+    conf.set("google.cloud.auth.service.account.enable", "true");
+    return conf;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
new file mode 100644
index 0000000000..55200570ed
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapper.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.common.collect.ImmutableList;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.Serializable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * Exposes the minimal interface required from HashTable.TableHash. Implementations wrap a
+ * TableHash object and delegate the calls to it; the indirection allows mocking in unit tests.
+ */
+@InternalApi
+public interface TableHashWrapper extends Serializable {
+
+  int getNumHashFiles(); // Number of hash data files in the HashTable output.
+
+  ImmutableList<ImmutableBytesWritable> getPartitions(); // Keys that split the row range.
+
+  ImmutableBytesWritable getStartRow(); // Start row of the hashed row range.
+
+  ImmutableBytesWritable getStopRow(); // Stop row of the hashed row range.
+
+  Scan getScan(); // Scan initialised from the underlying TableHash.
+
+  TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow);
+
+  interface TableHashReader extends Closeable { // Iterates over (key, hash) entries.
+    boolean next() throws IOException; // False when there is nothing left to read.
+
+    ImmutableBytesWritable getCurrentKey();
+
+    ImmutableBytesWritable getCurrentHash();
+
+    void close() throws IOException; // Redeclares Closeable.close().
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
new file mode 100644
index 0000000000..a4e3544519
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperFactory.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.createConfiguration;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import java.io.IOException;
+import java.io.Serializable;
+
+/** Serializable factory that creates a TableHashWrapper from a HashTable output directory. */
+@InternalApi
+public class TableHashWrapperFactory implements Serializable {
+
+  private static final long serialVersionUID = 265433454L;
+
+  public TableHashWrapper getTableHash(String projectId, String sourceHashDir) throws IOException {
+    return TableHashWrapperImpl.create(
+        createConfiguration(projectId, sourceHashDir), sourceHashDir);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
new file mode 100644
index 0000000000..b04bd538a6
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/validation/TableHashWrapperImpl.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.cloud.bigtable.beam.validation.SyncTableUtils.immutableBytesToString;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash.Reader;
+
+class TableHashWrapperImpl implements TableHashWrapper {
+
+  static TableHashWrapper create(Configuration conf, String hashTableOutputDir) throws IOException {
+    TableHash tableHash = TableHash.read(conf, new Path(hashTableOutputDir));
+
+    TableHashWrapper tableHashWrapper = new TableHashWrapperImpl(tableHash);
+    Preconditions.checkArgument(
+        tableHashWrapper.getNumHashFiles() == (tableHashWrapper.getPartitions().size() + 1),
+        "Corrupt hashtable output. %d hash files for %d partitions. Expected %d files.",
+        tableHashWrapper.getNumHashFiles(),
+        tableHashWrapper.getPartitions().size(),
+        tableHashWrapper.getPartitions().size() + 1);
+    return tableHashWrapper;
+  }
+
+  private final TableHash hash;
+
+  private TableHashWrapperImpl(TableHash hash) {
+    this.hash = hash;
+  }
+
+  public int getNumHashFiles() {
+    return BigtableTableHashAccessor.getNumHashFiles(hash);
+  }
+
+  public ImmutableList<ImmutableBytesWritable> getPartitions() {
+    return BigtableTableHashAccessor.getPartitions(hash);
+  }
+
+  public ImmutableBytesWritable getStartRow() {
+    return BigtableTableHashAccessor.getStartRow(hash);
+  }
+
+  public ImmutableBytesWritable getStopRow() {
+    return BigtableTableHashAccessor.getStopRow(hash);
+  }
+
+  public Scan getScan() {
+    try {
+      return BigtableTableHashAccessor.getScan(hash);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to init a scan from TableHash: ", e);
+    }
+  }
+
+  public TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow) {
+    try {
+      return TableHashReaderImpl.create(hash.newReader(conf, startRow));
+    } catch (IOException e) {
+      throw new RuntimeException(
+          "Failed to open reader at " + immutableBytesToString(startRow.copyBytes()), e);
+    }
+  }
+
+  static class TableHashReaderImpl implements TableHashReader {
+
+    private final Reader reader;
+
+    static TableHashReaderImpl create(TableHash.Reader reader) {
+      Preconditions.checkNotNull(reader, "Reader can not be null.");
+      return new TableHashReaderImpl(reader);
+    }
+
+    private TableHashReaderImpl(TableHash.Reader reader) {
+      this.reader = reader;
+    }
+
+    @Override
+    public boolean next() throws IOException {
+      return reader.next();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentKey() {
+      return reader.getCurrentKey();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentHash() {
+      return reader.getCurrentHash();
+    }
+
+    @Override
+    public void close() throws IOException {
+      reader.close();
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
new file mode 100644
index 0000000000..a7db0add1c
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/org/apache/hadoop/hbase/mapreduce/BigtableTableHashAccessor.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.HashTable.ResultHasher;
+import org.apache.hadoop.hbase.mapreduce.HashTable.TableHash;
+
+/**
+ * Static accessors for package-private members of {@link TableHash}. The class is declared in
+ * {@code org.apache.hadoop.hbase.mapreduce} so that it can reach those members and re-expose them
+ * through public methods.
+ */
+@InternalApi
+public class BigtableTableHashAccessor {
+
+  // Not instantiable: this class exists only to read state out of TableHash.
+  private BigtableTableHashAccessor() {}
+
+  /** Returns the {@code numHashFiles} field of {@code hash}. */
+  public static int getNumHashFiles(TableHash hash) {
+    final int numHashFiles = hash.numHashFiles;
+    return numHashFiles;
+  }
+
+  /** Returns an {@link ImmutableList} copy of the {@code partitions} field of {@code hash}. */
+  public static ImmutableList<ImmutableBytesWritable> getPartitions(TableHash hash) {
+    final ImmutableList<ImmutableBytesWritable> partitions =
+        ImmutableList.copyOf(hash.partitions);
+    return partitions;
+  }
+
+  /** Returns a new {@link ImmutableBytesWritable} built from the {@code startRow} of {@code hash}. */
+  public static ImmutableBytesWritable getStartRow(TableHash hash) {
+    final ImmutableBytesWritable startRow = new ImmutableBytesWritable(hash.startRow);
+    return startRow;
+  }
+
+  /** Returns a new {@link ImmutableBytesWritable} built from the {@code stopRow} of {@code hash}. */
+  public static ImmutableBytesWritable getStopRow(TableHash hash) {
+    final ImmutableBytesWritable stopRow = new ImmutableBytesWritable(hash.stopRow);
+    return stopRow;
+  }
+
+  /**
+   * Returns the result of calling {@code initScan()} on {@code hash}.
+   *
+   * @throws IOException if {@code initScan()} fails
+   */
+  public static Scan getScan(TableHash hash) throws IOException {
+    final Scan scan = hash.initScan();
+    return scan;
+  }
+
+  /**
+   * Public wrapper around the package-private {@link ResultHasher}. Each method forwards to the
+   * wrapped hasher; having a public, non-final wrapper also helps with mocking in unit tests.
+   */
+  public static class BigtableResultHasher {
+    // A fresh ResultHasher is created for every wrapper instance.
+    private final ResultHasher hasher = new ResultHasher();
+
+    public BigtableResultHasher() {}
+
+    /** Forwards to {@code ResultHasher.startBatch} with the given batch start key. */
+    public void startBatch(ImmutableBytesWritable batchStartKey) {
+      this.hasher.startBatch(batchStartKey);
+    }
+
+    /** Forwards to {@code ResultHasher.finishBatch}. */
+    public void finishBatch() {
+      this.hasher.finishBatch();
+    }
+
+    /** Returns whatever {@code ResultHasher.getBatchHash} returns. */
+    public ImmutableBytesWritable getBatchHash() {
+      final ImmutableBytesWritable batchHash = this.hasher.getBatchHash();
+      return batchHash;
+    }
+
+    /** Forwards {@code result} to {@code ResultHasher.hashResult}. */
+    public void hashResult(Result result) {
+      this.hasher.hashResult(result);
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
new file mode 100644
index 0000000000..6e66d3e096
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
@@ -0,0 +1,133 @@
+// Run from HBase shell. Run `hbase shell` from unix terminal on HBase master.
+create 'test', 'cf', {SPLITS => ["1", "2", "3", "4", "5", "6", "7", "8", "9"]}
+put 'test','1', 'cf:a', 'value1', 100
+put 'test','2', 'cf:a', 'value2', 100
+put 'test','3', 'cf:a', 'value3', 100
+put 'test','4', 'cf:a', 'value4', 100
+put 'test','5', 'cf:a', 'value5', 100
+put 'test','6', 'cf:a', 'value6', 100
+put 'test','7', 'cf:a', 'value7', 100
+put 'test','8', 'cf:a', 'value8', 100
+put 'test','9', 'cf:a', 'value9', 100
+put 'test','10', 'cf:a', 'value10', 100
+put 'test','11', 'cf:a', 'value11', 100
+put 'test','12', 'cf:a', 'value12', 100
+put 'test','13', 'cf:a', 'value13', 100
+put 'test','14', 'cf:a', 'value14', 100
+put 'test','15', 'cf:a', 'value15', 100
+put 'test','16', 'cf:a', 'value16', 100
+put 'test','17', 'cf:a', 'value17', 100
+put 'test','18', 'cf:a', 'value18', 100
+put 'test','19', 'cf:a', 'value19', 100
+put 'test','20', 'cf:a', 'value20', 100
+put 'test','21', 'cf:a', 'value21', 100
+put 'test','22', 'cf:a', 'value22', 100
+put 'test','23', 'cf:a', 'value23', 100
+put 'test','24', 'cf:a', 'value24', 100
+put 'test','25', 'cf:a', 'value25', 100
+put 'test','26', 'cf:a', 'value26', 100
+put 'test','27', 'cf:a', 'value27', 100
+put 'test','28', 'cf:a', 'value28', 100
+put 'test','29', 'cf:a', 'value29', 100
+put 'test','30', 'cf:a', 'value30', 100
+put 'test','31', 'cf:a', 'value31', 100
+put 'test','32', 'cf:a', 'value32', 100
+put 'test','33', 'cf:a', 'value33', 100
+put 'test','34', 'cf:a', 'value34', 100
+put 'test','35', 'cf:a', 'value35', 100
+put 'test','36', 'cf:a', 'value36', 100
+put 'test','37', 'cf:a', 'value37', 100
+put 'test','38', 'cf:a', 'value38', 100
+put 'test','39', 'cf:a', 'value39', 100
+put 'test','40', 'cf:a', 'value40', 100
+put 'test','41', 'cf:a', 'value41', 100
+put 'test','42', 'cf:a', 'value42', 100
+put 'test','43', 'cf:a', 'value43', 100
+put 'test','44', 'cf:a', 'value44', 100
+put 'test','45', 'cf:a', 'value45', 100
+put 'test','46', 'cf:a', 'value46', 100
+put 'test','47', 'cf:a', 'value47', 100
+put 'test','48', 'cf:a', 'value48', 100
+put 'test','49', 'cf:a', 'value49', 100
+put 'test','50', 'cf:a', 'value50', 100
+put 'test','51', 'cf:a', 'value51', 100
+put 'test','52', 'cf:a', 'value52', 100
+put 'test','53', 'cf:a', 'value53', 100
+put 'test','54', 'cf:a', 'value54', 100
+put 'test','55', 'cf:a', 'value55', 100
+put 'test','56', 'cf:a', 'value56', 100
+put 'test','57', 'cf:a', 'value57', 100
+put 'test','58', 'cf:a', 'value58', 100
+put 'test','59', 'cf:a', 'value59', 100
+put 'test','60', 'cf:a', 'value60', 100
+put 'test','61', 'cf:a', 'value61', 100
+put 'test','62', 'cf:a', 'value62', 100
+put 'test','63', 'cf:a', 'value63', 100
+put 'test','64', 'cf:a', 'value64', 100
+put 'test','65', 'cf:a', 'value65', 100
+put 'test','66', 'cf:a', 'value66', 100
+put 'test','67', 'cf:a', 'value67', 100
+put 'test','68', 'cf:a', 'value68', 100
+put 'test','69', 'cf:a', 'value69', 100
+put 'test','70', 'cf:a', 'value70', 100
+put 'test','71', 'cf:a', 'value71', 100
+put 'test','72', 'cf:a', 'value72', 100
+put 'test','73', 'cf:a', 'value73', 100
+put 'test','74', 'cf:a', 'value74', 100
+put 'test','75', 'cf:a', 'value75', 100
+put 'test','76', 'cf:a', 'value76', 100
+put 'test','77', 'cf:a', 'value77', 100
+put 'test','78', 'cf:a', 'value78', 100
+put 'test','79', 'cf:a', 'value79', 100
+put 'test','80', 'cf:a', 'value80', 100
+put 'test','81', 'cf:a', 'value81', 100
+put 'test','82', 'cf:a', 'value82', 100
+put 'test','83', 'cf:a', 'value83', 100
+put 'test','84', 'cf:a', 'value84', 100
+put 'test','85', 'cf:a', 'value85', 100
+put 'test','86', 'cf:a', 'value86', 100
+put 'test','87', 'cf:a', 'value87', 100
+put 'test','88', 'cf:a', 'value88', 100
+put 'test','89', 'cf:a', 'value89', 100
+put 'test','90', 'cf:a', 'value90', 100
+put 'test','91', 'cf:a', 'value91', 100
+put 'test','92', 'cf:a', 'value92', 100
+put 'test','93', 'cf:a', 'value93', 100
+put 'test','94', 'cf:a', 'value94', 100
+put 'test','95', 'cf:a', 'value95', 100
+put 'test','96', 'cf:a', 'value96', 100
+put 'test','97', 'cf:a', 'value97', 100
+put 'test','98', 'cf:a', 'value98', 100
+put 'test','99', 'cf:a', 'value99', 100
+put 'test','100', 'cf:a', 'value100', 100
+snapshot 'test', 'test-snapshot'
+list_snapshots
+
+
+////////////////////Run from Unix shell on HBase master node//////////////////
+// Export the snapshot
+hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot test-snapshot -copy-to /integration-test/data -mappers 16
+
+// Create the hashes for the table. Run the command from unix shell on an HBase
+// node.
+hbase org.apache.hadoop.hbase.mapreduce.HashTable --batchsize=10 --numhashfiles=10 test /integration-test/hashtable
+
+// Copy the exported data from the Hadoop filesystem to local disk, then upload it to GCS
+hadoop fs -copyToLocal /integration-test /tmp/
+gsutil cp -r /tmp/integration-test gs://<my-bucket>/
+
+// GCS bucket should look like this:
+$ gsutil ls gs://<my-bucket>/integration-test/data
+gs://<my-bucket>/integration-test/data/
+gs://<my-bucket>/integration-test/data/.hbase-snapshot/
+gs://<my-bucket>/integration-test/data/archive/
+$ gsutil ls gs://<my-bucket>/integration-test/hashtable
+gs://<my-bucket>/integration-test/hashtable/manifest
+gs://<my-bucket>/integration-test/hashtable/partitions
+gs://<my-bucket>/integration-test/hashtable/hashes/
+
+// Run from HBase shell. Run `hbase shell` from unix terminal on HBase master.
+// clean up the table
+disable 'test'
+drop 'test'
+exit
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
new file mode 100644
index 0000000000..03ac02e452
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
@@ -0,0 +1,2 @@
+
+test-snapshottestϹ���. (@���������
\ No newline at end of file
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/data.manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/data.manifest
new file mode 100644
index 0000000000..6439f06130
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/.hbase-snapshot/test-snapshot/data.manifest differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b
new file mode 100644
index 0000000000..1b91b948d8
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/01340515889e8ec5014bbdbfa4fd4689/cf/0ad53893d268478f9b2484cbb6016d9b differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d
new file mode 100644
index 0000000000..951eb512ac
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/156b320f3ebe472a1ae56a2f6930a676/cf/9926df0da08b4f51a33517afb040f82d differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b
new file mode 100644
index 0000000000..dc89f02ec2
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/313460ce1b714784d36c64bcd01f9e2c/cf/966e85699fdd4680a8c6fbf4b41b6e4b differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe
new file mode 100644
index 0000000000..c7fb208f72
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/3bfc13b0a9bf8148a91788a8d2b60117/cf/bab07e8089634e629a4c111ea2b415fe differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d
new file mode 100644
index 0000000000..7638f6eabb
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/5bc31088b2daee7903f5b3d3a52f7ebf/cf/7fef5694213b4be0ad79f79c45200c2d differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b
new file mode 100644
index 0000000000..c6ba1f760b
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/7c4a9137853573c8d671264dc0b31f89/cf/f8d40658d79b4a7191f21bcf14ae289b differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab
new file mode 100644
index 0000000000..5a757daec8
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/818d6b145a50cfc3bf8ee865486fdda3/cf/afe596ef5c61440983da2dcb54d581ab differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2
new file mode 100644
index 0000000000..d29619e3ec
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/8c2101799fadc18613082a495d11e4ea/cf/2c766f1fc8eb460dbfa9a3803138c9b2 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85
new file mode 100644
index 0000000000..337b5f9280
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/data/archive/data/default/test/f1ef86b666a891d8c77f0eada4d1a15c/cf/e59edc08de6d441689288f04c7c0fe85 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/_SUCCESS b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/_SUCCESS
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data
new file mode 100644
index 0000000000..26334294df
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index
new file mode 100644
index 0000000000..f7ac1fc941
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00000/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data
new file mode 100644
index 0000000000..87b715673c
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index
new file mode 100644
index 0000000000..4edcbd1ed5
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00001/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data
new file mode 100644
index 0000000000..4b59b346f0
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index
new file mode 100644
index 0000000000..4169ee8258
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00002/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data
new file mode 100644
index 0000000000..a05197b51d
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index
new file mode 100644
index 0000000000..9228013bfa
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00003/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data
new file mode 100644
index 0000000000..6e29b085e7
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index
new file mode 100644
index 0000000000..245c2ceb3f
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00004/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data
new file mode 100644
index 0000000000..40cbf30418
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index
new file mode 100644
index 0000000000..dbbacaf8f0
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00005/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data
new file mode 100644
index 0000000000..3f0e32269c
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index
new file mode 100644
index 0000000000..a0818358eb
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00006/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data
new file mode 100644
index 0000000000..effda57ece
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index
new file mode 100644
index 0000000000..a8eb1a1748
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00007/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data
new file mode 100644
index 0000000000..011b956c5f
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index
new file mode 100644
index 0000000000..fada13a256
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00008/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data
new file mode 100644
index 0000000000..f55fa79aca
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/data differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index
new file mode 100644
index 0000000000..8c8793cef8
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/hashes/part-r-00009/index differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest
new file mode 100644
index 0000000000..a95421d027
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/manifest
@@ -0,0 +1,4 @@
+#Wed Dec 30 01:23:41 UTC 2020
+numHashFiles=10
+table=test
+targetBatchSize=10
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions
new file mode 100644
index 0000000000..1d447dd67a
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test/hashtable/partitions differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java
new file mode 100644
index 0000000000..0183f856f1
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+
+import java.util.UUID;
+import org.junit.Test;
+
+/** Unit tests for the static path helpers of {@link CleanupHBaseSnapshotRestoreFilesFn}. */
+public class CleanupHBaseSnapshotRestoreFilesFnTest {
+  private static final String TEST_BUCKET_NAME = "test-bucket";
+  private static final String TEST_SNAPSHOT_PATH = "gs://" + TEST_BUCKET_NAME + "/hbase-export";
+  private static final String TEST_RESTORE_PATH =
+      HBaseSnapshotInputConfigBuilder.RESTORE_DIR + UUID.randomUUID();
+  // TEST_RESTORE_PATH without its first character; the value getListPrefix is expected to return.
+  private static final String TEST_RESTORE_PREFIX = TEST_RESTORE_PATH.substring(1);
+
+  /** A gs:// snapshot path yields its bucket name; a bare bucket name is rejected. */
+  @Test
+  public void testGetWorkingBucketName() {
+    assertEquals(
+        TEST_BUCKET_NAME,
+        CleanupHBaseSnapshotRestoreFilesFn.getWorkingBucketName(TEST_SNAPSHOT_PATH));
+
+    assertThrows(
+        IllegalArgumentException.class,
+        () -> CleanupHBaseSnapshotRestoreFilesFn.getWorkingBucketName(TEST_BUCKET_NAME));
+  }
+
+  /** A restore path maps to its list prefix; an already-stripped prefix is rejected. */
+  @Test
+  public void testGetListPrefix() {
+    assertEquals(
+        TEST_RESTORE_PREFIX, CleanupHBaseSnapshotRestoreFilesFn.getListPrefix(TEST_RESTORE_PATH));
+
+    // The negative case must exercise getListPrefix itself, not getWorkingBucketName, otherwise
+    // the rejection path of the method under test is never covered.
+    // NOTE(review): assumes getListPrefix throws IllegalArgumentException for a path that lacks
+    // the leading character it strips - confirm against the implementation.
+    assertThrows(
+        IllegalArgumentException.class,
+        () -> CleanupHBaseSnapshotRestoreFilesFn.getListPrefix(TEST_RESTORE_PREFIX));
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
new file mode 100644
index 0000000000..1a681a2e05
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
@@ -0,0 +1,389 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import com.google.api.services.storage.model.Objects;
+import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot.ImportOptions;
+import com.google.cloud.bigtable.beam.sequencefiles.HBaseResultToMutationFn;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob;
+import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
+import com.google.cloud.bigtable.hbase.BigtableConfiguration;
+import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
+import java.nio.channels.ReadableByteChannel;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+import org.apache.beam.runners.dataflow.DataflowRunner;
+import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.PipelineResult.State;
+import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
+import org.apache.beam.sdk.extensions.gcp.util.GcsUtil;
+import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/*
+ * End to end integration test for pipeline that import HBase snapshot data into Cloud Bigtable and
+ * validates the imported data with SyncTable.
+ * Prepare test data with gsutil(https://cloud.google.com/storage/docs/quickstart-gsutil):
+ * gsutil -m cp -r <PATH_TO_REPO>/bigtable-dataflow-parent/bigtable-beam-import/src/test/integration-test \
+ *  gs://<test_bucket>/
+ *
+ * Setup GCP credential: https://cloud.google.com/docs/authentication
+ *  Ensure your credential have access to Bigtable and Dataflow
+ *
+ * Run with:
+ * mvn integration-test -PhbasesnapshotsIntegrationTest \
+ * -Dgoogle.bigtable.project.id=<project_id> \
+ * -Dgoogle.bigtable.instance.id=<instance_id> \
+ * -Dgoogle.dataflow.stagingLocation=gs://<test_bucket>/staging \
+ * -Dcloud.test.data.folder=gs://<test_bucket>/integration-test/
+ */
+public class EndToEndIT {
+
+  private static Logger LOG = LoggerFactory.getLogger(HBaseResultToMutationFn.class);
+  private static final String TEST_SNAPSHOT_NAME = "test-snapshot";
+  // Location of test data hosted on Google Cloud Storage, for on-cloud dataflow tests.
+  private static final String CLOUD_TEST_DATA_FOLDER = "cloud.test.data.folder";
+  private static final String DATAFLOW_REGION = "region";
+
+  // Column family name used in all test bigtables.
+  private static final String CF = "cf";
+
+  // Full path of the Cloud Storage folder where dataflow jars are uploaded to.
+  private static final String GOOGLE_DATAFLOW_STAGING_LOCATION = "google.dataflow.stagingLocation";
+
+  private Connection connection;
+  private String projectId;
+  private String instanceId;
+  private String tableId;
+  private String region;
+
+  private GcsUtil gcsUtil;
+  private String dataflowStagingLocation;
+  private String workDir;
+  private byte[][] keySplits;
+
+  // Snapshot data setup
+  private String hbaseSnapshotDir;
+  private String hashDir;
+  private String syncTableOutputDir;
+
+  @Before
+  public void setup() throws Exception {
+    projectId = getTestProperty(BigtableOptionsFactory.PROJECT_ID_KEY);
+    instanceId = getTestProperty(BigtableOptionsFactory.INSTANCE_ID_KEY);
+    dataflowStagingLocation = getTestProperty(GOOGLE_DATAFLOW_STAGING_LOCATION);
+    region = getTestProperty(DATAFLOW_REGION);
+    String cloudTestDataFolder = getTestProperty(CLOUD_TEST_DATA_FOLDER);
+    if (!cloudTestDataFolder.endsWith(File.separator)) {
+      cloudTestDataFolder = cloudTestDataFolder + File.separator;
+    }
+
+    hbaseSnapshotDir = cloudTestDataFolder + "data/";
+    UUID test_uuid = UUID.randomUUID();
+    hashDir = cloudTestDataFolder + "hashtable/";
+
+    syncTableOutputDir = dataflowStagingLocation;
+    if (!syncTableOutputDir.endsWith(File.separator)) {
+      syncTableOutputDir = syncTableOutputDir + File.separator;
+    }
+    syncTableOutputDir = syncTableOutputDir + "sync-table-output/" + test_uuid + "/";
+
+    // Cloud Storage config
+    GcpOptions gcpOptions = PipelineOptionsFactory.create().as(GcpOptions.class);
+    gcpOptions.setProject(projectId);
+    gcsUtil = new GcsUtil.GcsUtilFactory().create(gcpOptions);
+
+    // Bigtable config
+    connection = BigtableConfiguration.connect(projectId, instanceId);
+    tableId = "test_" + UUID.randomUUID().toString();
+
+    LOG.info("Setting up integration tests");
+
+    String[] keys = new String[] {"1", "2", "3", "4", "5", "6", "7", "8", "9"};
+    keySplits = new byte[keys.length][];
+    for (int i = 0; i < keys.length; i++) {
+      keySplits[i] = keys[i].getBytes();
+    }
+
+    // Create table in Bigtable
+    TableName tableName = TableName.valueOf(tableId);
+    HTableDescriptor descriptor = new HTableDescriptor(tableName);
+    descriptor.addFamily(new HColumnDescriptor(CF));
+    connection.getAdmin().createTable(descriptor, SnapshotTestingUtils.getSplitKeys());
+  }
+
+  private static String getTestProperty(String name) {
+    return checkNotNull(System.getProperty(name), "Required property missing: " + name);
+  }
+
+  @After
+  public void teardown() throws IOException {
+    final List<GcsPath> paths = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "/*"));
+
+    if (!paths.isEmpty()) {
+      final List<String> pathStrs = new ArrayList<>();
+
+      for (GcsPath path : paths) {
+        pathStrs.add(path.toString());
+      }
+      // TODO: cleanup fails when tests time out. Add a orphan cleaner in the setup()
+      // https://github.com/googleapis/java-bigtable/blob/35588d89b9b243eb691a29d3aff16b9f5a08fbb8/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/test_helpers/env/AbstractTestEnv.java#L108-L119
+      this.gcsUtil.remove(pathStrs);
+    }
+
+    connection.close();
+
+    // delete test table
+    BigtableConfiguration.connect(projectId, instanceId)
+        .getAdmin()
+        .deleteTable(TableName.valueOf(tableId));
+  }
+
+  private SyncTableOptions createSyncTableOptions() {
+    DataflowPipelineOptions syncTableOpts =
+        PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+    syncTableOpts.setRunner(DataflowRunner.class);
+    syncTableOpts.setGcpTempLocation(dataflowStagingLocation);
+    syncTableOpts.setNumWorkers(1);
+    syncTableOpts.setProject(projectId);
+    syncTableOpts.setRegion(region);
+
+    SyncTableOptions syncOpts = syncTableOpts.as(SyncTableOptions.class);
+    // Setup Bigtable params
+    syncOpts.setBigtableProject(StaticValueProvider.of(projectId));
+    syncOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId));
+    syncOpts.setBigtableTableId(StaticValueProvider.of(tableId));
+    syncOpts.setBigtableAppProfileId(null);
+
+    // Setup Hashes
+    syncOpts.setHashTableOutputDir(StaticValueProvider.of(hashDir));
+    syncOpts.setOutputPrefix(StaticValueProvider.of(syncTableOutputDir));
+    return syncOpts;
+  }
+
+  private ImportOptions createImportOptions() {
+    DataflowPipelineOptions importPipelineOpts =
+        PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+    importPipelineOpts.setRunner(DataflowRunner.class);
+    importPipelineOpts.setGcpTempLocation(dataflowStagingLocation);
+    importPipelineOpts.setNumWorkers(1);
+    importPipelineOpts.setProject(projectId);
+    importPipelineOpts.setRegion(region);
+
+    ImportOptions importOpts = importPipelineOpts.as(ImportOptions.class);
+
+    // setup Bigtable options
+    importOpts.setBigtableProject(StaticValueProvider.of(projectId));
+    importOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId));
+    importOpts.setBigtableTableId(StaticValueProvider.of(tableId));
+
+    // setup HBase snapshot info
+    importOpts.setHbaseSnapshotSourceDir(hbaseSnapshotDir);
+    importOpts.setSnapshotName(TEST_SNAPSHOT_NAME);
+    return importOpts;
+  }
+
+  private Map<String, Long> getCountMap(PipelineResult result) {
+    MetricQueryResults metrics = result.metrics().allMetrics();
+    return StreamSupport.stream(metrics.getCounters().spliterator(), false)
+        .collect(Collectors.toMap((m) -> m.getName().getName(), (m) -> m.getAttempted()));
+  }
+
+  /**
+   * Reads the output of SyncTable job and returns a list of mismatched RangeHashes.
+   *
+   * @throws IOException
+   */
+  private List<RangeHash> readMismatchesFromOutputFiles() throws IOException {
+    Gson gson = new Gson();
+    // Find output files
+    List<GcsPath> outputFiles = gcsUtil.expand(GcsPath.fromUri(syncTableOutputDir + "*"));
+    List<RangeHash> rangeHashes = new ArrayList<>();
+
+    // Read each file line by line and create a RangeHash from it.
+    for (GcsPath outputFile : outputFiles) {
+      int size = (int) gcsUtil.fileSize(outputFile);
+      byte[] fileContents = new byte[size];
+      gcsUtil.open(outputFile).read(ByteBuffer.wrap(fileContents));
+      BufferedReader reader =
+          new BufferedReader(new InputStreamReader(new ByteArrayInputStream(fileContents)));
+      String serializedRangeHash;
+      while ((serializedRangeHash = reader.readLine()) != null) {
+        try {
+          rangeHashes.add(gson.fromJson(serializedRangeHash.trim(), RangeHash.class));
+        } catch (Exception e) {
+          LOG.error("Failed to parse JSON: [" + serializedRangeHash + "]", e);
+          throw e;
+        }
+      }
+    }
+    return rangeHashes;
+  }
+
+  // Asserts that all the rowKeys belong in mismatches.
+  // Throws AssertionException
+  private void validateRowInRangeHashes(List<byte[]> rowKeys, Iterable<RangeHash> mismatches) {
+    for (byte[] mismatchedRowKey : rowKeys) {
+      Assert.assertTrue(containsRow(mismatchedRowKey, mismatches));
+    }
+  }
+
+  // Returns true if the rowKey belongs in one of the ranges contained in rangeHashes.
+  private boolean containsRow(byte[] rowKey, Iterable<RangeHash> rangeHashes) {
+    for (RangeHash mismatchedRange : rangeHashes) {
+      // TODO: There maybe a better Range.belongs() utility function somewhere?
+      // Empty start/end key means that there is no start/end key.
+      if ((mismatchedRange.startInclusive.equals(HConstants.EMPTY_BYTE_ARRAY)
+              || mismatchedRange.startInclusive.compareTo(rowKey) <= 0)
+          && (mismatchedRange.stopExclusive.equals(HConstants.EMPTY_BYTE_ARRAY)
+              || mismatchedRange.stopExclusive.compareTo(rowKey) > 0)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  @Test
+  public void testHBaseSnapshotImport() throws Exception {
+
+    // Start import
+    ImportOptions importOpts = createImportOptions();
+
+    // run pipeline
+    State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    // check that the .restore dir used for temp files has been removed
+    Objects objects =
+        gcsUtil.listObjects(
+            GcsPath.fromUri(hbaseSnapshotDir).getBucket(),
+            CleanupHBaseSnapshotRestoreFilesFn.getListPrefix(
+                HBaseSnapshotInputConfigBuilder.RESTORE_DIR),
+            null);
+    Assert.assertNull(objects.getItems());
+
+    SyncTableOptions syncOpts = createSyncTableOptions();
+
+    PipelineResult result = SyncTableJob.buildPipeline(syncOpts).run();
+    state = result.waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    // Read the output files and validate that there are no mismatches.
+    Assert.assertEquals(0, readMismatchesFromOutputFiles().size());
+
+    // Validate the counters.
+    Map<String, Long> counters = getCountMap(result);
+    Assert.assertEquals(counters.get("ranges_matched"), (Long) 101L);
+    Assert.assertEquals(counters.get("ranges_not_matched"), (Long) 0L);
+  }
+
+  /**
+   * Introduces multiple corruptions in imported table and validates that sync-table can detect
+   * them.
+   */
+  @Test
+  public void testHBaseSnapshotImportWithCorruptions() throws Exception {
+    // Import snapshot
+    ImportOptions importOpts = createImportOptions();
+    State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    // Rows where corruptions will be added.
+    byte[] mismatchRowAtStart = "000".getBytes();
+    byte[] mismatchRowInMiddle = "24".getBytes();
+    byte[] mismatchRowDeleted = "64".getBytes();
+    byte[] mismatchRowAtTheEnd = "999".getBytes();
+
+    // Introduce corruptions to the data in Bigtable. Delete data from Bigtable to simulate Bigtable
+    // missing data. Add data to Bigtable to simulate extra data in Bigtable. It is easier to update
+    // Bigtable than change the snapshots.
+    Table table = connection.getTable(TableName.valueOf(tableId));
+    Cell cellInMiddle = table.get(new Get(mismatchRowInMiddle)).rawCells()[0];
+    List<Put> puts =
+        Arrays.asList(
+            // Add a row at the start
+            new Put(mismatchRowAtStart)
+                .addColumn(CF.getBytes(), "random_col".getBytes(), 1L, "value000".getBytes())
+                .addColumn(CF.getBytes(), "random_col".getBytes(), 2L, "value001".getBytes()),
+            // change a cell in middle
+            new Put(cellInMiddle.getRowArray())
+                .addColumn(
+                    cellInMiddle.getFamilyArray(),
+                    cellInMiddle.getQualifierArray(),
+                    cellInMiddle.getTimestamp(),
+                    "corrupted_val".getBytes()),
+            // add a new row in the end
+            new Put(mismatchRowAtTheEnd)
+                .addColumn(CF.getBytes(), "random_col".getBytes(), 100L, "value999".getBytes()));
+
+    table.put(puts);
+    // Delete a random row in the middle. We should see 4 ranges mismatch as table is split on
+    // 1,2...9. All the updates are happening on a different split.
+    table.delete(new Delete(mismatchRowDeleted));
+
+    // Run SyncTable job and expect 4 mismatches.
+    SyncTableOptions syncOpts = createSyncTableOptions();
+    PipelineResult result = SyncTableJob.buildPipeline(syncOpts).run();
+    state = result.waitUntilFinish();
+    Assert.assertEquals(State.DONE, state);
+
+    List<RangeHash> syncTableOutputMismatches = readMismatchesFromOutputFiles();
+    Assert.assertEquals(4, syncTableOutputMismatches.size());
+    validateRowInRangeHashes(
+        Arrays.asList(
+            mismatchRowAtStart, mismatchRowAtTheEnd, mismatchRowDeleted, mismatchRowInMiddle),
+        syncTableOutputMismatches);
+
+    // Assert that the output collection is the right one.
+    Map<String, Long> counters = getCountMap(result);
+    Assert.assertEquals(counters.get("ranges_matched"), (Long) 97L);
+    Assert.assertEquals(counters.get("ranges_not_matched"), (Long) 4L);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java
new file mode 100644
index 0000000000..579a57c238
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.junit.Test;
+
+/** Tests the configuration built by {@link HBaseSnapshotInputConfigBuilder}. */
+public class HBaseSnapshotInputConfigBuilderTest {
+
+  private static final String TEST_PROJECT = "test_project";
+  private static final String TEST_SNAPSHOT_DIR = "gs://test-bucket/hbase-export";
+  private static final String TEST_SNAPSHOT_NAME = "test_snapshot";
+
+  /**
+   * Builds a configuration from the test project, snapshot directory and snapshot name, then
+   * checks the GCS filesystem, project id, HBase root dir and input format entries.
+   */
+  @Test
+  public void testBuildingHBaseSnapshotInputConfigBuilder() {
+    Configuration conf =
+        new HBaseSnapshotInputConfigBuilder()
+            .setProjectId(TEST_PROJECT)
+            .setHbaseSnapshotSourceDir(TEST_SNAPSHOT_DIR)
+            .setSnapshotName(TEST_SNAPSHOT_NAME)
+            .createHBaseConfiguration();
+    assertEquals(
+        "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS", conf.get("fs.AbstractFileSystem.gs.impl"));
+    assertEquals(TEST_PROJECT, conf.get("fs.gs.project.id"));
+    assertEquals(TEST_SNAPSHOT_DIR, conf.get("hbase.rootdir"));
+    // Use a null default so the assertion fails when the builder does not set the input format.
+    // Passing the expected class as the default would make this check pass unconditionally.
+    assertEquals(
+        TableSnapshotInputFormat.class,
+        conf.getClass("mapreduce.job.inputformat.class", null, InputFormat.class));
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/it/CloudBigtableBeamITTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/it/CloudBigtableBeamITTest.java
index d2a095a5e3..fd9909f37f 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/it/CloudBigtableBeamITTest.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/it/CloudBigtableBeamITTest.java
@@ -102,13 +102,13 @@ public class CloudBigtableBeamITTest {
 
   private final Log LOG = LogFactory.getLog(getClass());
 
-  private static final String STAGING_LOCATION_KEY = "dataflowStagingLocation";
-  private static final String ZONE_ID_KEY = "dataflowZoneId";
+  private static final String STAGING_LOCATION_KEY = "google.dataflow.stagingLocation";
+  private static final String REGION_KEY = "region";
 
   private static final String projectId = System.getProperty(PROJECT_ID_KEY);
   private static final String instanceId = System.getProperty(INSTANCE_ID_KEY);
   private static final String stagingLocation = System.getProperty(STAGING_LOCATION_KEY);
-  private static final String zoneId = System.getProperty(ZONE_ID_KEY);
+  private static final String region = System.getProperty(REGION_KEY);
 
   private static final String workerMachineType =
       System.getProperty("workerMachineType", "n1" + "-standard-8");
@@ -129,7 +129,7 @@ public class CloudBigtableBeamITTest {
   @BeforeClass
   public static void setUpConfiguration() {
     Preconditions.checkArgument(stagingLocation != null, "Set -D" + STAGING_LOCATION_KEY + ".");
-    Preconditions.checkArgument(zoneId != null, "Set -D" + ZONE_ID_KEY + ".");
+    Preconditions.checkArgument(region != null, "Set -D" + REGION_KEY + ".");
     Preconditions.checkArgument(projectId != null, "Set -D" + PROJECT_ID_KEY + ".");
     Preconditions.checkArgument(instanceId != null, "Set -D" + INSTANCE_ID_KEY + ".");
   }
@@ -255,7 +255,7 @@ private static byte[] createRandomValue() {
   private DataflowPipelineOptions createOptions() {
     DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
     options.setProject(projectId);
-    options.setZone(zoneId);
+    options.setRegion(region);
     options.setStagingLocation(stagingLocation + "/stage");
     options.setTempLocation(stagingLocation + "/temp");
     options.setRunner(DataflowRunner.class);
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/sequencefiles/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/sequencefiles/EndToEndIT.java
index 8f5cd823c7..1958e04307 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/sequencefiles/EndToEndIT.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/sequencefiles/EndToEndIT.java
@@ -55,6 +55,7 @@
 public class EndToEndIT {
   // Location of test data hosted on Google Cloud Storage, for on-cloud dataflow tests.
   private static final String CLOUD_TEST_DATA_FOLDER = "cloud.test.data.folder";
+  private static final String DATAFLOW_REGION = "region";
 
   // Column family name used in all test bigtables.
   private static final String CF = "column_family";
@@ -66,6 +67,7 @@ public class EndToEndIT {
   private String projectId;
   private String instanceId;
   private String tableId;
+  private String region;
 
   private GcsUtil gcsUtil;
   private String cloudTestDataFolder;
@@ -76,7 +78,7 @@ public class EndToEndIT {
   public void setup() throws Exception {
     projectId = getTestProperty(BigtableOptionsFactory.PROJECT_ID_KEY);
     instanceId = getTestProperty(BigtableOptionsFactory.INSTANCE_ID_KEY);
-
+    region = getTestProperty(DATAFLOW_REGION);
     dataflowStagingLocation = getTestProperty(GOOGLE_DATAFLOW_STAGING_LOCATION);
 
     cloudTestDataFolder = getTestProperty(CLOUD_TEST_DATA_FOLDER);
@@ -152,6 +154,7 @@ public void testExportImport() throws Exception {
       pipelineOpts.setGcpTempLocation(dataflowStagingLocation);
       pipelineOpts.setNumWorkers(1);
       pipelineOpts.setProject(projectId);
+      pipelineOpts.setRegion(region);
 
       ExportOptions exportOpts = pipelineOpts.as(ExportOptions.class);
       exportOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId));
@@ -172,6 +175,7 @@ public void testExportImport() throws Exception {
           PipelineOptionsFactory.as(DataflowPipelineOptions.class);
       createTablePipelineOpts.setRunner(DataflowRunner.class);
       createTablePipelineOpts.setProject(projectId);
+      createTablePipelineOpts.setRegion(region);
 
       CreateTableHelper.CreateTableOpts createOpts =
           createTablePipelineOpts.as(CreateTableHelper.CreateTableOpts.class);
@@ -188,6 +192,7 @@ public void testExportImport() throws Exception {
       importPipelineOpts.setGcpTempLocation(dataflowStagingLocation);
       importPipelineOpts.setNumWorkers(1);
       importPipelineOpts.setProject(projectId);
+      importPipelineOpts.setRegion(region);
 
       ImportJob.ImportOptions importOpts = importPipelineOpts.as(ImportJob.ImportOptions.class);
       importOpts.setBigtableProject(StaticValueProvider.of(projectId));
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
new file mode 100644
index 0000000000..96d5960423
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/BufferedHadoopHashTableSourceTest.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.SourceTestUtils;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Unit tests for {@link BufferedHadoopHashTableSource}, driven by a {@link FakeTableHashWrapper}
+ * and a batch size of {@link #BATCH_SIZE}.
+ */
+@RunWith(JUnit4.class)
+public class BufferedHadoopHashTableSourceTest {
+
+  private BufferedHadoopHashTableSource bufferedSource;
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  private static final String HASH_TABLE_OUTPUT_PATH_DIR = "gs://my-bucket/outputDir";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("AAAA".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW =
+      new ImmutableBytesWritable("ZZZZ".getBytes());
+  private static final ImmutableBytesWritable POST_STOP_ROW =
+      new ImmutableBytesWritable("z".getBytes()); // Lowercase z is lexicographically > uppercase Z
+  private static final ImmutableBytesWritable EMPTY_ROW =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+  // Hash paired with the start row as the first entry that setupTestData() adds.
+  private static final ImmutableBytesWritable START_HASH =
+      new ImmutableBytesWritable("START-HASH".getBytes());
+  // Number of RangeHashes grouped into one output element by the source under test.
+  private static final int BATCH_SIZE = 5;
+
+  /**
+   * Creates an empty fake hash table covering [START_ROW, STOP_ROW) and wraps a
+   * HadoopHashTableSource over it in the buffered source under test.
+   */
+  @Before
+  public void setUp() throws Exception {
+    fakeTableHashWrapper =
+        new FakeTableHashWrapper(
+            START_ROW, STOP_ROW, new ArrayList<>(), new ArrayList<>(), new Scan());
+    bufferedSource =
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of("cbt-dev"),
+                StaticValueProvider.of(HASH_TABLE_OUTPUT_PATH_DIR),
+                START_ROW,
+                STOP_ROW,
+                new FakeTableHashWrapperFactory(fakeTableHashWrapper)),
+            BATCH_SIZE);
+  }
+
+  /** Returns the row key "KEY-&lt;keyIndex&gt;" as bytes. */
+  protected static ImmutableBytesWritable getKey(int keyIndex) {
+    return new ImmutableBytesWritable(("KEY-" + keyIndex).getBytes());
+  }
+
+  /** Returns the hash "HASH-&lt;hashIndex&gt;" as bytes. */
+  protected static ImmutableBytesWritable getHash(int hashIndex) {
+    return new ImmutableBytesWritable(("HASH-" + hashIndex).getBytes());
+  }
+
+  /**
+   * Populates the fakeTableHashWrapper with {@code numEntries} entries: (startRow, START_HASH)
+   * followed by (KEY-i, HASH-i) for i in [0, numEntries - 1).
+   *
+   * <p>Returns the expected output: {@code numEntries} consecutive RangeHashes from startRow to
+   * stopRow, grouped into batches of at most BATCH_SIZE, each batch keyed by the binary-string
+   * form of its first range's start key. For numEntries=1, a single batch holding the single
+   * RangeHash (startRow, stopRow, START_HASH) is returned.
+   */
+  protected List<KV<String, List<RangeHash>>> setupTestData(
+      ImmutableBytesWritable startRow, ImmutableBytesWritable stopRow, int numEntries) {
+    fakeTableHashWrapper.startRowInclusive = startRow;
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+    fakeTableHashWrapper.hashes.add(KV.of(startRow, START_HASH));
+    for (int i = 0; i < numEntries - 1; i++) {
+      fakeTableHashWrapper.hashes.add(KV.of(getKey(i), getHash(i)));
+    }
+
+    List<KV<String, List<RangeHash>>> out = new ArrayList<>();
+    // Setup RangeHashes to be returned
+    List<RangeHash> expectedRangeHashes = new ArrayList<>();
+    // key/hash track the start key and hash of the range that is currently open.
+    ImmutableBytesWritable key = startRow;
+    ImmutableBytesWritable hash = START_HASH;
+    for (int i = 0; i < numEntries - 1; i++) {
+      // The next entry's key closes the open range.
+      expectedRangeHashes.add(RangeHash.of(key, getKey(i), hash));
+      key = getKey(i);
+      hash = getHash(i);
+      // Emit a batch as soon as it holds BATCH_SIZE ranges and start a new one.
+      if (expectedRangeHashes.size() % BATCH_SIZE == 0) {
+        out.add(
+            KV.of(
+                Bytes.toStringBinary(expectedRangeHashes.get(0).startInclusive.copyBytes()),
+                expectedRangeHashes));
+        expectedRangeHashes = new ArrayList<>();
+      }
+    }
+    // Process the last range
+    expectedRangeHashes.add(RangeHash.of(key, stopRow, hash));
+    // Finalize the last batch
+    out.add(
+        KV.of(
+            Bytes.toStringBinary(expectedRangeHashes.get(0).startInclusive.copyBytes()),
+            expectedRangeHashes));
+
+    return out;
+  }
+
+  @Test
+  public void testHashReaderEmpty() throws IOException {
+    // The tableHashWrapper has no hashes, this should result in empty source.
+    assertEquals(Arrays.asList(), SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderPartialBuffer() throws IOException {
+    // Setup 4 entries in this hashtable datafile: fewer than BATCH_SIZE, so one partial batch.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 4);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleBatches() throws IOException {
+    // Setup 20 entries in this hashtable datafile: exactly 4 full batches of BATCH_SIZE.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 20);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleBatchesWithPartialBatchAtEnd() throws IOException {
+    // Setup 23 entries in this hashtable datafile: 4 full batches plus a final batch of 3.
+    List<KV<String, List<RangeHash>>> expected = setupTestData(START_ROW, STOP_ROW, 23);
+    assertEquals(expected, SourceTestUtils.readFromSource(bufferedSource, null));
+  }
+
+  @Test
+  public void testSplitEqualsUnsplit() throws Exception {
+    // NOTE(review): only partitions are set here; no hashes are added via setupTestData(), so the
+    // sources being compared appear to carry no entries. Confirm this is intended.
+    fakeTableHashWrapper.partitions = Arrays.asList(getKey(4), getKey(9));
+    SourceTestUtils.assertSourcesEqualReferenceSource(
+        bufferedSource, bufferedSource.split(0, null), null);
+  }
+
+  @Test
+  public void testUnstartedReaderEqualsStarted() throws Exception {
+    // Setup 6 entries: one full batch plus a final batch holding a single range.
+    setupTestData(START_ROW, STOP_ROW, 6);
+    SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(
+        bufferedSource.createReader(null), null);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
new file mode 100644
index 0000000000..a27288f7da
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/ComputeAndValidateHashFromBigtableDoFnTest.java
@@ -0,0 +1,469 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.models.GCRules.GCRULES;
+
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient;
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings;
+import com.google.bigtable.repackaged.com.google.cloud.bigtable.admin.v2.models.CreateTableRequest;
+import com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.cloud.bigtable.emulator.v2.BigtableEmulatorRule;
+import com.google.cloud.bigtable.hbase.BigtableConfiguration;
+import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.metrics.MetricQueryResults;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.BigtableTableHashAccessor.BigtableResultHasher;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@RunWith(JUnit4.class)
+public class ComputeAndValidateHashFromBigtableDoFnTest {
+
+  private static final byte[] EMPTY_ROW_KEY = HConstants.EMPTY_BYTE_ARRAY;
+  protected final Logger LOG = LoggerFactory.getLogger(getClass());
+
+  public static final String FAKE_TABLE = "fake-table";
+  private static final String ROW_KEY_PREFIX = "row-";
+  private static final String VALUE_PREFIX = "value-";
+  private static final byte[] EXTRA_VALUE = "add".getBytes();
+  private static final byte[] CF = "cf".getBytes();
+  private static final byte[] CF2 = "cf2".getBytes(); // second family, distinct from CF
+  private static final byte[] COL = "col".getBytes();
+  private static final long TS = 1000L;
+  private static final int FIRST_ROW_INDEX = 20;
+  private static final int LAST_ROW_INDEX = 31;
+
+  @Rule public final BigtableEmulatorRule bigtableEmulator = BigtableEmulatorRule.create();
+  @Rule public final transient TestPipeline p = TestPipeline.create();
+
+  private ComputeAndValidateHashFromBigtableDoFn doFn;
+
+  // Clients connected to the emulator; opened in setUp() and closed in tearDown().
+  private BigtableTableAdminClient tableAdminClient;
+  private Connection connection;
+  private Table table;
+  // Fake a TableHashWrapper.
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  private List<RangeHash> hashes;
+
+  @Before
+  public void setUp() throws IOException {
+    hashes = new ArrayList<>();
+    // Initialize the clients to connect to the emulator
+    tableAdminClient =
+        BigtableTableAdminClient.create(
+            BigtableTableAdminSettings.newBuilderForEmulator(bigtableEmulator.getPort())
+                .setProjectId("fake-project")
+                .setInstanceId("fake-instance")
+                .build());
+
+    CloudBigtableTableConfiguration config =
+        new CloudBigtableTableConfiguration.Builder()
+            .withProjectId("fake-project")
+            .withInstanceId("fake-instance")
+            .withTableId(FAKE_TABLE)
+            .withConfiguration(
+                BigtableOptionsFactory.BIGTABLE_EMULATOR_HOST_KEY,
+                "localhost:" + bigtableEmulator.getPort())
+            .build();
+
+    connection = BigtableConfiguration.connect(config.toHBaseConfig());
+    table = connection.getTable(TableName.valueOf(FAKE_TABLE));
+    fakeTableHashWrapper = new FakeTableHashWrapper();
+    // Scan all the cells for the column, HBase scan fetches 1 cell/column by default
+    fakeTableHashWrapper.scan = new Scan().setMaxVersions();
+
+    doFn =
+        new ComputeAndValidateHashFromBigtableDoFn(
+            config,
+            StaticValueProvider.of(FAKE_TABLE),
+            StaticValueProvider.of("proj"),
+            StaticValueProvider.of("hash"),
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+
+    // Create a test table with both column families that can be used in tests
+    tableAdminClient.createTable(
+        CreateTableRequest.of(FAKE_TABLE)
+            .addFamily(new String(CF), GCRULES.maxVersions(100))
+            .addFamily(new String(CF2), GCRULES.maxVersions(100)));
+
+    p.getCoderRegistry().registerCoderForClass(RangeHash.class, new RangeHashCoder());
+
+    // Fill CBT table with data.
+    writeDataToTable();
+  }
+
+  @After
+  public void tearDown() throws IOException {
+    tableAdminClient.deleteTable(FAKE_TABLE); // setUp() recreates the table for the next test
+    table.close();
+    connection.close();
+    tableAdminClient.close();
+  }
+
+  private byte[] getRowKey(int i) {
+    return ROW_KEY_PREFIX.concat(Integer.toString(i)).getBytes();
+  }
+
+  private byte[] getValue(int rowIndex, int cellIndex) {
+    return VALUE_PREFIX.concat(rowIndex + "-" + cellIndex).getBytes();
+  }
+
+  private void writeDataToTable() throws IOException {
+    List<Put> puts = new ArrayList<>();
+    // Tests use the rows 21-30. Set up some extra data to simulate the real-world scenario where
+    // other work items are working on the table in parallel.
+    for (int i = FIRST_ROW_INDEX; i <= LAST_ROW_INDEX; i++) {
+      for (int j = 0; j < 2; j++) {
+        // Insert rows with 2 cells each, one Put per cell at timestamps TS and TS + 1
+        Put put = new Put(getRowKey(i));
+        put.addColumn(CF, COL, TS + j, getValue(i, j));
+        puts.add(put);
+      }
+    }
+    table.put(puts);
+  }
+
+  /** Deletes, one row at a time, every row whose index is in [startIndex, stopIndex). */
+  private void deleteRange(int startIndex, int stopIndex) throws IOException {
+    for (int rowIndex = startIndex; rowIndex < stopIndex; rowIndex++) {
+      table.delete(new Delete(getRowKey(rowIndex)));
+    }
+  }
+
+  // Creates a RangeHash for range [startRow, stopRow) from the table's current contents.
+  private RangeHash createHash(byte[] startRow, byte[] stopRow) throws IOException {
+    LOG.debug("Creating hash for rows {} to {}", new String(startRow), new String(stopRow));
+    BigtableResultHasher hasher = new BigtableResultHasher();
+    hasher.startBatch(new ImmutableBytesWritable(startRow));
+    // Scan all the cells for a column.
+    Scan scan = new Scan().setMaxVersions().withStartRow(startRow).withStopRow(stopRow, false);
+    // Read the rows from Bigtable and compute the expected hash; the scanner is closed after.
+    try (org.apache.hadoop.hbase.client.ResultScanner scanner = table.getScanner(scan)) {
+      for (Result result : scanner) {
+        LOG.debug("Adding result to hash: {}", result);
+        hasher.hashResult(result);
+      }
+    }
+    hasher.finishBatch();
+    return RangeHash.of(
+        new ImmutableBytesWritable(startRow),
+        new ImmutableBytesWritable(stopRow),
+        hasher.getBatchHash());
+  }
+
+  private void validateCounters(
+      PipelineResult result, Long expectedMatches, Long expectedMismatches) {
+    MetricQueryResults allMetrics = result.metrics().allMetrics();
+    Map<String, Long> attemptedByName =
+        StreamSupport.stream(allMetrics.getCounters().spliterator(), false)
+            .collect(Collectors.toMap(c -> c.getName().getName(), c -> c.getAttempted()));
+    Assert.assertEquals(expectedMatches, attemptedByName.get("ranges_matched"));
+    Assert.assertEquals(expectedMismatches, attemptedByName.get("ranges_not_matched"));
+  }
+
+  ////////// Happy case tests for various setups//////////////////////
+  @Test
+  public void testHashMatchesForMultipleRange() throws Exception {
+    // Two adjacent ranges hashed from the untouched table: no range should be emitted.
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(28)));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(getRowKey(21)), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).empty();
+    validateCounters(p.run(), 2L, 0L);
+  }
+
+  @Test
+  public void testHashMatchesForSingleRange() throws Exception {
+    // A single range hashed from the untouched table: no range should be emitted.
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(getRowKey(21)), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).empty();
+    validateCounters(p.run(), 1L, 0L);
+  }
+
+  @Test
+  public void testHashMatchesForFullTableScanWithMultipleRange() throws Exception {
+    // The first range starts and the last range ends at the empty row key.
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), EMPTY_ROW_KEY));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).empty();
+    validateCounters(p.run(), 2L, 0L);
+  }
+
+  @Test
+  public void testHashMatchesForMultipleSingleRowRange() throws Exception {
+    // Every range covers exactly one row: [22, 23), [23, 24) and [24, 25).
+    hashes.add(createHash(getRowKey(22), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(25)));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(getRowKey(22)), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).empty();
+    validateCounters(p.run(), 3L, 0L);
+  }
+
+  ///////////////// Test mismatches when Bigtable has extra rows ////////////////////
+  @Test
+  public void testAdditionalCellInMiddle() throws Exception {
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(30)));
+
+    // Add an extra cell to row 25 after the hashes were computed; only the range that
+    // contains it, [24, 27), is expected in the output.
+    table.put(new Put(getRowKey(25)).addColumn(CF, COL, EXTRA_VALUE));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(getRowKey(21)), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).containsInAnyOrder(hashes.get(1));
+    validateCounters(p.run(), 2L, 1L);
+  }
+
+  @Test
+  public void testAdditionalRowsAtEnds() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Add an extra row in the beginning: "row-1" sorts before "row-20", the first key
+    // written by writeDataToTable().
+    table.put(new Put(getRowKey(1)).addColumn(CF, COL, EXTRA_VALUE));
+
+    // Add an extra row at the end: "row-5" sorts after "row-31", the last key written.
+    table.put(new Put(getRowKey(5)).addColumn(CF, COL, EXTRA_VALUE));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).containsInAnyOrder(hashes.get(0), hashes.get(2));
+    validateCounters(p.run(), 1L, 2L);
+  }
+
+  ///////////////////// Test different values ///////////////////////////
+  @Test
+  public void testDifferentValues() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Modify the CF
+    // NOTE(review): the put targets row 1 rather than row 20; confirm that is intended.
+    table.delete(new Delete(getRowKey(20)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(1)).addColumn(CF2, COL, TS, getValue(20, 0)));
+
+    // Modify the qualifier
+    table.delete(new Delete(getRowKey(22)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(22)).addColumn(CF, "random-col".getBytes(), TS, getValue(22, 0)));
+
+    // Modify the timestamp
+    table.delete(new Delete(getRowKey(24)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(24)).addColumn(CF, COL, 1, getValue(24, 0)));
+
+    // Modify the value: same family, qualifier and timestamp, different cell content.
+    table.delete(new Delete(getRowKey(26)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(26)).addColumn(CF, COL, TS, EXTRA_VALUE));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    PAssert.that(output)
+        .containsInAnyOrder(hashes.get(0), hashes.get(1), hashes.get(2), hashes.get(3));
+    validateCounters(p.run(), 1L, 4L);
+  }
+
+  ////////////////// Tests with CBT missing data //////////////////////////////
+  @Test
+  public void testMissingRows() throws Exception {
+    // Hash five ranges first, then remove one row from the first, middle and last range.
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), EMPTY_ROW_KEY));
+
+    // Delete a row at the beginning
+    table.delete(new Delete(getRowKey(FIRST_ROW_INDEX)));
+
+    // Delete a row at the middle
+    table.delete(new Delete(getRowKey(24)));
+
+    // Delete a row at the end
+    table.delete(new Delete(getRowKey(LAST_ROW_INDEX)));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4));
+    validateCounters(p.run(), 2L, 3L);
+  }
+
+  @Test
+  public void testMissingRanges() throws Exception {
+    // Hash six ranges first, then empty out four of them completely.
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Delete a range at the beginning
+    deleteRange(FIRST_ROW_INDEX, 21);
+
+    // Delete a range in middle
+    deleteRange(23, 25);
+
+    // Delete row ranges at the end, bigtable scanner will finish with multiple row-ranges to
+    // process.
+    deleteRange(27, LAST_ROW_INDEX + 1);
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches)
+        .containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4), hashes.get(5));
+    validateCounters(p.run(), 2L, 4L);
+  }
+
+  @Test
+  public void testCbtEmpty() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Delete all data from bigtable. deleteRange() excludes its stop index, so pass
+    // LAST_ROW_INDEX + 1 to remove the last row as well.
+    deleteRange(FIRST_ROW_INDEX, LAST_ROW_INDEX + 1);
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    PAssert.that(output).containsInAnyOrder(hashes);
+    validateCounters(p.run(), 0L, 3L);
+  }
+
+  ////////////////////// Test that scan is used from TableHash.////////////////////////
+  @Test
+  public void testScanFromTableHash() throws Exception {
+    // createHash() hashes every cell version (its scan calls setMaxVersions()).
+    hashes.add(createHash(getRowKey(21), getRowKey(24)));
+    hashes.add(createHash(getRowKey(24), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(30)));
+
+    // Update the TableHashWrapper Scan to default. Scan from HashTable.TableHash determines the
+    // cells used to compute hash. CBT has to use the same cells for validation.
+    fakeTableHashWrapper.scan = new Scan();
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> workItems =
+        p.apply(Create.of(KV.of(new String(getRowKey(21)), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> mismatches = workItems.apply(ParDo.of(doFn));
+    PAssert.that(mismatches).containsInAnyOrder(hashes);
+    validateCounters(p.run(), 0L, 3L);
+  }
+
+  ////////////////////// Combination of different cases //////////////////////////////////
+  @Test
+  public void testMismatchesComprehensive() throws Exception {
+    hashes.add(createHash(EMPTY_ROW_KEY, getRowKey(21)));
+    hashes.add(createHash(getRowKey(21), getRowKey(23)));
+    hashes.add(createHash(getRowKey(23), getRowKey(25)));
+    hashes.add(createHash(getRowKey(25), getRowKey(27)));
+    hashes.add(createHash(getRowKey(27), getRowKey(29)));
+    hashes.add(createHash(getRowKey(29), EMPTY_ROW_KEY));
+
+    // Delete a range at the beginning from CBT
+    deleteRange(FIRST_ROW_INDEX, 21);
+
+    // Delete a row in middle from CBT
+    table.delete(new Delete(getRowKey(23)));
+
+    // Update a value in CBT: rewrite the cell at the same timestamp with different content,
+    // so only the value differs from what was hashed.
+    table.delete(new Delete(getRowKey(27)).addColumns(CF, COL, TS));
+    table.put(new Put(getRowKey(27)).addColumn(CF, COL, TS, EXTRA_VALUE));
+
+    // Add an extra row at the end.
+    table.put(new Put(getRowKey(5)).addColumn(CF, COL, EXTRA_VALUE));
+
+    PCollection<KV<String, Iterable<List<RangeHash>>>> input =
+        p.apply(Create.of(KV.of(new String(EMPTY_ROW_KEY), Arrays.asList(hashes))));
+
+    PCollection<RangeHash> output = input.apply(ParDo.of(doFn));
+    PAssert.that(output)
+        .containsInAnyOrder(hashes.get(0), hashes.get(2), hashes.get(4), hashes.get(5));
+    validateCounters(p.run(), 2L, 4L);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
new file mode 100644
index 0000000000..ee2b6814e2
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapper.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * A fake for TableHashWrapper that allows us to mock the behavior of hbase's HashTable.TableHash
+ */
+public class FakeTableHashWrapper implements TableHashWrapper {
+
+  // Sorted list of partition keys splitting the key range.
+  public List<ImmutableBytesWritable> partitions;
+  // List of <Key,Hash> sorted by key.
+  public List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> hashes;
+  public ImmutableBytesWritable startRowInclusive;
+  public ImmutableBytesWritable stopRowExclusive;
+  public Scan scan;
+  private static final long serialVersionUID = 34876543L;
+
+  public FakeTableHashWrapper() {
+    this(
+        new ImmutableBytesWritable(),
+        new ImmutableBytesWritable(),
+        new ArrayList<>(),
+        new ArrayList<>(),
+        new Scan());
+  }
+
+  public FakeTableHashWrapper(
+      ImmutableBytesWritable startRowInclusive,
+      ImmutableBytesWritable stopRowExclusive,
+      List<ImmutableBytesWritable> partitions,
+      List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> hashes,
+      Scan scan) {
+    super();
+    this.startRowInclusive = startRowInclusive;
+    this.stopRowExclusive = stopRowExclusive;
+    this.partitions = partitions;
+    this.hashes = hashes;
+    this.scan = scan;
+  }
+
+  @Override
+  public int getNumHashFiles() {
+    return partitions.size() + 1;
+  }
+
+  @Override
+  public ImmutableList<ImmutableBytesWritable> getPartitions() {
+    return ImmutableList.copyOf(partitions);
+  }
+
+  @Override
+  public ImmutableBytesWritable getStartRow() {
+    return startRowInclusive;
+  }
+
+  @Override
+  public ImmutableBytesWritable getStopRow() {
+    return stopRowExclusive;
+  }
+
+  @Override
+  public Scan getScan() {
+    return scan;
+  }
+
+  @Override
+  public TableHashReader newReader(Configuration conf, ImmutableBytesWritable startRow) {
+    return new FakeTableHashReader(startRow);
+  }
+
+  private void writeObject(ObjectOutputStream s) throws IOException {
+    Gson gson = new Gson();
+    s.writeObject(gson.toJson(scan));
+    s.writeObject(gson.toJson(startRowInclusive));
+    s.writeObject(gson.toJson(stopRowExclusive));
+    s.writeObject(gson.toJson(partitions));
+    s.writeObject(gson.toJson(hashes));
+  }
+
+  private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
+    Gson gson = new Gson();
+    scan = gson.fromJson((String) s.readObject(), Scan.class);
+    startRowInclusive = gson.fromJson((String) s.readObject(), ImmutableBytesWritable.class);
+    stopRowExclusive = gson.fromJson((String) s.readObject(), ImmutableBytesWritable.class);
+    partitions = gson.fromJson((String) s.readObject(), ArrayList.class);
+    hashes = gson.fromJson((String) s.readObject(), ArrayList.class);
+  }
+
+  public class FakeTableHashReader implements TableHashReader {
+    private final ImmutableBytesWritable startRow;
+    // Copy of items to be read by this reader.
+    private final List<KV<ImmutableBytesWritable, ImmutableBytesWritable>> entriesToRead;
+    // First next() will make index = 0, and compare it with the size of entriesToRead.
+    private int index = -1;
+
+    public FakeTableHashReader(ImmutableBytesWritable startRow) {
+      this.startRow = startRow;
+      entriesToRead = new ArrayList<>();
+      for (KV<ImmutableBytesWritable, ImmutableBytesWritable> hash : hashes) {
+        // Collect all the entries after startRow.
+        if (hash.getKey().compareTo(startRow) >= 0) {
+          entriesToRead.add(hash);
+        }
+      }
+    }
+
+    @Override
+    public boolean next() throws IOException {
+      return ++index < entriesToRead.size();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentKey() {
+      return entriesToRead.get(index).getKey();
+    }
+
+    @Override
+    public ImmutableBytesWritable getCurrentHash() {
+      return entriesToRead.get(index).getValue();
+    }
+
+    @Override
+    public void close() throws IOException {
+      // NOOP
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
new file mode 100644
index 0000000000..2e65e3b855
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/FakeTableHashWrapperFactory.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+public class FakeTableHashWrapperFactory extends TableHashWrapperFactory {
+  private static final long serialVersionUID = 269854624L;
+  // The single fake instance handed back by every getTableHash() call.
+  private final FakeTableHashWrapper fakeTableHashWrapper;
+
+  public FakeTableHashWrapperFactory(FakeTableHashWrapper wrapper) {
+    fakeTableHashWrapper = wrapper;
+  }
+
+  @Override
+  public TableHashWrapper getTableHash(String projectId, String sourceHashDir) {
+    // projectId and sourceHashDir are ignored; the injected fake is always returned.
+    return this.fakeTableHashWrapper;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
new file mode 100644
index 0000000000..fa88a56d14
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashBasedReaderTest.java
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.testing.SourceTestUtils;
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HadoopHashBasedReaderTest {
+
+  private HadoopHashTableSource hashTableSource;
+  private FakeTableHashWrapper fakeTableHashWrapper;
+
+  private static final String HASH_TABLE_OUTPUT_PATH_DIR = "gs://my-bucket/outputDir";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("AAAA".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW =
+      new ImmutableBytesWritable("ZZZZ".getBytes());
+  private static final ImmutableBytesWritable EMPTY_ROW =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+  private static final ImmutableBytesWritable START_HASH =
+      new ImmutableBytesWritable("START-HASH".getBytes());
+
+  @Before
+  public void setUp() throws Exception {
+    fakeTableHashWrapper =
+        new FakeTableHashWrapper(
+            START_ROW, STOP_ROW, new ArrayList<>(), new ArrayList<>(), new Scan());
+    hashTableSource =
+        new HadoopHashTableSource(
+            StaticValueProvider.of("cbt-dev"),
+            StaticValueProvider.of(HASH_TABLE_OUTPUT_PATH_DIR),
+            START_ROW,
+            STOP_ROW,
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+  }
+
+  protected static ImmutableBytesWritable getKey(int keyIndex) {
+    return new ImmutableBytesWritable("KEY-".concat(Integer.toString(keyIndex)).getBytes());
+  }
+
+  protected static ImmutableBytesWritable getHash(int hashIndex) {
+    return new ImmutableBytesWritable("HASH-".concat(Integer.toString(hashIndex)).getBytes());
+  }
+
+  /**
+   * Adds {@code numEntries} entries to the fakeTableHashWrapper: one at {@code startRow} with
+   * START_HASH, followed by KEY-i/HASH-i pairs. Returns the RangeHashes expected for that data;
+   * for numEntries=1 that is the single RangeHash (startRow, stopRow, START_HASH).
+   */
+  protected List<RangeHash> setupTestData(
+      ImmutableBytesWritable startRow, ImmutableBytesWritable stopRow, int numEntries) {
+    fakeTableHashWrapper.startRowInclusive = startRow;
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+    fakeTableHashWrapper.hashes.add(KV.of(startRow, START_HASH));
+
+    // Each new entry closes the range opened by the previous one, so the fake's entries and
+    // the expected RangeHashes are built in the same pass.
+    List<RangeHash> wantedRanges = new ArrayList<>();
+    ImmutableBytesWritable rangeStart = startRow;
+    ImmutableBytesWritable rangeHash = START_HASH;
+    for (int entry = 0; entry < numEntries - 1; entry++) {
+      fakeTableHashWrapper.hashes.add(KV.of(getKey(entry), getHash(entry)));
+      wantedRanges.add(RangeHash.of(rangeStart, getKey(entry), rangeHash));
+      rangeStart = getKey(entry);
+      rangeHash = getHash(entry);
+    }
+    // The last range is closed by stopRow.
+    wantedRanges.add(RangeHash.of(rangeStart, stopRow, rangeHash));
+    return wantedRanges;
+  }
+
+  /////////////////////////////// Test the end of HashTable Output /////////////////////////
+
+  @Test
+  public void testHashReaderEmpty() throws IOException {
+    // The tableHashWrapper has no hashes, so the source is expected to be empty.
+    assertEquals(Arrays.asList(), SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderSingleHashBatch() throws IOException {
+    // Set up a single entry, so the HashTable datafile holds exactly one entry and the
+    // expected result is one RangeHash.
+    List<RangeHash> wanted = setupTestData(START_ROW, STOP_ROW, 1);
+
+    assertEquals(wanted, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderMultipleHashBatch() throws IOException {
+    // Set up 4 entries in this hashtable datafile.
+    List<RangeHash> wanted = setupTestData(START_ROW, STOP_ROW, 4);
+    assertEquals(wanted, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  //////////////////// Test the end of HashTable output when end of range is ""/////////////////
+  @Test
+  public void testHashReaderWithEmptyEndRow() throws IOException {
+    // Set up 4 entries in this hashtable datafile with empty start and stop keys.
+    List<RangeHash> wanted = setupTestData(EMPTY_ROW, EMPTY_ROW, 4);
+    hashTableSource.startRowInclusive = EMPTY_ROW;
+    hashTableSource.stopRowExclusive = EMPTY_ROW;
+    assertEquals(wanted, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  /////////////////////////////// Test reader.getCurrent() >= stopRow /////////////////////////
+
+  @Test
+  public void testHashReaderWorkItemEndedOnFirstBatch() throws IOException {
+    // The only entry sits at STOP_ROW, i.e. outside of the workitem's row range.
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, START_HASH));
+    // Source will be empty as no hashes fall in its bounds.
+    assertEquals(new ArrayList<RangeHash>(), SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderWorkItemEndedOnSecondEntry() throws IOException {
+    // Set up a single entry inside the workitem's range; setupTestData returns the one
+    // RangeHash expected for it.
+    List<RangeHash> wanted = setupTestData(START_ROW, STOP_ROW, 1);
+    // Add a next entry at the stop row. Reader should stop and read just 1 entry.
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, getHash(100)));
+
+    assertEquals(wanted, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testHashReaderWorkItemEndedAfterMultipleBatches() throws IOException {
+    // Set up 4 entries in this hashtable datafile.
+    List<RangeHash> wanted = setupTestData(START_ROW, STOP_ROW, 4);
+    // Add a next entry at the stop row. Reader should stop and read just 4 entries.
+    fakeTableHashWrapper.hashes.add(KV.of(STOP_ROW, getHash(100)));
+    assertEquals(wanted, SourceTestUtils.readFromSource(hashTableSource, null));
+  }
+
+  @Test
+  public void testSplitEqualsUnsplit() throws Exception {
+    setupTestData(START_ROW, STOP_ROW, 6);
+    fakeTableHashWrapper.partitions = Arrays.asList(getKey(2), getKey(4));
+    SourceTestUtils.assertSourcesEqualReferenceSource(
+        hashTableSource, hashTableSource.split(1, null), null);
+  }
+
+  @Test
+  public void testUnstartedReaderEqualsStarted() throws Exception {
+    setupTestData(START_ROW, STOP_ROW, 6);
+    SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(
+        hashTableSource.createReader(null), null);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
new file mode 100644
index 0000000000..a3aba3f756
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HadoopHashTableSourceTest.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.HashBasedReader;
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.util.List;
+import junit.framework.TestCase;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.BoundedSource.BoundedReader;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HadoopHashTableSourceTest extends TestCase {
+
+  HadoopHashTableSource source;
+  FakeTableHashWrapper fakeTableHashWrapper;
+
+  private static final ValueProvider<String> PROJECT_ID = StaticValueProvider.of("test-project");
+  private static final ValueProvider<String> HASH_TABLE_OUTPUT_PATH_DIR =
+      StaticValueProvider.of("gs://my-bucket/outputDir");
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("a".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW = new ImmutableBytesWritable("z".getBytes());
+  private static final ImmutableBytesWritable PARTITION1 =
+      new ImmutableBytesWritable("d".getBytes());
+  private static final ImmutableBytesWritable PARTITION2 =
+      new ImmutableBytesWritable("g".getBytes());
+  private static final ImmutableBytesWritable EMPTY_ROW_KEY =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    fakeTableHashWrapper = new FakeTableHashWrapper(); // fresh fake for every test method
+  }
+
+  private List<BoundedSource<RangeHash>> getSplitSources(
+      List<ImmutableBytesWritable> partitions,
+      ImmutableBytesWritable startRow,
+      ImmutableBytesWritable stopRow)
+      throws IOException {
+    fakeTableHashWrapper.startRowInclusive = startRow; // prime the fake before it is wrapped
+    fakeTableHashWrapper.stopRowExclusive = stopRow;
+    fakeTableHashWrapper.partitions = partitions;
+
+    source =
+        new HadoopHashTableSource(
+            PROJECT_ID,
+            HASH_TABLE_OUTPUT_PATH_DIR,
+            startRow,
+            stopRow,
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+    return (List<BoundedSource<RangeHash>>) source.split(0, null); // 0 = desiredBundleSizeBytes
+  }
+
+  private void testSourceSplits(
+      List<ImmutableBytesWritable> partitions,
+      ImmutableBytesWritable startRow,
+      ImmutableBytesWritable stopRow,
+      List<BoundedSource<RangeHash>> expectedSources)
+      throws IOException { // compares the split result to expectedSources (List equality)
+    assertEquals(expectedSources, getSplitSources(partitions, startRow, stopRow));
+  }
+
+  @Test
+  public void testSplitZeroPartitions() throws IOException {
+    // Row range [a,z) with no split points: expect a single source covering [a,z).
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, STOP_ROW));
+    testSourceSplits(ImmutableList.of(), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitOnePartition() throws IOException {
+    // Row range [a-z) with 1 split on {d}. The data files will be for {[a,d), [d,z)}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testMultiplePartitons() throws IOException { // NOTE(review): typo, "Partitions"
+    // Row range [a-z) with splits on {d,g}. The data files will be for {[a,d), [d,g), [g,z)}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), START_ROW, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitEmptyStartRow() throws IOException {
+    // Empty start row: row range ["",z) split on {d,g} gives sources {["",d), [d,g), [g,z)}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, EMPTY_ROW_KEY, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, STOP_ROW));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), EMPTY_ROW_KEY, STOP_ROW, expected);
+  }
+
+  @Test
+  public void testSplitEmptyStopRow() throws IOException {
+    // Empty stop row: row range [a,"") split on {d,g} gives sources {[a,d), [d,g), [g,"")}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, START_ROW, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, EMPTY_ROW_KEY));
+    testSourceSplits(ImmutableList.of(PARTITION1, PARTITION2), START_ROW, EMPTY_ROW_KEY, expected);
+  }
+
+  @Test
+  public void testSplitFullTableScan() throws IOException {
+    // Full table scan: row range ["","") split on {d,g} gives sources {["",d), [d,g), [g,"")}.
+    List<BoundedSource<RangeHash>> expected =
+        ImmutableList.of(
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, EMPTY_ROW_KEY, PARTITION1),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION1, PARTITION2),
+            new HadoopHashTableSource(
+                PROJECT_ID, HASH_TABLE_OUTPUT_PATH_DIR, PARTITION2, EMPTY_ROW_KEY));
+    testSourceSplits(
+        ImmutableList.of(PARTITION1, PARTITION2), EMPTY_ROW_KEY, EMPTY_ROW_KEY, expected);
+  }
+
+  @Test
+  public void testCreateReaderWithoutSplit() throws IOException {
+    source =
+        new HadoopHashTableSource(
+            PROJECT_ID,
+            HASH_TABLE_OUTPUT_PATH_DIR,
+            // When split is not called, start/stop are uninitialized. Start/stop are runtime params
+            // and are initialized in split/createReader.
+            null,
+            null,
+            new FakeTableHashWrapperFactory(fakeTableHashWrapper));
+    // Set the boundaries on the fake wrapper; the source itself was given null start/stop rows.
+    fakeTableHashWrapper.startRowInclusive = START_ROW;
+    fakeTableHashWrapper.stopRowExclusive = STOP_ROW;
+
+    // Create a reader directly from the unsplit source.
+    BoundedReader<RangeHash> reader = source.createReader(null);
+
+    // The reader must be a HashBasedReader for this source, using the wrapper's start/stop rows.
+    assertEquals(HashBasedReader.class, reader.getClass());
+    assertEquals(source, reader.getCurrentSource());
+    HashBasedReader hashBasedReader = (HashBasedReader) reader;
+    assertEquals(START_ROW, hashBasedReader.startRowInclusive);
+    assertEquals(STOP_ROW, hashBasedReader.stopRowExclusive);
+  }
+
+  @Test
+  public void testCreateReaderAfterSplit() throws IOException {
+    // A single partition yields 2 sources; take the first, which covers [START_ROW, PARTITION1).
+    List<BoundedSource<RangeHash>> splitSources =
+        getSplitSources(ImmutableList.of(PARTITION1), START_ROW, STOP_ROW);
+    BoundedSource<RangeHash> splitHashSource = splitSources.get(0);
+
+    // Create a reader from the split source.
+    BoundedReader<RangeHash> reader = splitHashSource.createReader(null);
+
+    // The reader must be a HashBasedReader for the split source, with that source's row range.
+    assertEquals(HashBasedReader.class, reader.getClass());
+    assertEquals(splitHashSource, reader.getCurrentSource());
+    HashBasedReader hashBasedReader = (HashBasedReader) reader;
+    assertEquals(START_ROW, hashBasedReader.startRowInclusive);
+    assertEquals(PARTITION1, hashBasedReader.stopRowExclusive);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
new file mode 100644
index 0000000000..f58becf3cb
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/HashBasedSourceSerializationTest.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import static com.google.common.truth.Truth.assertWithMessage;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import junit.framework.TestCase;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HashBasedSourceSerializationTest extends TestCase {
+
+  public static final String SOURCE_HASH_DIR = "gs://my-bucket/outputDir";
+  public static final String PROJECT_ID = "test-project";
+  private static final ImmutableBytesWritable START_ROW =
+      new ImmutableBytesWritable("a".getBytes());
+  private static final ImmutableBytesWritable STOP_ROW = new ImmutableBytesWritable("y".getBytes());
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp(); // no fixture of its own; only delegates to the superclass setUp()
+  }
+
+  @Test
+  public void testSerializeWithValueProviders() throws IOException { // no start/stop rows given
+    checkSerialization(
+        new HadoopHashTableSource(
+            StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR)));
+  }
+
+  @Test
+  public void testSerializeWithStartStop() throws IOException { // explicit start/stop rows
+    checkSerialization(
+        new HadoopHashTableSource(
+            StaticValueProvider.of(PROJECT_ID),
+            StaticValueProvider.of(SOURCE_HASH_DIR),
+            new ImmutableBytesWritable(START_ROW),
+            new ImmutableBytesWritable(STOP_ROW)));
+  }
+
+  @Test
+  public void testBufferedSourceSerialize() { // buffered wrapper, single-argument constructor
+    checkSerialization(
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR))));
+  }
+
+  @Test
+  public void testBufferedSourceSerializeWithBatchSize() {
+    checkSerialization(
+        new BufferedHadoopHashTableSource(
+            new HadoopHashTableSource(
+                StaticValueProvider.of(PROJECT_ID), StaticValueProvider.of(SOURCE_HASH_DIR)),
+            5)); // presumably the batch size, per the test name -- confirm against the constructor
+  }
+
+  private static void checkSerialization(Object source) {
+    try {
+      Object deserialized = serializeDeserialize(source); // Java serialization round trip
+      checkClassDeclaresSerialVersionUid(source.getClass());
+      assertEquals(source, deserialized); // the copy must equal the original
+    } catch (IOException | ClassNotFoundException e) {
+      throw new AssertionError(e.toString(), e); // fail, but keep the cause's stack trace
+    }
+  }
+
+  private static void checkClassDeclaresSerialVersionUid(Class<?> cls) {
+    String uid = "serialVersionUID";
+    for (Field field : cls.getDeclaredFields()) { // declared fields only, not inherited ones
+      if (uid.equals(field.getName())) { // compare by value; == would rely on string interning
+        int modifiers = field.getModifiers();
+        assertWithMessage(field + " is not static").that(Modifier.isStatic(modifiers)).isTrue();
+        assertWithMessage(field + " is not final").that(Modifier.isFinal(modifiers)).isTrue();
+        assertWithMessage(field + " is not private").that(Modifier.isPrivate(modifiers)).isTrue();
+        assertWithMessage(field + " must be long")
+            .that(field.getType().getSimpleName())
+            .isEqualTo("long");
+        return;
+      }
+    }
+    fail(cls + " does not declare serialVersionUID");
+  }
+
+  private static Object serializeDeserialize(Object obj)
+      throws IOException, ClassNotFoundException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(); // in-memory buffer, no file I/O
+    try (ObjectOutputStream outStream = new ObjectOutputStream(bos)) {
+      outStream.writeObject(obj);
+    }
+
+    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); // read the bytes back
+    try (ObjectInputStream inStream = new ObjectInputStream(bis)) {
+      return inStream.readObject();
+    }
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
new file mode 100644
index 0000000000..5f644e3b50
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/validation/RangeHashCoderTest.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2021 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.validation;
+
+import com.google.cloud.bigtable.beam.validation.HadoopHashTableSource.RangeHash;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.testing.CoderProperties;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class RangeHashCoderTest {
+  private static final RangeHashCoder TEST_CODER = new RangeHashCoder();
+  private static final ImmutableBytesWritable START =
+      new ImmutableBytesWritable("Start".getBytes());
+  private static final ImmutableBytesWritable STOP = new ImmutableBytesWritable("Stop".getBytes());
+  private static final ImmutableBytesWritable HASH = new ImmutableBytesWritable("hash".getBytes());
+  private static final ImmutableBytesWritable EMPTY =
+      new ImmutableBytesWritable(HConstants.EMPTY_BYTE_ARRAY);
+
+  @Test
+  public void encodeRangeHash() throws Exception { // decode(encode(x)) must equal x
+    CoderProperties.coderDecodeEncodeEqual(TEST_CODER, RangeHash.of(START, STOP, HASH));
+  }
+
+  @Test(expected = CoderException.class)
+  public void encodeNullThrowsCoderException() throws Exception {
+    CoderUtils.encodeToByteArray(TEST_CODER, null); // must throw; see @Test(expected = ...)
+  }
+
+  @Test
+  public void testEncodedTypeDescriptor() throws Exception { // JUnit order: (expected, actual)
+    Assert.assertEquals(TypeDescriptor.of(RangeHash.class), TEST_CODER.getEncodedTypeDescriptor());
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md b/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md
new file mode 100644
index 0000000000..3d9b722bb9
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md
@@ -0,0 +1,18 @@
+# Generating the test HBase snapshot for HBase snapshot import integration tests
+
+The file `generate_test_data.txt` is an HBase command line command sequence 
+used to generate the HBase snapshot test data.
+
+If you need to modify the test data used by `bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java`, 
+please make sure you have HBase installed and that `<path-to-hbase>/bin` is on your PATH.
+
+Then:
+
+    $ hbase shell ./generate_test_data.txt
+    $ hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -Dmapreduce.framework.name=local -snapshot test-snapshot -copy-to file:///<local-path>/data
+    
+    $ cd <local-path>
+    $ gsutil -m cp -r ./data/ gs://<test-bucket>/integration-test/
+    
+After this, you should be able to run the integration test with your new data by specifying
+`-Dcloud.test.data.folder=gs://<test-bucket>/integration-test/`
\ No newline at end of file
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/log4j.properties b/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/log4j.properties
index 7f9118c7bc..c609eb001a 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/log4j.properties
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/log4j.properties
@@ -22,3 +22,7 @@ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
 log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
 log4j.category.org.apache.beam.sdk.io.FileBasedSource=WARN
 log4j.category.com.google.cloud.bigtable.beam.sequencefiles.SequenceFileSource=WARN
+# make hbase snapshot import integration tests output less verbose.
+log4j.category.org.apache.hadoop=WARN
+log4j.category.org.apache.beam.runners.dataflow.util.MonitoringUtil=WARN
+log4j.category.org.apache.beam.runners.dataflow.util.MonitoringUtil.LoggingHandler=WARN
\ No newline at end of file
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-mapreduce/pom.xml b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-mapreduce/pom.xml
index ef0b866ec6..7cf1993350 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-mapreduce/pom.xml
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-mapreduce/pom.xml
@@ -39,6 +39,16 @@ limitations under the License.
     <hadoop.scope>provided</hadoop.scope>
   </properties>
 
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-common</artifactId>
+        <version>${hadoop.version}</version>
+        <scope>${hadoop.scope}</scope>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
   <dependencies>
     <dependency>
       <groupId>${project.groupId}</groupId>
diff --git a/pom.xml b/pom.xml
index 215a7bc69e..9e6d60dc08 100644
--- a/pom.xml
+++ b/pom.xml
@@ -81,6 +81,7 @@ limitations under the License.
     <guava.version>30.0-android</guava.version>
     <beam-guava.version>20.0</beam-guava.version>
     <beam-auto-value.version>1.7</beam-auto-value.version>
+    <gcs-guava.version>29.0-jre</gcs-guava.version>
     <beam-grpc.version>1.29.0</beam-grpc.version>
 
     <!-- Benchmarks related dependencies -->
@@ -165,7 +166,7 @@ limitations under the License.
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-shade-plugin</artifactId>
-          <version>3.2.2</version>
+          <version>3.2.4</version>
         </plugin>
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
@@ -175,7 +176,7 @@ limitations under the License.
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-javadoc-plugin</artifactId>
-          <version>3.1.1</version>
+          <version>3.2.0</version>
           <configuration>
             <doclint>none</doclint>
             <windowtitle>