From 85109f9f50510b8b9aa6a94f9e4e03e1affe9ac5 Mon Sep 17 00:00:00 2001
From: Gabriel Mechali <gmechali@google.com>
Date: Fri, 24 Apr 2026 15:38:33 -0400
Subject: [PATCH 1/2] Terraform main

---
 import-automation/terraform/main.tf | 147 +++++++++++++++-------------
 1 file changed, 77 insertions(+), 70 deletions(-)

diff --git a/import-automation/terraform/main.tf b/import-automation/terraform/main.tf
index 7c6ed1fd1d..c7c063d2d4 100644
--- a/import-automation/terraform/main.tf
+++ b/import-automation/terraform/main.tf
@@ -1,24 +1,16 @@
-# Terraform deployment for Data Commons Import Automation Workflow
+# Copyright 2026 Google LLC
 #
-# Usage:
-# - Authenticate and set up application default credentials for Terraform to access GCP using 'gcloud auth login --update-adc'.
-# - Obtain DataCommons API key: Get an API key portal https://apikeys.datacommons.org/ to be used as the `dc_api_key` variable.
-# - Deploy the infrastructure and resources defined in this configuration using 'terraform apply'.
-# - The output service account needs to have required permissions to access external resources.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
-# Input variables:
-# - GCP project id
-# - DC API key
+#      http://www.apache.org/licenses/LICENSE-2.0
 #
-# This file sets up:
-# - Necessary GCP APIs
-# - Secret Manager for the import-config secret
-# - GCS Buckets for imports, mounting, and Dataflow templates
-# - Spanner Instance and Database with schema
-# - Artifact Registry for hosting Docker images (Flex Template & Executor)
-# - Pub/Sub Topic and Subscription for triggering imports
-# - Cloud Functions, Workflows, and Ingestion Pipeline
-# - Unified Service Account with necessary IAM roles for Workflows, Functions, and Pub/Sub
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 terraform {
   required_providers {
@@ -85,6 +77,25 @@ variable "dc_api_key" {
   sensitive   = true
 }
 
+# New Variables
+variable "ingestion_helper_image" {
+  description = "The Docker image for the ingestion helper service"
+  type        = string
+  default     = "us-docker.pkg.dev/datcom-ci/gcr.io/datacommons-ingestion-helper:latest"
+}
+
+variable "spanner_project_id" {
+  description = "Spanner Project ID"
+  type        = string
+  default     = ""
+}
+
+variable "gcs_bucket_id" {
+  description = "GCS Bucket ID for imports (overrides default generated bucket)"
+  type        = string
+  default     = ""
+}
+
 # --- APIs ---
 
 locals {
@@ -104,6 +115,8 @@ locals {
     "storage.googleapis.com",
     "workflows.googleapis.com",
   ]
+  spanner_project_id = var.spanner_project_id != "" ? var.spanner_project_id : var.project_id
+  gcs_bucket_id      = var.gcs_bucket_id != "" ? var.gcs_bucket_id : google_storage_bucket.import_bucket.name
 }
 
 resource "google_project_service" "services" {
@@ -172,12 +185,6 @@ resource "google_storage_bucket" "mount_bucket" {
 
 # --- Cloud Functions Source Packaging ---
 
-data "archive_file" "ingestion_helper_source" {
-  type        = "zip"
-  source_dir  = "${path.module}/../workflow/ingestion-helper"
-  output_path = "${path.module}/ingestion_helper.zip"
-}
-
 data "archive_file" "aggregation_helper_source" {
   type        = "zip"
   source_dir  = "${path.module}/../workflow/aggregation-helper"
@@ -190,12 +197,6 @@ data "archive_file" "import_helper_source" {
   output_path = "${path.module}/import_helper.zip"
 }
 
-resource "google_storage_bucket_object" "ingestion_helper_zip" {
-  name   = "function-source/ingestion_helper.${data.archive_file.ingestion_helper_source.output_md5}.zip"
-  bucket = google_storage_bucket.import_bucket.name
-  source = data.archive_file.ingestion_helper_source.output_path
-}
-
 resource "google_storage_bucket_object" "aggregation_helper_zip" {
   name   = "function-source/aggregation_helper.${data.archive_file.aggregation_helper_source.output_md5}.zip"
   bucket = google_storage_bucket.import_bucket.name
@@ -210,41 +211,6 @@ resource "google_storage_bucket_object" "import_helper_zip" {
 
 # --- Cloud Functions ---
 
-resource "google_cloudfunctions2_function" "ingestion_helper" {
-  name        = "spanner-ingestion-helper"
-  location    = var.region
-  project     = var.project_id
-  description = "Helper for Spanner ingestion"
-
-  build_config {
-    runtime     = "python312"
-    entry_point = "ingestion_helper"
-    source {
-      storage_source {
-        bucket = google_storage_bucket.import_bucket.name
-        object = google_storage_bucket_object.ingestion_helper_zip.name
-      }
-    }
-  }
-
-  service_config {
-    max_instance_count = 10
-    available_memory   = "256M"
-    timeout_seconds    = 60
-    service_account_email = google_service_account.automation_sa.email
-    environment_variables = {
-      PROJECT_ID          = var.project_id
-      SPANNER_PROJECT_ID  = var.project_id
-      SPANNER_INSTANCE_ID = var.spanner_instance_id
-      SPANNER_DATABASE_ID = var.spanner_database_id
-      GCS_BUCKET_ID       = google_storage_bucket.import_bucket.name
-      LOCATION            = var.region
-    }
-  }
-
-  depends_on = [google_project_service.services]
-}
-
 resource "google_cloudfunctions2_function" "aggregation_helper" {
   name        = "import-aggregation-helper"
   location    = var.region
@@ -269,10 +235,10 @@ resource "google_cloudfunctions2_function" "aggregation_helper" {
     service_account_email = google_service_account.automation_sa.email
     environment_variables = {
       PROJECT_ID          = var.project_id
-      SPANNER_PROJECT_ID  = var.project_id
+      SPANNER_PROJECT_ID  = local.spanner_project_id
       SPANNER_INSTANCE_ID = var.spanner_instance_id
       SPANNER_DATABASE_ID = var.spanner_database_id
-      GCS_BUCKET_ID       = google_storage_bucket.import_bucket.name
+      GCS_BUCKET_ID       = local.gcs_bucket_id
       LOCATION            = var.region
       BQ_DATASET_ID       = var.bq_dataset_id
     }
@@ -306,8 +272,50 @@ resource "google_cloudfunctions2_function" "import_helper" {
     environment_variables = {
       PROJECT_ID    = var.project_id
       LOCATION      = var.region
-      GCS_BUCKET_ID = google_storage_bucket.import_bucket.name
+      GCS_BUCKET_ID = local.gcs_bucket_id
+    }
+  }
+
+  depends_on = [google_project_service.services]
+}
+
+# --- Cloud Run Service ---
+
+resource "google_cloud_run_v2_service" "ingestion_helper_service" {
+  name     = "ingestion-helper-service"
+  location = var.region
+  project  = var.project_id
+
+  template {
+    containers {
+      image = var.ingestion_helper_image
+      env {
+        name  = "PROJECT_ID"
+        value = var.project_id
+      }
+      env {
+        name  = "SPANNER_PROJECT_ID"
+        value = local.spanner_project_id
+      }
+      env {
+        name  = "SPANNER_INSTANCE_ID"
+        value = var.spanner_instance_id
+      }
+      env {
+        name  = "SPANNER_DATABASE_ID"
+        value = var.spanner_database_id
+      }
+      env {
+        name  = "GCS_BUCKET_ID"
+        value = local.gcs_bucket_id
+      }
+      env {
+        name  = "LOCATION"
+        value = var.region
+      }
     }
+    # Using the default compute SA to avoid permission issues
+    service_account = "965988403328-compute@developer.gserviceaccount.com"
   }
 
   depends_on = [google_project_service.services]
@@ -343,7 +351,7 @@ resource "google_workflows_workflow" "spanner_ingestion_workflow" {
   user_env_vars = {
     LOCATION            = var.region
     PROJECT_ID          = var.project_id
-    SPANNER_PROJECT_ID  = var.project_id
+    SPANNER_PROJECT_ID  = local.spanner_project_id
     SPANNER_INSTANCE_ID = var.spanner_instance_id
     SPANNER_DATABASE_ID = var.spanner_database_id
   }
@@ -456,4 +464,3 @@ output "automation_service_account_email" {
   value       = google_service_account.automation_sa.email
   description = "The email of the service account used for import automation."
 }
-

From 3e664d28e21035dd0e8b6ba5e6867c2f584e5819 Mon Sep 17 00:00:00 2001
From: Gabriel Mechali <gmechali@google.com>
Date: Fri, 24 Apr 2026 15:40:44 -0400
Subject: [PATCH 2/2] Readds instructions

---
 import-automation/terraform/main.tf | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/import-automation/terraform/main.tf b/import-automation/terraform/main.tf
index c7c063d2d4..b8794a793f 100644
--- a/import-automation/terraform/main.tf
+++ b/import-automation/terraform/main.tf
@@ -12,6 +12,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Terraform deployment for Data Commons Import Automation Workflow
+#
+# Usage:
+# - Authenticate and set up application default credentials for Terraform to access GCP using 'gcloud auth login --update-adc'.
+# - Obtain DataCommons API key: Get an API key portal https://apikeys.datacommons.org/ to be used as the `dc_api_key` variable.
+# - Deploy the infrastructure and resources defined in this configuration using 'terraform apply'.
+# - The output service account needs to have required permissions to access external resources.
+#
+# Input variables:
+# - GCP project id
+# - DC API key
+#
+# This file sets up:
+# - Necessary GCP APIs
+# - Secret Manager for the import-config secret
+# - GCS Buckets for imports, mounting, and Dataflow templates
+# - Spanner Instance and Database with schema
+# - Artifact Registry for hosting Docker images (Flex Template & Executor)
+# - Pub/Sub Topic and Subscription for triggering imports
+# - Cloud Functions, Workflows, and Ingestion Pipeline
+# - Unified Service Account with necessary IAM roles for Workflows, Functions, and Pub/Sub
+
 terraform {
   required_providers {
     google = {