From 397ea29d99b44a39887724cd44ec4b3056791aee Mon Sep 17 00:00:00 2001
From: gurusai-voleti
Date: Fri, 16 Jan 2026 22:43:40 +0530
Subject: [PATCH 1/6] chore: Migrate gsutil usage to gcloud storage (#1703)

Co-authored-by: Andrew Gold <41129777+agold-rh@users.noreply.github.com>
---
 examples/dataflow-flex-python/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/dataflow-flex-python/README.md b/examples/dataflow-flex-python/README.md
index 512671c604d..d486a7bc000 100644
--- a/examples/dataflow-flex-python/README.md
+++ b/examples/dataflow-flex-python/README.md
@@ -93,10 +93,10 @@ gcloud storage buckets create gs://$INPUT_BUCKET_NAME --location $LOCATION --pro
 # Create a bucket for dataflow staging and temp locations
 gcloud storage buckets create gs://$STAGING_BUCKET_NAME --location $LOCATION --project $PROJECT_ID

-gsutil iam ch serviceAccount:dataflow-worker-sa@$PROJECT_ID.iam.gserviceaccount.com:roles/storage.legacyBucketWriter gs://$STAGING_BUCKET_NAME
+gcloud storage buckets add-iam-policy-binding gs://$STAGING_BUCKET_NAME --member="serviceAccount:dataflow-worker-sa@$PROJECT_ID.iam.gserviceaccount.com" --role="roles/storage.legacyBucketWriter"

 # Assign Legacy Bucket Writer Role on Input bucket in order to move the object
-gsutil iam ch serviceAccount:dataflow-worker-sa@$PROJECT_ID.iam.gserviceaccount.com:roles/storage.legacyBucketWriter gs://$INPUT_BUCKET_NAME
+gcloud storage buckets add-iam-policy-binding gs://$INPUT_BUCKET_NAME --member="serviceAccount:dataflow-worker-sa@$PROJECT_ID.iam.gserviceaccount.com" --role="roles/storage.legacyBucketWriter"
 ```

 #### Create BQ Dataset

From 88b5b1c8f179dd7c6b0ff3c1e6a4e84c7e60fc77 Mon Sep 17 00:00:00 2001
From: gurusai-voleti
Date: Fri, 16 Jan 2026 22:47:19 +0530
Subject: [PATCH 2/6] chore: Migrate gsutil usage to gcloud storage (#1705)

Co-authored-by: Andrew Gold <41129777+agold-rh@users.noreply.github.com>
---
 examples/dataflow-production-ready/python/README.md | 3 +--
 .../dataflow-production-ready/run_system_integration_test.sh | 4 ++--
 examples/dataflow-production-ready/terraform/README.MD | 3 +--
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/examples/dataflow-production-ready/python/README.md b/examples/dataflow-production-ready/python/README.md
index 1eb7f5fec6d..03b37cc3774 100644
--- a/examples/dataflow-production-ready/python/README.md
+++ b/examples/dataflow-production-ready/python/README.md
@@ -37,7 +37,7 @@ gcloud config set project $GCP_PROJECT

 Then, create a GCS bucket for this demo
 ```
-gsutil mb -l $REGION -p $GCP_PROJECT gs://$BUCKET_NAME
+gcloud storage buckets create gs://$BUCKET_NAME --location $REGION --project $GCP_PROJECT
 ```

@@ -181,4 +181,3 @@ To trigger a build on certain actions (e.g. commits to master)
 2. Configure the trigger
 3. Point the trigger to the [cloudbuild.yaml](ml_preproc/cloudbuild.yaml) file in the repository
 4. Add the substitution variables as explained in the [Substitution variables](#substitution-variables) section.
-
diff --git a/examples/dataflow-production-ready/run_system_integration_test.sh b/examples/dataflow-production-ready/run_system_integration_test.sh
index cfee80ef6e4..5d8e468c2c2 100755
--- a/examples/dataflow-production-ready/run_system_integration_test.sh
+++ b/examples/dataflow-production-ready/run_system_integration_test.sh
@@ -53,8 +53,8 @@ SELECT r.flag AND e.flag FROM e, r"
 echo "Preparing GCP test resources.."
 gcloud config set project "${GCP_PROJECT}"

-gsutil mb -c standard -l "${REGION}" "${GCS_BUCKET}"
-gsutil cp "${LOCAL_INPUT_PATH}" "${GCS_BUCKET}/input/"
+gcloud storage buckets create --default-storage-class=standard --location="${REGION}" "${GCS_BUCKET}"
+gcloud storage cp "${LOCAL_INPUT_PATH}" "${GCS_BUCKET}/input/"
 #replace with terraform script and pass the dataset as var
 bq mk --location "${REGION}" "${DATASET}"
 bq mk --table "${RESULTS_TABLE}" schema/ml_preproc_results.json
diff --git a/examples/dataflow-production-ready/terraform/README.MD b/examples/dataflow-production-ready/terraform/README.MD
index e034d15171e..10ec580d607 100644
--- a/examples/dataflow-production-ready/terraform/README.MD
+++ b/examples/dataflow-production-ready/terraform/README.MD
@@ -1,4 +1,3 @@
-
 ## Intro

 This Terraform module will automate the creation of infrastructure components needed for the data pipeline.
@@ -22,7 +21,7 @@ export BQ_DATASET=
 * Update the Terraform backend bucket name in [backend.tf](backend.tf) (if necessary) and create a bucket with the same name
 ```
-gsutil mb -l $REGION -p $GCP_PROJECT gs://
+gcloud storage buckets create --location $REGION --project $GCP_PROJECT gs://
 ```

 * From the **repo root folder**, set up the environment via Cloud Build

From 88e642f41e8dcea57eddc268f5500f417f2c8a99 Mon Sep 17 00:00:00 2001
From: gurusai-voleti
Date: Fri, 16 Jan 2026 22:57:39 +0530
Subject: [PATCH 3/6] chore: Migrate gsutil usage to gcloud storage (#1706)

Co-authored-by: Andrew Gold <41129777+agold-rh@users.noreply.github.com>
---
 examples/dataflow-xml-pubsub-to-gcs/python/README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/dataflow-xml-pubsub-to-gcs/python/README.md b/examples/dataflow-xml-pubsub-to-gcs/python/README.md
index 0849cba35e2..56ec337fa3e 100644
--- a/examples/dataflow-xml-pubsub-to-gcs/python/README.md
+++ b/examples/dataflow-xml-pubsub-to-gcs/python/README.md
@@ -95,7 +95,7 @@ gcloud pubsub topics create $TOPIC_ID
 The output will write to a GCS bucket:
 ```
 export BUCKET_NAME=
-gsutil mb gs://$BUCKET_NAME
+gcloud storage buckets create gs://$BUCKET_NAME
 ```

 # 4. Run the test
@@ -151,8 +151,8 @@ monitoring screen.
 List the generated files in the GCS bucket and inspect their contents
 ```
-gsutil ls gs://${BUCKET_NAME}/output_location/
-gsutil cat gs://${BUCKET_NAME}/output_location/*
+gcloud storage ls gs://${BUCKET_NAME}/output_location/
+gcloud storage cat gs://${BUCKET_NAME}/output_location/*
 ```

 # 5. Clean up
@@ -164,11 +164,11 @@ gcloud pubsub topics delete $TOPIC_ID
 ```
 2. Delete the GCS files
 ```
-gsutil -m rm -rf "gs://${BUCKET_NAME}/output_location/*"
+gcloud storage rm --recursive --continue-on-error "gs://${BUCKET_NAME}/output_location/*"
 ```
 3. Remove the GCS bucket
 ```
-gsutil rb gs://${BUCKET_NAME}
+gcloud storage buckets delete gs://${BUCKET_NAME}
 ```
 4. **Optionally** Revoke the authentication credentials that you created, and delete the local credential file.
 ```

From 9a0ca045928b64ee6f1f764a35d35f3a0f13be90 Mon Sep 17 00:00:00 2001
From: gurusai-voleti
Date: Fri, 16 Jan 2026 23:09:31 +0530
Subject: [PATCH 4/6] chore: Migrate gsutil usage to gcloud storage (#1707)

Co-authored-by: Andrew Gold <41129777+agold-rh@users.noreply.github.com>
---
 examples/dataproc-gcs-connector/connectors.sh | 2 +-
 examples/dataproc-gcs-connector/test_gcs_connector.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/dataproc-gcs-connector/connectors.sh b/examples/dataproc-gcs-connector/connectors.sh
index f40087800a3..665032e8250 100644
--- a/examples/dataproc-gcs-connector/connectors.sh
+++ b/examples/dataproc-gcs-connector/connectors.sh
@@ -126,7 +126,7 @@ update_connector_url() {
  fi

  # UPDATED this line to pull correct GCS connector
- gsutil cp "gs://gcs-connector-init_actions/gcs-connector-${HADOOP_VERSION}-shaded.jar" "${vm_connectors_dir}/"
+ gcloud storage cp "gs://gcs-connector-init_actions/gcs-connector-${HADOOP_VERSION}-shaded.jar" "${vm_connectors_dir}/"

  local -r jar_name=${url##*/}

diff --git a/examples/dataproc-gcs-connector/test_gcs_connector.sh b/examples/dataproc-gcs-connector/test_gcs_connector.sh
index 39075692156..7aef25569ce 100755
--- a/examples/dataproc-gcs-connector/test_gcs_connector.sh
+++ b/examples/dataproc-gcs-connector/test_gcs_connector.sh
@@ -48,4 +48,4 @@ gcloud dataproc jobs submit hive --region=us-central1 \
 --cluster=${YOUR_CLUSTER} \
 -e="$Q1" -e="$Q2" -e="$Q3"

-gsutil cat gs://${YOUR_BUCKET}/data_files/top_ten/000000_0
+gcloud storage cat gs://${YOUR_BUCKET}/data_files/top_ten/000000_0

From c8f22bbf8cbcff9819bd9038e0f605d09a861a03 Mon Sep 17 00:00:00 2001
From: gurusai-voleti
Date: Fri, 16 Jan 2026 23:17:46 +0530
Subject: [PATCH 5/6] chore: Migrate gsutil usage to gcloud storage (#1708)

Co-authored-by: Andrew Gold <41129777+agold-rh@users.noreply.github.com>
---
 examples/dataproc-idle-shutdown/README.md | 4 ++--
 examples/dataproc-idle-shutdown/create-idlemonitoringjob.sh | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/dataproc-idle-shutdown/README.md b/examples/dataproc-idle-shutdown/README.md
index 73b792056b2..2a0af476bec 100644
--- a/examples/dataproc-idle-shutdown/README.md
+++ b/examples/dataproc-idle-shutdown/README.md
@@ -36,7 +36,7 @@ git clone https://github.com/GoogleCloudPlatform/professional-services.git
 ```
 Copy all artifacts to Cloud Storage:
 ```
-gsutil cp ./professional-services/examples/dataproc-idle-check/*sh gs://
+gcloud storage cp ./professional-services/examples/dataproc-idle-check/*sh gs://
 ```

 ### Cluster start: Start the cluster specifying key parameters
 2. [Mandatory] Specify the location of the idle-check.sh script as the value of the metadata key “script_storage_location”. The location of the idle-check.sh script and the maximum idle time should be specified as metadata using the “script_storage_location” and “max-idle” keys, respectively.
 3. [Mandatory] Specify the maximum idle time to allow the cluster to be idle as the value of the metadata key “max-idle”. Similar to the parameter associated with Scheduled Cluster deletion, the max-idle duration parameter should be provided in IntegerUnit format, where the unit can be “s, m, h, d” (seconds, minutes, hours, days, respectively). Examples: “30m” or “1d” (30 minutes or 1 day from when the cluster becomes idle).
 4. [Optional] Specify, as the value of the metadata key “key_process_list”, a semi-colin separated list of process names (in addition to YARN jobs and active SSH connections) for which the cluster should be considered active.
-5. [Optional] Specify if the cluster should write diagnostic logs to the Cloud Storage staging bucket (TRUE/FALSE) as the value of the metadata key "persist_diagnostic_tarball" (TRUE). Unless specified, the default value is FALSE. The diagnostic output is saved in a folder specific to the job under which the DIAGNOSE command was run, the best way to locate the diagnostic output is " gsutil ls gs://[GCS STAGING BUCKET]/google-cloud-dataproc-metainfo/*/*/diagnostic.tar.gz".
+5. [Optional] Specify if the cluster should write diagnostic logs to the Cloud Storage staging bucket (TRUE/FALSE) as the value of the metadata key "persist_diagnostic_tarball" (TRUE). Unless specified, the default value is FALSE. The diagnostic output is saved in a folder specific to the job under which the DIAGNOSE command was run, the best way to locate the diagnostic output is " gcloud storage ls gs://[GCS STAGING BUCKET]/google-cloud-dataproc-metainfo/*/*/diagnostic.tar.gz".

 >Note: [Google APIs](https://developers.google.com/identity/protocols/googlescopes) must also be included in scopes in order for the scripts to read and write cluster metadata.

diff --git a/examples/dataproc-idle-shutdown/create-idlemonitoringjob.sh b/examples/dataproc-idle-shutdown/create-idlemonitoringjob.sh
index 6070b443841..9cbeec69cde 100644
--- a/examples/dataproc-idle-shutdown/create-idlemonitoringjob.sh
+++ b/examples/dataproc-idle-shutdown/create-idlemonitoringjob.sh
@@ -90,7 +90,7 @@ function startIdleJobChecker() {
 cd DataprocShutdown || exit

 # copy the script from GCS
- gsutil cp "${SCRIPT_STORAGE_LOCATION}/idle-check.sh" .
+ gcloud storage cp "${SCRIPT_STORAGE_LOCATION}/idle-check.sh" .
 # make it executable
 chmod 700 idle-check.sh
 # run IsIdle script

From 4fc61a468cde862ea5e8400947fe934a73838b24 Mon Sep 17 00:00:00 2001
From: gurusai-voleti
Date: Sat, 17 Jan 2026 02:08:06 +0530
Subject: [PATCH 6/6] chore: Migrate gsutil usage to gcloud storage (#1709)

Co-authored-by: Andrew Gold <41129777+agold-rh@users.noreply.github.com>
---
 .../dataproc-job-optimization-guide/README.md | 16 ++++++++--------
 .../scripts/setup.sh | 6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/examples/dataproc-job-optimization-guide/README.md b/examples/dataproc-job-optimization-guide/README.md
index 586164f465e..5bbc4a33572 100755
--- a/examples/dataproc-job-optimization-guide/README.md
+++ b/examples/dataproc-job-optimization-guide/README.md
@@ -57,7 +57,7 @@ A sizing cluster can help determine the right number of workers for your applica
 ```bash
-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP
 gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-sizing scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
 ```
@@ -93,7 +93,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-2x8-standard \
 --worker-boot-disk-size=1000GB \
 --region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP
 gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-2x8-standard scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
 ```
@@ -114,7 +114,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-4x4-standard \
 --worker-boot-disk-size=1000GB \
 --region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP
 gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-4x4-standard scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
 ```
@@ -135,7 +135,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-8x2-standard \
 --worker-boot-disk-size=1000GB \
 --region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP
 gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-standard scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
 ```
@@ -169,7 +169,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-8x2-balanced \
 --worker-boot-disk-size=500GB \
 --region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP
 gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-balanced scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
 ```
@@ -190,7 +190,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-8x2-ssd \
 --worker-boot-disk-size=250GB \
 --region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP
 gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-ssd scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
 ```
@@ -213,7 +213,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-8x2-ssd-costop \
 --worker-boot-disk-size=30GB \
 --region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP
 gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-ssd-costop scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
 ```
@@ -230,7 +230,7 @@ sample job submit:
 **2 x n2-standard-8-ssd-costop-appop = 1 min 15 seconds**
 ```bash
-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP
 gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-ssd-costop scripts/spark_average_speed.py --properties='spark.executor.cores=5,spark.driver.cores=5,spark.executor.instances=1,spark.executor.memory=25459m,spark.driver.memory=25459m,spark.executor.memoryOverhead=2829m,spark.default.parallelism=10,spark.sql.shuffle.partitions=10,spark.shuffle.spill.compress=true,spark.checkpoint.compress=true,spark.io.compresion.codex=snappy,spark.dynamicAllocation=true,spark.shuffle.service.enabled=true' -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
 ```

diff --git a/examples/dataproc-job-optimization-guide/scripts/setup.sh b/examples/dataproc-job-optimization-guide/scripts/setup.sh
index 6e4f9c6e5e7..bf10e24e221 100755
--- a/examples/dataproc-job-optimization-guide/scripts/setup.sh
+++ b/examples/dataproc-job-optimization-guide/scripts/setup.sh
@@ -62,7 +62,7 @@
 echo "===================================================="
 echo " Removing old infrastructure ..."

-gsutil -m rm -r gs://"$bucket"
+gcloud storage rm --recursive gs://"$bucket"
 bq rm -t=true -f=true "$bucket".myTableCopy
 bq rm -t=true -f=true "$bucket".yellow_trips_copy
@@ -70,11 +70,11 @@ bq rm -t=true -f=true "$bucket".yellow_trips_copy
 echo "===================================================="
 echo " Building infrastructure ..."

-gsutil mb -c regional -l "$region" gs://"$bucket"
+gcloud storage buckets create --default-storage-class=regional --location="$region" gs://"$bucket"

 bq mk "$bucket"

-gsutil cp scripts/spark_average_speed.py gs://"$bucket"/scripts/spark_average_speed.py
+gcloud storage cp scripts/spark_average_speed.py gs://"$bucket"/scripts/spark_average_speed.py

 echo "===================================================="
 echo " Loading data ..."