4 changes: 2 additions & 2 deletions examples/dataflow-flex-python/README.md
@@ -93,10 +93,10 @@ gcloud storage buckets create gs://$INPUT_BUCKET_NAME --location $LOCATION --pro
# Create a bucket for dataflow staging and temp locations
gcloud storage buckets create gs://$STAGING_BUCKET_NAME --location $LOCATION --project $PROJECT_ID

-gsutil iam ch serviceAccount:dataflow-worker-sa@$PROJECT_ID.iam.gserviceaccount.com:roles/storage.legacyBucketWriter gs://$STAGING_BUCKET_NAME
+gcloud storage buckets add-iam-policy-binding gs://$STAGING_BUCKET_NAME --member="serviceAccount:dataflow-worker-sa@$PROJECT_ID.iam.gserviceaccount.com" --role="roles/storage.legacyBucketWriter"

# Assign Legacy Bucket Writer Role on Input bucket in order to move the object
-gsutil iam ch serviceAccount:dataflow-worker-sa@$PROJECT_ID.iam.gserviceaccount.com:roles/storage.legacyBucketWriter gs://$INPUT_BUCKET_NAME
+gcloud storage buckets add-iam-policy-binding gs://$INPUT_BUCKET_NAME --member="serviceAccount:dataflow-worker-sa@$PROJECT_ID.iam.gserviceaccount.com" --role="roles/storage.legacyBucketWriter"
```

#### Create BQ Dataset
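A quick way to confirm both bindings took effect is to read back each bucket's IAM policy (a sketch, reusing the variables above):

```bash
gcloud storage buckets get-iam-policy gs://$STAGING_BUCKET_NAME
gcloud storage buckets get-iam-policy gs://$INPUT_BUCKET_NAME
```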
3 changes: 1 addition & 2 deletions examples/dataflow-production-ready/python/README.md
@@ -37,7 +37,7 @@ gcloud config set project $GCP_PROJECT

Then, create a GCS bucket for this demo
```
-gsutil mb -l $REGION -p $GCP_PROJECT gs://$BUCKET_NAME
+gcloud storage buckets create gs://$BUCKET_NAME --location $REGION --project $GCP_PROJECT
```


@@ -181,4 +181,3 @@ To trigger a build on certain actions (e.g. commits to master)
2. Configure the trigger
3. Point the trigger to the [cloudbuild.yaml](ml_preproc/cloudbuild.yaml) file in the repository
4. Add the substitution variables as explained in the [Substitution variables](#substitution-variables) section.
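
The console steps above can also be scripted; a sketch using `gcloud builds triggers create github`, assuming the GitHub repo is already connected to Cloud Build and with placeholder owner, repo, and substitution values:

```bash
gcloud builds triggers create github \
  --repo-owner=<GITHUB_OWNER> \
  --repo-name=<REPO_NAME> \
  --branch-pattern="^master$" \
  --build-config=ml_preproc/cloudbuild.yaml \
  --substitutions=<KEY>=<VALUE>
```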

@@ -53,8 +53,8 @@ SELECT r.flag AND e.flag FROM e, r"

echo "Preparing GCP test resources.."
gcloud config set project "${GCP_PROJECT}"
-gsutil mb -c standard -l "${REGION}" "${GCS_BUCKET}"
-gsutil cp "${LOCAL_INPUT_PATH}" "${GCS_BUCKET}/input/"
+gcloud storage buckets create --default-storage-class=standard --location="${REGION}" "${GCS_BUCKET}"
+gcloud storage cp "${LOCAL_INPUT_PATH}" "${GCS_BUCKET}/input/"
#replace with terraform script and pass the dataset as var
bq mk --location "${REGION}" "${DATASET}"
bq mk --table "${RESULTS_TABLE}" schema/ml_preproc_results.json
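A quick sanity check that the staged test resources exist before running the pipeline (a sketch, reusing the script's variables):

```bash
gcloud storage ls "${GCS_BUCKET}/input/"
bq ls "${DATASET}"
```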
3 changes: 1 addition & 2 deletions examples/dataflow-production-ready/terraform/README.MD
@@ -1,4 +1,3 @@

## Intro

This Terraform module will automate the creation of infrastructure components needed for the data pipeline.
@@ -22,7 +21,7 @@ export BQ_DATASET=<BigQuery Dataset Name>

* Update the Terraform backend bucket name in [backend.tf](backend.tf) (if necessary) and create a bucket with the same name
```
-gsutil mb -l $REGION -p $GCP_PROJECT gs://<bucket name as in backend.tf>
+gcloud storage buckets create --location $REGION --project $GCP_PROJECT gs://<bucket name as in backend.tf>
```

* From the **repo root folder**, set up the environment via Cloud Build
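Because this bucket holds Terraform state, turning on object versioning is a sensible extra step (a sketch; assumes the `--versioning` flag available in recent gcloud releases):

```bash
gcloud storage buckets update gs://<bucket name as in backend.tf> --versioning
```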
10 changes: 5 additions & 5 deletions examples/dataflow-xml-pubsub-to-gcs/python/README.md
@@ -95,7 +95,7 @@ gcloud pubsub topics create $TOPIC_ID
The output will be written to a GCS bucket:
```
export BUCKET_NAME=<CHANGE_ME>
-gsutil mb gs://$BUCKET_NAME
+gcloud storage buckets create gs://$BUCKET_NAME
```
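
Without a `--location` flag the bucket lands in the default `US` multi-region; to keep it next to the pipeline, the location can be pinned (a sketch assuming `us-central1`):

```bash
gcloud storage buckets create gs://$BUCKET_NAME --location=us-central1
```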

# 4. Run the test
@@ -151,8 +151,8 @@ monitoring screen.

List the generated files in the GCS bucket and inspect their contents
```
-gsutil ls gs://${BUCKET_NAME}/output_location/
-gsutil cat gs://${BUCKET_NAME}/output_location/*
+gcloud storage ls gs://${BUCKET_NAME}/output_location/
+gcloud storage cat gs://${BUCKET_NAME}/output_location/*
```

# 5. Clean up
@@ -164,11 +164,11 @@ gcloud pubsub topics delete $TOPIC_ID
```
2. Delete the GCS files
```
-gsutil -m rm -rf "gs://${BUCKET_NAME}/output_location/*"
+gcloud storage rm --recursive --continue-on-error "gs://${BUCKET_NAME}/output_location/*"
```
3. Remove the GCS bucket
```
-gsutil rb gs://${BUCKET_NAME}
+gcloud storage buckets delete gs://${BUCKET_NAME}
```
4. **Optionally** revoke the authentication credentials that you created, and delete the local credential file.
```
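Steps 2 and 3 can also be collapsed into one call: pointing `gcloud storage rm --recursive` at the bucket URL removes the objects and then the bucket itself, mirroring `gsutil rm -r`:

```bash
gcloud storage rm --recursive gs://${BUCKET_NAME}
```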
2 changes: 1 addition & 1 deletion examples/dataproc-gcs-connector/connectors.sh
@@ -126,7 +126,7 @@ update_connector_url() {
fi

# UPDATED this line to pull correct GCS connector
gsutil cp "gs://gcs-connector-init_actions/gcs-connector-${HADOOP_VERSION}-shaded.jar" "${vm_connectors_dir}/"
gcloud storage cp "gs://gcs-connector-init_actions/gcs-connector-${HADOOP_VERSION}-shaded.jar" "${vm_connectors_dir}/"

local -r jar_name=${url##*/}

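The `${url##*/}` expansion above strips everything through the last `/`, leaving just the jar's file name; a small illustration with a hypothetical URL:

```bash
# hypothetical URL, for illustration only
url="gs://gcs-connector-init_actions/gcs-connector-hadoop3-shaded.jar"
echo "${url##*/}"   # prints: gcs-connector-hadoop3-shaded.jar
```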
2 changes: 1 addition & 1 deletion examples/dataproc-gcs-connector/test_gcs_connector.sh
@@ -48,4 +48,4 @@ gcloud dataproc jobs submit hive --region=us-central1 \
--cluster=${YOUR_CLUSTER} \
-e="$Q1" -e="$Q2" -e="$Q3"

-gsutil cat gs://${YOUR_BUCKET}/data_files/top_ten/000000_0
+gcloud storage cat gs://${YOUR_BUCKET}/data_files/top_ten/000000_0
4 changes: 2 additions & 2 deletions examples/dataproc-idle-shutdown/README.md
@@ -36,15 +36,15 @@ git clone https://github.com/GoogleCloudPlatform/professional-services.git
```
Copy all artifacts to Cloud Storage:
```
-gsutil cp ./professional-services/examples/dataproc-idle-check/*sh gs://<BUCKET>
+gcloud storage cp ./professional-services/examples/dataproc-idle-check/*sh gs://<BUCKET>
```

### Cluster start: Start the cluster specifying key parameters
1. [Mandatory] Specify the location of the create-idlemonitoringjob.sh script as an "--initialization-actions" parameter.
2. [Mandatory] Specify the location of the idle-check.sh script as the value of the metadata key "script_storage_location".
3. [Mandatory] Specify the maximum time the cluster may remain idle as the value of the metadata key "max-idle". As with the parameter for Scheduled Cluster Deletion, the max-idle duration should be given in IntegerUnit format, where the unit can be "s, m, h, d" (seconds, minutes, hours, days). Examples: "30m" or "1d" (30 minutes or 1 day from when the cluster becomes idle).
4. [Optional] Specify, as the value of the metadata key "key_process_list", a semicolon-separated list of process names (in addition to YARN jobs and active SSH connections) whose presence keeps the cluster considered active.
-5. [Optional] Specify whether the cluster should write diagnostic logs to the Cloud Storage staging bucket (TRUE/FALSE) as the value of the metadata key "persist_diagnostic_tarball". Unless specified, the default value is FALSE. The diagnostic output is saved in a folder specific to the job under which the DIAGNOSE command was run; the easiest way to locate it is "gsutil ls gs://[GCS STAGING BUCKET]/google-cloud-dataproc-metainfo/*/*/diagnostic.tar.gz".
+5. [Optional] Specify whether the cluster should write diagnostic logs to the Cloud Storage staging bucket (TRUE/FALSE) as the value of the metadata key "persist_diagnostic_tarball". Unless specified, the default value is FALSE. The diagnostic output is saved in a folder specific to the job under which the DIAGNOSE command was run; the easiest way to locate it is "gcloud storage ls gs://[GCS STAGING BUCKET]/google-cloud-dataproc-metainfo/*/*/diagnostic.tar.gz".

>Note: [Google APIs](https://developers.google.com/identity/protocols/googlescopes) must also be included in scopes in order for the scripts to read and write cluster metadata.

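Pulling the mandatory and optional keys together, a cluster start might look like the sketch below; the cluster name, region, bucket, and metadata values are all placeholders:

```bash
gcloud dataproc clusters create <CLUSTER_NAME> \
  --region=<REGION> \
  --initialization-actions="gs://<BUCKET>/create-idlemonitoringjob.sh" \
  --metadata="script_storage_location=gs://<BUCKET>,max-idle=30m,key_process_list=hive;pyspark,persist_diagnostic_tarball=TRUE" \
  --scopes=https://www.googleapis.com/auth/cloud-platform
```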
@@ -90,7 +90,7 @@ function startIdleJobChecker() {
cd DataprocShutdown || exit

# copy the script from GCS
gsutil cp "${SCRIPT_STORAGE_LOCATION}/idle-check.sh" .
gcloud storage cp "${SCRIPT_STORAGE_LOCATION}/idle-check.sh" .
# make it executable
chmod 700 idle-check.sh
# run IsIdle script
16 changes: 8 additions & 8 deletions examples/dataproc-job-optimization-guide/README.md
@@ -57,7 +57,7 @@ A sizing cluster can help determine the right number of workers for your application


```bash
-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP

gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-sizing scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
```
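
On a first run the `transformed-$TIMESTAMP` prefix does not exist yet, so the delete will fail; if that is the only error worth ignoring, the call can be made tolerant (a sketch):

```bash
gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP || true
```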
@@ -93,7 +93,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-2x8-standard \
--worker-boot-disk-size=1000GB \
--region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP

gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-2x8-standard scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
```
@@ -114,7 +114,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-4x4-standard \
--worker-boot-disk-size=1000GB \
--region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP

gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-4x4-standard scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
```
@@ -135,7 +135,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-8x2-standard \
--worker-boot-disk-size=1000GB \
--region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP

gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-standard scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
```
@@ -169,7 +169,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-8x2-balanced \
--worker-boot-disk-size=500GB \
--region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP

gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-balanced scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
```
@@ -190,7 +190,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-8x2-ssd \
--worker-boot-disk-size=250GB \
--region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP

gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-ssd scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
```
@@ -213,7 +213,7 @@ gcloud dataproc clusters create $CLUSTER_NAME-testing-8x2-ssd-costop \
--worker-boot-disk-size=30GB \
--region=$REGION

-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP

gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-ssd-costop scripts/spark_average_speed.py -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
```
@@ -230,7 +230,7 @@ sample job submit:
**2 x n2-standard-8-ssd-costop-appop = 1 min 15 seconds**

```bash
-gsutil -m rm -r gs://$BUCKET_NAME/transformed-$TIMESTAMP
+gcloud storage rm --recursive gs://$BUCKET_NAME/transformed-$TIMESTAMP

gcloud dataproc jobs submit pyspark --region=$REGION --cluster=$CLUSTER_NAME-testing-8x2-ssd-costop scripts/spark_average_speed.py --properties='spark.executor.cores=5,spark.driver.cores=5,spark.executor.instances=1,spark.executor.memory=25459m,spark.driver.memory=25459m,spark.executor.memoryOverhead=2829m,spark.default.parallelism=10,spark.sql.shuffle.partitions=10,spark.shuffle.spill.compress=true,spark.checkpoint.compress=true,spark.io.compression.codec=snappy,spark.dynamicAllocation.enabled=true,spark.shuffle.service.enabled=true' -- gs://$BUCKET_NAME/raw-$TIMESTAMP/ gs://$BUCKET_NAME/transformed-$TIMESTAMP/
```
6 changes: 3 additions & 3 deletions examples/dataproc-job-optimization-guide/scripts/setup.sh
@@ -62,19 +62,19 @@ echo "===================================================="
echo " Removing old infrastructure ..."


gsutil -m rm -r gs://"$bucket"
gcloud storage rm --recursive gs://"$bucket"
bq rm -t=true -f=true "$bucket".myTableCopy
bq rm -t=true -f=true "$bucket".yellow_trips_copy


echo "===================================================="
echo " Building infrastructure ..."

-gsutil mb -c regional -l "$region" gs://"$bucket"
+gcloud storage buckets create --default-storage-class=regional --location="$region" gs://"$bucket"

bq mk "$bucket"

-gsutil cp scripts/spark_average_speed.py gs://"$bucket"/scripts/spark_average_speed.py
+gcloud storage cp scripts/spark_average_speed.py gs://"$bucket"/scripts/spark_average_speed.py

echo "===================================================="
echo " Loading data ..."
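`--default-storage-class=regional` carries the legacy gsutil class name over verbatim; `standard` is the modern equivalent for single-region buckets, so if a newer gcloud release rejects the legacy name, this variant should behave the same (an assumption worth verifying against your gcloud version):

```bash
# "standard" assumed equivalent to the legacy "regional" class here
gcloud storage buckets create --default-storage-class=standard --location="$region" gs://"$bucket"
```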