From 2a79f8cd0a07fafcbef67a187e7197bd7c6a6c15 Mon Sep 17 00:00:00 2001 From: Tarun Annapareddy Date: Mon, 23 Mar 2026 15:42:34 -0700 Subject: [PATCH 1/8] Add Documentation for RateLimiter --- examples/java/build.gradle | 26 ++++ .../beam/examples/RateLimiterSimple.java | 2 + .../inference/rate_limiter_vertex_ai.py | 2 + .../examples/rate_limiter_simple.py | 3 + .../en/documentation/patterns/overview.md | 3 + .../documentation/patterns/rate-limiting.md | 126 ++++++++++++++++++ .../section-menu/en/documentation.html | 1 + 7 files changed, 163 insertions(+) create mode 100644 website/www/site/content/en/documentation/patterns/rate-limiting.md diff --git a/examples/java/build.gradle b/examples/java/build.gradle index 068c0d1b56fd..cf168293968f 100644 --- a/examples/java/build.gradle +++ b/examples/java/build.gradle @@ -158,3 +158,29 @@ task wordCount(type:JavaExec) { systemProperties = System.getProperties() args = ["--output=/tmp/output.txt"] } + +// Run any example by using class name +// this task defines class path based on the runner argument +task exec (type:JavaExec) { + mainClass = System.getProperty("mainClass") + def execArgs = System.getProperty("exec.args") + String runner + if (execArgs) { + def runnerPattern = /runner[ =]([A-Za-z]+)/ + def matcher = execArgs =~ runnerPattern + if (matcher) { + runner = matcher[0][1] + runner = runner.substring(0, 1).toLowerCase() + runner.substring(1); + if (!(runner in (preCommitRunners + nonPreCommitRunners))) { + throw new GradleException("Unsupported runner: " + runner) + } + } + } + if (runner) { + classpath = sourceSets.main.runtimeClasspath + configurations."${runner}PreCommit" + } else { + classpath = sourceSets.main.runtimeClasspath + } + systemProperties System.getProperties() + args execArgs ? 
execArgs.split() : [] +} diff --git a/examples/java/src/main/java/org/apache/beam/examples/RateLimiterSimple.java b/examples/java/src/main/java/org/apache/beam/examples/RateLimiterSimple.java index a33e99e4b239..6e1151369c3a 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/RateLimiterSimple.java +++ b/examples/java/src/main/java/org/apache/beam/examples/RateLimiterSimple.java @@ -59,6 +59,7 @@ public interface Options extends PipelineOptions { void setRateLimiterDomain(String value); } + // [START RateLimiterSimpleJava] static class CallExternalServiceFn extends DoFn { private final String rlsAddress; private final String rlsDomain; @@ -111,6 +112,7 @@ public void processElement(ProcessContext c) throws Exception { c.output("Processed: " + element); } } + // [END RateLimiterSimpleJava] public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); diff --git a/sdks/python/apache_beam/examples/inference/rate_limiter_vertex_ai.py b/sdks/python/apache_beam/examples/inference/rate_limiter_vertex_ai.py index 11ec02fbd54f..c16a8674f17e 100644 --- a/sdks/python/apache_beam/examples/inference/rate_limiter_vertex_ai.py +++ b/sdks/python/apache_beam/examples/inference/rate_limiter_vertex_ai.py @@ -52,6 +52,7 @@ def run(argv=None): pipeline_options = PipelineOptions(pipeline_args) pipeline_options.view_as(SetupOptions).save_main_session = True + # [START RateLimiterVertexPy] # Initialize the EnvoyRateLimiter rate_limiter = EnvoyRateLimiter( service_address=known_args.rls_address, @@ -67,6 +68,7 @@ def run(argv=None): project=known_args.project, location=known_args.location, rate_limiter=rate_limiter) + # [END RateLimiterVertexPy] # Input features for the model features = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], diff --git a/sdks/python/apache_beam/examples/rate_limiter_simple.py b/sdks/python/apache_beam/examples/rate_limiter_simple.py index 8cdf1166aadc..bc79aa55d0f5 100644 --- 
a/sdks/python/apache_beam/examples/rate_limiter_simple.py +++ b/sdks/python/apache_beam/examples/rate_limiter_simple.py @@ -29,6 +29,7 @@ from apache_beam.utils import shared +# [START RateLimiterSimplePython] class SampleApiDoFn(beam.DoFn): """A DoFn that simulates calling an external API with rate limiting.""" def __init__(self, rls_address, domain, descriptors): @@ -59,6 +60,8 @@ def process(self, element): logging.info("Processing element: %s", element) time.sleep(0.1) yield element +# [END RateLimiterSimplePython] + def parse_known_args(argv): diff --git a/website/www/site/content/en/documentation/patterns/overview.md b/website/www/site/content/en/documentation/patterns/overview.md index a313bae9ddca..097f7983e808 100644 --- a/website/www/site/content/en/documentation/patterns/overview.md +++ b/website/www/site/content/en/documentation/patterns/overview.md @@ -55,6 +55,9 @@ Pipeline patterns demonstrate common Beam use cases. Pipeline patterns are based * [Grouping elements for efficient external service calls](/documentation/patterns/grouping-elements-for-efficient-external-service-calls/#grouping-elements-for-efficient-external-service-calls-using-the-`GroupIntoBatches`-transform) * [Dynamically grouping elements](/documentation/patterns/batch-elements/#dynamically-grouping-elements-using-the-`BatchElements`-transform) +**Rate limiting patterns** - Patterns for rate limiting DoFns and Transforms in Beam pipelines +* [Rate limiting DoFns and Transforms](/documentation/patterns/rate-limiting/#rate-limiting-dofns-and-transforms-in-beam-pipelines) + **Cache with a shared object** - Patterns for using a shared object as a cache using the Python SDK * [Create a cache on a batch pipeline](/documentation/patterns/shared-class/#create-a-cache-on-a-batch-pipeline) * [Create a cache and update it regularly on a streaming pipeline](/documentation/patterns/shared-class/#create-a-cache-and-update-it-regularly-on-a-streaming-pipeline) diff --git 
a/website/www/site/content/en/documentation/patterns/rate-limiting.md b/website/www/site/content/en/documentation/patterns/rate-limiting.md new file mode 100644 index 000000000000..255f96389c1a --- /dev/null +++ b/website/www/site/content/en/documentation/patterns/rate-limiting.md @@ -0,0 +1,126 @@ +--- +title: "Rate limiting patterns" +--- + + +# Rate limiting patterns + +Apache Beam pipelines often process data at massive scale by scaling worloads across thousands of workers. This can easily overwhelm 3rd-party REST APIs, databases, or internal microservices. Without a centralized coordination, independent workers can easily overwhelm these systems, leading to service degradation or broad IP blocking. + +## Centralized Rate Limit Service + +The recommended approach for global rate limiting in Beam is using a centralized Rate Limit Service (RLS). + +A production-ready **Terraform** module to deploy this service on GKE is available in the beam repository: +[`envoy-ratelimiter`](https://github.com/apache/beam/tree/master/examples/terraform/envoy-ratelimiter) + +To deploy the rate-limiting infrastructure on GKE: + +1. Update `terraform.tfvars` with your project variables to adjust rules and domains. +2. Run the helper deploy script: `./deploy.sh` + +This script automates deployment and, upon completion, returns the Internal Load Balancer IP address for your deployment that you will use in your pipeline. + +--- + +{{< language-switcher java py >}} + +## Using RateLimiter + +To rate limit requests in your pipeline, you create a RateLimiter client in your `DoFn`'s setup phase and acquire permits before making calls in the process phase. + +{{< paragraph class="language-java" >}} +In Java, use the `RateLimiter` interface and `EnvoyRateLimiterFactory` implementation to coordinate with the Envoy service. 
Create `RateLimiterOptions` with your service address, initialize the client in @Setup using `EnvoyRateLimiterFactory`, and call `rateLimiter.allow(batchSize)` in @ProcessElement to acquire a batch of permits. +{{< /paragraph >}} + +{{< highlight java >}} +{{< code_sample "examples/java/src/main/java/org/apache/beam/examples/RateLimiterSimple.java" RateLimiterSimpleJava >}} +{{< /highlight >}} + +{{< paragraph class="language-py" >}} +In Python, use the `EnvoyRateLimiter` and Shared to coordinate a single client instance shared across threads. Initialize client in `setup()` using `shared`, and call `self.rate_limiter.allow()` in `process()` to acquire rate-limiting permits before executing API calls. +{{< /paragraph >}} + +{{< highlight py >}} +{{< code_sample "sdks/python/apache_beam/examples/rate_limiter_simple.py" RateLimiterSimplePython >}} +{{< /highlight >}} + + +{{< paragraph class="language-py" >}} +If you are using **RunInference** for remote model inference (e.g., Vertex AI), you can pass the `EnvoyRateLimiter` directly to the `ModelHandler`. The model handler coordinates the rate limit internally across your distributed workers. +{{< /paragraph >}} + +{{< highlight py >}} +# Initialize the EnvoyRateLimiter +rate_limiter = EnvoyRateLimiter( + service_address=known_args.rls_address, + domain="mongo_cps", + descriptors=[{"database": "users"}] +) + +# Initialize the VertexAIModelHandler with the rate limiter +model_handler = VertexAIModelHandlerJSON( + endpoint_id=known_args.endpoint_id, + project=known_args.project, + location=known_args.location, + rate_limiter=rate_limiter +) +{{< /highlight >}} + +--- + +## Running with Dataflow + +Once your Rate Limiter Service is deployed and has an Internal IP, you can run your pipeline pointing to that address. + +
+ +### Example Execution + +```bash +# Get the IP from your RLS deployment +export RLS_ADDRESS=":8081" + +./gradlew :examples:java:execute -DmainClass=org.apache.beam.examples.RateLimiterSimple \ + -Dexec.args="--runner=DataflowRunner \ + --project= \ + --region= \ + --tempLocation=gs:///temp \ + --rateLimiterAddress=${RLS_ADDRESS} \ + --rateLimiterDomain=mongo_cps \ + --subnetwork=regions//subnetworks/ \ + --usePublicIps=false" +``` +
+ +
+ +### Example Execution + +```bash +# Get the IP from your RLS deployment +export RLS_ADDRESS=":8081" + +python -m apache_beam.examples.rate_limiter_simple \ + --runner=DataflowRunner \ + --project= \ + --region= \ + --temp_location=gs:///temp \ + --rls_address=${RLS_ADDRESS} \ + --subnetwork=regions//subnetworks/ \ + --no_use_public_ips +``` +
+ diff --git a/website/www/site/layouts/partials/section-menu/en/documentation.html b/website/www/site/layouts/partials/section-menu/en/documentation.html index 0cc197d95fdc..0e87273853bf 100755 --- a/website/www/site/layouts/partials/section-menu/en/documentation.html +++ b/website/www/site/layouts/partials/section-menu/en/documentation.html @@ -213,6 +213,7 @@
  • Schema
  • BigQuery ML
  • Grouping elements for efficient external service calls
  • +
  • Rate limiting DoFns and Transforms
  • Cache using a shared object
  • From 3b68da1575a5ed855a96f82341a2c8028967a07b Mon Sep 17 00:00:00 2001 From: Tarun Annapareddy Date: Wed, 25 Mar 2026 08:12:01 -0700 Subject: [PATCH 2/8] Add Autoscaler integration --- .../documentation/patterns/rate-limiting.md | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/website/www/site/content/en/documentation/patterns/rate-limiting.md b/website/www/site/content/en/documentation/patterns/rate-limiting.md index 255f96389c1a..0e29a8716dfd 100644 --- a/website/www/site/content/en/documentation/patterns/rate-limiting.md +++ b/website/www/site/content/en/documentation/patterns/rate-limiting.md @@ -17,13 +17,13 @@ limitations under the License. # Rate limiting patterns -Apache Beam pipelines often process data at massive scale by scaling worloads across thousands of workers. This can easily overwhelm 3rd-party REST APIs, databases, or internal microservices. Without a centralized coordination, independent workers can easily overwhelm these systems, leading to service degradation or broad IP blocking. +Apache Beam is built to maximize throughput by scaling workloads across thousands of workers. However, this massive parallelism becomes a liability when pipelines interact with external systems that enforce strict quotas, such as 3rd-party REST APIs, databases, or internal microservices. Without a centralized coordination mechanism, independent workers can easily overwhelm these systems, leading to service degradation or broad IP blocking. ## Centralized Rate Limit Service The recommended approach for global rate limiting in Beam is using a centralized Rate Limit Service (RLS). 
-A production-ready **Terraform** module to deploy this service on GKE is available in the beam repository: +A production-ready Terraform module to deploy this service on GKE is available in the beam repository: [`envoy-ratelimiter`](https://github.com/apache/beam/tree/master/examples/terraform/envoy-ratelimiter) To deploy the rate-limiting infrastructure on GKE: @@ -39,7 +39,7 @@ This script automates deployment and, upon completion, returns the Internal Load ## Using RateLimiter -To rate limit requests in your pipeline, you create a RateLimiter client in your `DoFn`'s setup phase and acquire permits before making calls in the process phase. +To rate limit requests in your pipeline, you can create a RateLimiter client in your `DoFn`'s setup phase and acquire permits before making calls in the process phase. {{< paragraph class="language-java" >}} In Java, use the `RateLimiter` interface and `EnvoyRateLimiterFactory` implementation to coordinate with the Envoy service. Create `RateLimiterOptions` with your service address, initialize the client in @Setup using `EnvoyRateLimiterFactory`, and call `rateLimiter.allow(batchSize)` in @ProcessElement to acquire a batch of permits. @@ -57,26 +57,18 @@ In Python, use the `EnvoyRateLimiter` and }} {{< /highlight >}} +## AutoScaler Integration + +When using RateLimiter in your pipelines, the throttling time and signals will be picked up by the autoscaler. This allows the autoscaler to scale down the number of workers when the pipeline is being throttled by the external service, preventing unnecessary resource usage. + +**Dataflow** currently supports this AutoScaler integration for **Batch RunnerV2**. Note that AutoScaler integration for Streaming mode is a known limitation. {{< paragraph class="language-py" >}} If you are using **RunInference** for remote model inference (e.g., Vertex AI), you can pass the `EnvoyRateLimiter` directly to the `ModelHandler`. 
The model handler coordinates the rate limit internally across your distributed workers. {{< /paragraph >}} {{< highlight py >}} -# Initialize the EnvoyRateLimiter -rate_limiter = EnvoyRateLimiter( - service_address=known_args.rls_address, - domain="mongo_cps", - descriptors=[{"database": "users"}] -) - -# Initialize the VertexAIModelHandler with the rate limiter -model_handler = VertexAIModelHandlerJSON( - endpoint_id=known_args.endpoint_id, - project=known_args.project, - location=known_args.location, - rate_limiter=rate_limiter -) +{{< code_sample "sdks/python/apache_beam/examples/inference/rate_limiter_vertex_ai.py" RateLimiterVertexPy >}} {{< /highlight >}} --- @@ -124,3 +116,8 @@ python -m apache_beam.examples.rate_limiter_simple \ ``` +## AutoScaler Integration + +The throttling time and signals from the RateLimiter will be picked up by the autoscaler. This allows the autoscaler to scale down the workers when the pipeline is being throttled by the external service, preventing unnecessary resource usage. + +**Dataflow** currently supports this AutoScaler integration for **Batch RunnerV2**. Note that AutoScaler integration for Streaming mode is a known limitation. 
From 146ff93ff3d0a939c25a04060489b12b05f8f010 Mon Sep 17 00:00:00 2001 From: Tarun Annapareddy Date: Wed, 25 Mar 2026 08:39:02 -0700 Subject: [PATCH 3/8] fix command --- .../content/en/documentation/patterns/rate-limiting.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/website/www/site/content/en/documentation/patterns/rate-limiting.md b/website/www/site/content/en/documentation/patterns/rate-limiting.md index 0e29a8716dfd..b097dba0600f 100644 --- a/website/www/site/content/en/documentation/patterns/rate-limiting.md +++ b/website/www/site/content/en/documentation/patterns/rate-limiting.md @@ -57,12 +57,6 @@ In Python, use the `EnvoyRateLimiter` and }} {{< /highlight >}} -## AutoScaler Integration - -When using RateLimiter in your pipelines, the throttling time and signals will be picked up by the autoscaler. This allows the autoscaler to scale down the number of workers when the pipeline is being throttled by the external service, preventing unnecessary resource usage. - -**Dataflow** currently supports this AutoScaler integration for **Batch RunnerV2**. Note that AutoScaler integration for Streaming mode is a known limitation. - {{< paragraph class="language-py" >}} If you are using **RunInference** for remote model inference (e.g., Vertex AI), you can pass the `EnvoyRateLimiter` directly to the `ModelHandler`. The model handler coordinates the rate limit internally across your distributed workers. {{< /paragraph >}} @@ -79,8 +73,6 @@ Once your Rate Limiter Service is deployed and has an Internal IP, you can run y
    -### Example Execution - ```bash # Get the IP from your RLS deployment export RLS_ADDRESS=":8081" @@ -99,8 +91,6 @@ export RLS_ADDRESS=":8081"
    -### Example Execution - ```bash # Get the IP from your RLS deployment export RLS_ADDRESS=":8081" From f71e3c382862fa9cec28f510e60d933a37af7841 Mon Sep 17 00:00:00 2001 From: Tarun Annapareddy Date: Wed, 25 Mar 2026 08:45:27 -0700 Subject: [PATCH 4/8] fix run command --- .../en/documentation/patterns/overview.md | 2 +- .../en/documentation/patterns/rate-limiting.md | 16 +++++----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/website/www/site/content/en/documentation/patterns/overview.md b/website/www/site/content/en/documentation/patterns/overview.md index 097f7983e808..4279695afc6a 100644 --- a/website/www/site/content/en/documentation/patterns/overview.md +++ b/website/www/site/content/en/documentation/patterns/overview.md @@ -56,7 +56,7 @@ Pipeline patterns demonstrate common Beam use cases. Pipeline patterns are based * [Dynamically grouping elements](/documentation/patterns/batch-elements/#dynamically-grouping-elements-using-the-`BatchElements`-transform) **Rate limiting patterns** - Patterns for rate limiting DoFns and Transforms in Beam pipelines -* [Rate limiting DoFns and Transforms](/documentation/patterns/rate-limiting/#rate-limiting-dofns-and-transforms-in-beam-pipelines) +* [Rate limiting DoFns and Transforms](/documentation/patterns/rate-limiting) **Cache with a shared object** - Patterns for using a shared object as a cache using the Python SDK * [Create a cache on a batch pipeline](/documentation/patterns/shared-class/#create-a-cache-on-a-batch-pipeline) diff --git a/website/www/site/content/en/documentation/patterns/rate-limiting.md b/website/www/site/content/en/documentation/patterns/rate-limiting.md index b097dba0600f..503bc0db23d0 100644 --- a/website/www/site/content/en/documentation/patterns/rate-limiting.md +++ b/website/www/site/content/en/documentation/patterns/rate-limiting.md @@ -71,13 +71,11 @@ If you are using **RunInference** for remote model inference (e.g., Vertex AI), Once your Rate Limiter Service is 
deployed and has an Internal IP, you can run your pipeline pointing to that address. -
    - -```bash +{{< highlight java >}} # Get the IP from your RLS deployment export RLS_ADDRESS=":8081" -./gradlew :examples:java:execute -DmainClass=org.apache.beam.examples.RateLimiterSimple \ +./gradlew :examples:java:exec -DmainClass=org.apache.beam.examples.RateLimiterSimple \ -Dexec.args="--runner=DataflowRunner \ --project= \ --region= \ @@ -86,12 +84,9 @@ export RLS_ADDRESS=":8081" --rateLimiterDomain=mongo_cps \ --subnetwork=regions//subnetworks/ \ --usePublicIps=false" -``` -
    - -
    +{{< /highlight >}} -```bash +{{< highlight py >}} # Get the IP from your RLS deployment export RLS_ADDRESS=":8081" @@ -103,8 +98,7 @@ python -m apache_beam.examples.rate_limiter_simple \ --rls_address=${RLS_ADDRESS} \ --subnetwork=regions//subnetworks/ \ --no_use_public_ips -``` -
    +{{< /highlight >}} ## AutoScaler Integration From f8d5dd5ef960a1deff74ca748662650582f94b5b Mon Sep 17 00:00:00 2001 From: Tarun Annapareddy Date: Wed, 25 Mar 2026 08:51:29 -0700 Subject: [PATCH 5/8] update overview --- .../www/site/content/en/documentation/patterns/overview.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/www/site/content/en/documentation/patterns/overview.md b/website/www/site/content/en/documentation/patterns/overview.md index 4279695afc6a..527f15f6dccf 100644 --- a/website/www/site/content/en/documentation/patterns/overview.md +++ b/website/www/site/content/en/documentation/patterns/overview.md @@ -55,13 +55,13 @@ Pipeline patterns demonstrate common Beam use cases. Pipeline patterns are based * [Grouping elements for efficient external service calls](/documentation/patterns/grouping-elements-for-efficient-external-service-calls/#grouping-elements-for-efficient-external-service-calls-using-the-`GroupIntoBatches`-transform) * [Dynamically grouping elements](/documentation/patterns/batch-elements/#dynamically-grouping-elements-using-the-`BatchElements`-transform) -**Rate limiting patterns** - Patterns for rate limiting DoFns and Transforms in Beam pipelines -* [Rate limiting DoFns and Transforms](/documentation/patterns/rate-limiting) - **Cache with a shared object** - Patterns for using a shared object as a cache using the Python SDK * [Create a cache on a batch pipeline](/documentation/patterns/shared-class/#create-a-cache-on-a-batch-pipeline) * [Create a cache and update it regularly on a streaming pipeline](/documentation/patterns/shared-class/#create-a-cache-and-update-it-regularly-on-a-streaming-pipeline) +**Rate limiting patterns** - Patterns for rate limiting DoFns and Transforms in Beam pipelines +* [Rate limiting DoFns and Transforms](/documentation/patterns/rate-limiting) + ## Contributing a pattern To contribute a new pipeline pattern, create [a feature 
request](https://github.com/apache/beam/issues/new?labels=new+feature%2Cawaiting+triage&template=feature.yml&title=%5BFeature+Request%5D%3A+) and add details to the issue description. See [Get started contributing](/contribute/) for more information. From 842320e47e891d8be210eb9e95aa490e7c9aa3a2 Mon Sep 17 00:00:00 2001 From: Tarun Annapareddy Date: Wed, 25 Mar 2026 10:32:49 -0700 Subject: [PATCH 6/8] fix spotless --- sdks/python/apache_beam/examples/rate_limiter_simple.py | 3 ++- .../site/content/en/documentation/patterns/rate-limiting.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/examples/rate_limiter_simple.py b/sdks/python/apache_beam/examples/rate_limiter_simple.py index bc79aa55d0f5..00c0a34c3775 100644 --- a/sdks/python/apache_beam/examples/rate_limiter_simple.py +++ b/sdks/python/apache_beam/examples/rate_limiter_simple.py @@ -60,9 +60,10 @@ def process(self, element): logging.info("Processing element: %s", element) time.sleep(0.1) yield element -# [END RateLimiterSimplePython] +# [END RateLimiterSimplePython] + def parse_known_args(argv): """Parses args for the workflow.""" diff --git a/website/www/site/content/en/documentation/patterns/rate-limiting.md b/website/www/site/content/en/documentation/patterns/rate-limiting.md index 503bc0db23d0..2de64c6d38d3 100644 --- a/website/www/site/content/en/documentation/patterns/rate-limiting.md +++ b/website/www/site/content/en/documentation/patterns/rate-limiting.md @@ -21,7 +21,7 @@ Apache Beam is built to maximize throughput by scaling workloads across thousand ## Centralized Rate Limit Service -The recommended approach for global rate limiting in Beam is using a centralized Rate Limit Service (RLS). +The recommended approach for global rate limiting in Beam is using a centralized Rate Limit Service (RLS). 
A production-ready Terraform module to deploy this service on GKE is available in the beam repository: [`envoy-ratelimiter`](https://github.com/apache/beam/tree/master/examples/terraform/envoy-ratelimiter) From 1c59ab933b19a6bcb540dc9644b6317aef164530 Mon Sep 17 00:00:00 2001 From: Tarun Annapareddy Date: Thu, 26 Mar 2026 10:24:13 -0700 Subject: [PATCH 7/8] fix documentation --- .../www/site/content/en/documentation/patterns/rate-limiting.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/www/site/content/en/documentation/patterns/rate-limiting.md b/website/www/site/content/en/documentation/patterns/rate-limiting.md index 2de64c6d38d3..7c720ce7ed52 100644 --- a/website/www/site/content/en/documentation/patterns/rate-limiting.md +++ b/website/www/site/content/en/documentation/patterns/rate-limiting.md @@ -17,7 +17,7 @@ limitations under the License. # Rate limiting patterns -Apache Beam is built to maximize throughput by scaling workloads across thousands of workers. However, this massive parallelism becomes a liability when pipelines interact with external systems that enforce strict quotas, such as 3rd-party REST APIs, databases, or internal microservices. Without a centralized coordination mechanism, independent workers can easily overwhelm these systems, leading to service degradation or broad IP blocking. +Apache Beam is built to maximize throughput by scaling workloads across thousands of workers. However, this massive parallelism requires coordination when pipelines interact with external systems that enforce strict quotas, such as 3rd-party REST APIs, databases, or internal microservices. Without a centralized rate limiting mechanism, independent workers might exceed the capacity of these systems, resulting in service degradation or broad IP blocking. 
## Centralized Rate Limit Service From e7cb4fb6fcf03c494f6a892865f58a6957dfca5c Mon Sep 17 00:00:00 2001 From: Tarun Annapareddy Date: Thu, 26 Mar 2026 12:56:22 -0700 Subject: [PATCH 8/8] remove naming --- .../documentation/patterns/rate-limiting.md | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/website/www/site/content/en/documentation/patterns/rate-limiting.md b/website/www/site/content/en/documentation/patterns/rate-limiting.md index 7c720ce7ed52..7c13c12ffa21 100644 --- a/website/www/site/content/en/documentation/patterns/rate-limiting.md +++ b/website/www/site/content/en/documentation/patterns/rate-limiting.md @@ -67,7 +67,7 @@ If you are using **RunInference** for remote model inference (e.g., Vertex AI), --- -## Running with Dataflow +## Running Example Pipelines with RateLimiter Once your Rate Limiter Service is deployed and has an Internal IP, you can run your pipeline pointing to that address. @@ -76,14 +76,9 @@ Once your Rate Limiter Service is deployed and has an Internal IP, you can run y export RLS_ADDRESS=":8081" ./gradlew :examples:java:exec -DmainClass=org.apache.beam.examples.RateLimiterSimple \ - -Dexec.args="--runner=DataflowRunner \ - --project= \ - --region= \ - --tempLocation=gs:///temp \ + -Dexec.args="--runner= \ --rateLimiterAddress=${RLS_ADDRESS} \ - --rateLimiterDomain=mongo_cps \ - --subnetwork=regions//subnetworks/ \ - --usePublicIps=false" + --rateLimiterDomain=mongo_cps" {{< /highlight >}} {{< highlight py >}} @@ -91,13 +86,8 @@ export RLS_ADDRESS=":8081" export RLS_ADDRESS=":8081" python -m apache_beam.examples.rate_limiter_simple \ - --runner=DataflowRunner \ - --project= \ - --region= \ - --temp_location=gs:///temp \ - --rls_address=${RLS_ADDRESS} \ - --subnetwork=regions//subnetworks/ \ - --no_use_public_ips + --runner= \ + --rls_address=${RLS_ADDRESS} {{< /highlight >}} ## AutoScaler Integration