NVIDIA-NeMo · nabinchha · Mar 24, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
@@ -57,7 +57,7 @@
         ProcessorType,
         SchemaTransformProcessorConfig,
     )
-    from data_designer.config.run_config import RunConfig  # noqa: F401
+    from data_designer.config.run_config import RunConfig, ThrottleConfig  # noqa: F401
     from data_designer.config.sampler_constraints import (  # noqa: F401
         ColumnInequalityConstraint,
         ConstraintType,
@@ -173,6 +173,7 @@
     "SchemaTransformProcessorConfig": (_MOD_PROCESSORS, "SchemaTransformProcessorConfig"),
     # run_config
     "RunConfig": (f"{_MOD_BASE}.run_config", "RunConfig"),
+    "ThrottleConfig": (f"{_MOD_BASE}.run_config", "ThrottleConfig"),
     # sampler_constraints
     "ColumnInequalityConstraint": (_MOD_SAMPLER_CONSTRAINTS, "ColumnInequalityConstraint"),
     "ConstraintType": (_MOD_SAMPLER_CONSTRAINTS, "ConstraintType"),

@@ -3,12 +3,71 @@
 
 from __future__ import annotations
 
+from typing import ClassVar
+
 from pydantic import Field, model_validator
 from typing_extensions import Self
 
 from data_designer.config.base import ConfigBase
 
 
+class ThrottleConfig(ConfigBase):
+    """AIMD throttle tuning parameters for adaptive concurrency control.
+
+    These knobs configure the ``ThrottleManager`` that wraps every outbound
+    model HTTP request.  The defaults are conservative and suitable for most
+    workloads; override only when you understand the trade-offs.
+
+    Each default is also available as a ``DEFAULT_*`` class-level constant
+    annotated ``ClassVar``, and every field is range-checked through its
+    ``Field`` constraints.
+
+    Attributes:
+        reduce_factor: Multiplicative decrease factor applied to the per-domain
+            concurrency limit on a 429 / rate-limit signal.  Must be in (0, 1).
+            Default is 0.75 (reduce by 25% on rate-limit).
+        additive_increase: Additive increase step applied after every
+            ``success_window`` consecutive successes.  Must be >= 1.
+            Default is 1.
+        success_window: Number of consecutive successful releases before
+            the additive increase is applied.  Must be >= 1.  Default is 25.
+        cooldown_seconds: Default cooldown duration (seconds) applied after a
+            rate-limit when the provider does not include a ``Retry-After``
+            header.  Must be > 0.  Default is 2.0.
+        ceiling_overshoot: Fraction above the observed rate-limit ceiling
+            that additive increase is allowed to probe before capping.
+            Must be >= 0.  Default is 0.10 (10% overshoot).
+    """
+
+    DEFAULT_REDUCE_FACTOR: ClassVar[float] = 0.75
+    DEFAULT_ADDITIVE_INCREASE: ClassVar[int] = 1
+    DEFAULT_SUCCESS_WINDOW: ClassVar[int] = 25
+    DEFAULT_COOLDOWN_SECONDS: ClassVar[float] = 2.0
+    DEFAULT_CEILING_OVERSHOOT: ClassVar[float] = 0.10
+
+    reduce_factor: float = Field(
+        default=DEFAULT_REDUCE_FACTOR,
+        gt=0.0,
+        lt=1.0,
+        description="Multiplicative decrease factor applied to the per-domain concurrency limit on a 429 signal.",
+    )
+    additive_increase: int = Field(
+        default=DEFAULT_ADDITIVE_INCREASE,
+        ge=1,
+        description="Additive increase step applied after every `success_window` consecutive successes.",
+    )
+    success_window: int = Field(
+        default=DEFAULT_SUCCESS_WINDOW,
+        ge=1,
+        description="Number of consecutive successful releases before the additive increase is applied.",
+    )
+    cooldown_seconds: float = Field(
+        default=DEFAULT_COOLDOWN_SECONDS,
+        gt=0.0,
+        description="Default cooldown duration (seconds) after a rate-limit when no Retry-After header is present.",
+    )
+    ceiling_overshoot: float = Field(
+        default=DEFAULT_CEILING_OVERSHOOT,
+        ge=0.0,
+        description="Fraction above the rate-limit ceiling that additive increase is allowed to probe.",
+    )
+
+
 class RunConfig(ConfigBase):
     """Runtime configuration for dataset generation.
 
@@ -35,6 +94,7 @@ class RunConfig(ConfigBase):
             Default is 0.
         async_trace: If True, collect per-task tracing data when using the async engine
             (DATA_DESIGNER_ASYNC_ENGINE=1). Has no effect on the sync path. Default is False.
+        throttle: AIMD throttle tuning parameters.  See ``ThrottleConfig`` for details.
     """
 
     disable_early_shutdown: bool = False
@@ -45,6 +105,7 @@ class RunConfig(ConfigBase):
     max_conversation_restarts: int = Field(default=5, ge=0)
     max_conversation_correction_steps: int = Field(default=0, ge=0)
     async_trace: bool = False
+    throttle: ThrottleConfig = Field(default_factory=ThrottleConfig)
 
     @model_validator(mode="after")
     def normalize_shutdown_settings(self) -> Self:

@@ -57,6 +57,11 @@ class ColumnGenerator(ConfigurableTask[TaskConfigT], ABC):
     def can_generate_from_scratch(self) -> bool:
         return False
 
+    @property
+    def is_llm_bound(self) -> bool:
+        """Whether this generator makes LLM/HTTP calls during generation.
+
+        Returns ``False`` by default; subclasses override this property to
+        report ``True``.
+        """
+        return False
+
     @property
     def is_order_dependent(self) -> bool:
         """Whether this generator's output depends on prior row-group calls.
@@ -130,6 +135,10 @@ async def agenerate_from_scratch(self, num_records: int) -> pd.DataFrame:
 
 
 class ColumnGeneratorWithModelRegistry(ColumnGenerator[TaskConfigT], ABC):
+    @property
+    def is_llm_bound(self) -> bool:
+        """Always ``True``: generators of this class are reported as LLM-bound."""
+        return True
+
     @property
     def model_registry(self) -> ModelRegistry:
         return self.resource_provider.model_registry

@@ -40,6 +40,11 @@ class CustomColumnGenerator(ColumnGenerator[CustomColumnConfig]):
     The models dict provides direct access to ModelFacade instances keyed by alias.
     """
 
+    @property
+    def is_llm_bound(self) -> bool:
+        """Custom generators with model_aliases make LLM calls and need the handoff.
+
+        Returns:
+            ``True`` when ``self.config.model_aliases`` is truthy (non-empty),
+            ``False`` otherwise.
+        """
+        return bool(self.config.model_aliases)
+
     def get_generation_strategy(self) -> GenerationStrategy:
         """Return the generation strategy set on this generator's config."""
         return self.config.generation_strategy