NVIDIA-NeMo
diff --git a/‎.github/ISSUE_TEMPLATE/bug_report.md‎
Lines changed: 0 additions & 42 deletions b/‎.github/ISSUE_TEMPLATE/bug_report.md‎
Lines changed: 0 additions & 42 deletions
diff --git a/‎.github/ISSUE_TEMPLATE/feature_request.md‎
Lines changed: 0 additions & 25 deletions b/‎.github/ISSUE_TEMPLATE/feature_request.md‎
Lines changed: 0 additions & 25 deletions
diff --git a/‎README.md‎
Lines changed: 8 additions & 3 deletions b/‎README.md‎
Lines changed: 8 additions & 3 deletions
diff --git a/‎nemo_gym/cli.py‎
Lines changed: 3 additions & 0 deletions b/‎nemo_gym/cli.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎nemo_gym/config_types.py‎
Lines changed: 2 additions & 0 deletions b/‎nemo_gym/config_types.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎nemo_gym/openai_utils.py‎
Lines changed: 4 additions & 1 deletion b/‎nemo_gym/openai_utils.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎nemo_gym/rollout_collection.py‎
Lines changed: 7 additions & 2 deletions b/‎nemo_gym/rollout_collection.py‎
Lines changed: 7 additions & 2 deletions
@@ -600,14 +600,15 @@ ng_collect_rollouts +agent_name=multineedle_simple_agent \
     +output_jsonl_fpath=results/multineedle_rollout_collection.jsonl \
     +limit=null \
     +num_repeats=null \
-    +num_samples_in_parallel=null
+    +num_samples_in_parallel=null \
+    +responses_create_params.max_output_tokens=32_768
 ```
 
 The supported parameters include:
 - `limit`: Limits how many examples from the input JSONL file to process
 - `num_repeats`: Repeats each input example multiple times to collect multiple rollouts per example
 - `num_samples_in_parallel`: Controls how many rollout collection requests run concurrently
-
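+- `responses_create_params`: A dictionary of sampling parameter overrides. It is merged into each input row's `responses_create_params`, with the override values taking precedence.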
 
 View the rollouts just collected!
 ```
@@ -637,6 +638,7 @@ multineedle_simple_agent:
         type: train
         license: Apache 2.0
         jsonl_fpath: resources_servers/multineedle/data/train.jsonl
+        num_repeats: 1
         gitlab_identifier:
           dataset_name: multineedle
           version: 0.0.1
@@ -646,6 +648,7 @@ multineedle_simple_agent:
         type: validation
         license: Apache 2.0
         jsonl_fpath: resources_servers/multineedle/data/validation.jsonl
+        num_repeats: 1
         gitlab_identifier:
           dataset_name: multineedle
           version: 0.0.1
@@ -654,12 +657,14 @@ multineedle_simple_agent:
       - name: example
         type: example
         jsonl_fpath: resources_servers/multineedle/data/example.jsonl
+        num_repeats: 1
 ```
 
 A dataset object consists of:
 - Name: An identifier that you choose for the dataset.
 - Type: train, validation, or example. Train and validation are used the same way as in NeMo RL or other training frameworks. More information about the example type is in the next section.
 - Jsonl fpath: the local file path to your jsonl file for this dataset.
+- Num repeats: optionally repeat each row when preparing or collating data. Defaults to 1 if unspecified.
 - Gitlab identifier: The remote path to the dataset as held in the Gitlab dataset registry. This field is required for train and validation datasets. (Not required for example datasets since those are required to be committed to Git).
 - License: The license of that dataset. Required for train and validation datasets and not required for example datasets, similar in principle to the Gitlab identifier.
 - Start idx, end idx: used for slicing your dataset.
@@ -816,7 +821,7 @@ ng_collect_rollouts +agent_name=library_judge_math_simple_agent \
     +input_jsonl_fpath=resources_servers/library_judge_math/data/dapo17k_bytedtsinghua_train.jsonl \
     +output_jsonl_fpath=temp/library_judge_math_rollouts.jsonl \
     +limit=1024 \
-    +num_repeats 1
+    +num_repeats=1
 ```
 
 After `ng_collect_rollouts` finishes, press Ctrl+C to quit your servers. You should see some output in the terminal like this:
 
@@ -518,6 +518,7 @@ def init_resources_server():  # pragma: no cover
       - name: train
         type: train
         jsonl_fpath: resources_servers/{server_type_name}/data/train.jsonl
+        num_repeats: 1
         gitlab_identifier:
           dataset_name: {server_type_name}
           version: 0.0.1
@@ -526,6 +527,7 @@ def init_resources_server():  # pragma: no cover
       - name: validation
         type: validation
         jsonl_fpath: resources_servers/{server_type_name}/data/validation.jsonl
+        num_repeats: 1
         gitlab_identifier:
           dataset_name: {server_type_name}
           version: 0.0.1
@@ -534,6 +536,7 @@ def init_resources_server():  # pragma: no cover
       - name: example
         type: example
         jsonl_fpath: resources_servers/{server_type_name}/data/example.jsonl
+        num_repeats: 1
 """)
 
     app_fpath = dirpath / "app.py"
 
@@ -87,10 +87,12 @@ class DatasetConfig(BaseModel):
     type: DatasetType
     jsonl_fpath: str
 
+    num_repeats: int = Field(default=1, ge=1)
     gitlab_identifier: Optional[JsonlDatasetGitlabIdentifer] = None
     license: Optional[
         Union[
             Literal["Apache 2.0"],
+            Literal["MIT"],
             Literal["Creative Commons Attribution 4.0 International"],
             Literal["Creative Commons Attribution-ShareAlike 4.0 International"],
             Literal["TBD"],
 
@@ -74,7 +74,7 @@
 from pydantic import BaseModel, ConfigDict, Field
 from typing_extensions import TypedDict
 
-from nemo_gym.server_utils import request
+from nemo_gym.server_utils import raise_for_status, request
 
 
 ########################################
@@ -432,6 +432,7 @@ async def create_chat_completion(self, **kwargs):
             json=kwargs,
             headers={"Authorization": f"Bearer {self.api_key}"},
         )
+        raise_for_status(response)
         return await response.json()
 
     async def create_response(self, **kwargs):
@@ -441,6 +442,7 @@ async def create_response(self, **kwargs):
             json=kwargs,
             headers={"Authorization": f"Bearer {self.api_key}"},
         )
+        raise_for_status(response)
         return await response.json()
 
     async def create_tokenize(self, **kwargs):
@@ -451,4 +453,5 @@ async def create_tokenize(self, **kwargs):
             json=kwargs,
             headers={"Authorization": f"Bearer {self.api_key}"},
         )
+        raise_for_status(response)
         return await response.json()
@@ -17,9 +17,9 @@
 from collections import Counter
 from contextlib import nullcontext
 from itertools import chain, repeat
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from tqdm.asyncio import tqdm
 
 from nemo_gym.config_types import BaseServerConfig
@@ -39,6 +39,7 @@ class RolloutCollectionConfig(BaseModel):
     limit: Optional[int] = None
     num_repeats: Optional[int] = None
     num_samples_in_parallel: Optional[int] = None
+    responses_create_params: Dict[str, Any] = Field(default_factory=dict)
 
 
 class RolloutCollectionHelper(BaseModel):  # pragma: no cover
@@ -68,10 +69,14 @@ async def run_from_config(self, config: RolloutCollectionConfig):
             f"The tqdm progress bar will only update every {tqdm_miniters} samples that finish to ensure that you are not being spammed."
         )
 
+        if config.responses_create_params:
+            print(f"Overriding responses_create_params fields with {config.responses_create_params}")
+
         metrics = Counter()
         with open(config.output_jsonl_fpath, "a") as f:
 
             async def _post_coroutine(row: dict) -> None:
+                row["responses_create_params"] = row["responses_create_params"] | config.responses_create_params
                 async with semaphore:
                     response = await server_client.post(server_name=config.agent_name, url_path="/run", json=row)
                     result = await response.json()