NVIDIA-NeMo
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 29 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 134 additions & 64 deletions b/‎README.md‎
Lines changed: 134 additions & 64 deletions
diff --git a/‎nemo_gym/base_resources_server.py‎
Lines changed: 16 additions & 2 deletions b/‎nemo_gym/base_resources_server.py‎
Lines changed: 16 additions & 2 deletions
diff --git a/‎nemo_gym/base_responses_api_agent.py‎
Lines changed: 4 additions & 2 deletions b/‎nemo_gym/base_responses_api_agent.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎nemo_gym/base_responses_api_model.py‎
Lines changed: 4 additions & 2 deletions b/‎nemo_gym/base_responses_api_model.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎nemo_gym/cli.py‎
Lines changed: 66 additions & 16 deletions b/‎nemo_gym/cli.py‎
Lines changed: 66 additions & 16 deletions
diff --git a/‎nemo_gym/config_types.py‎
Lines changed: 12 additions & 1 deletion b/‎nemo_gym/config_types.py‎
Lines changed: 12 additions & 1 deletion
diff --git a/‎nemo_gym/dataset_viewer.py‎
Lines changed: 2 additions & 2 deletions b/‎nemo_gym/dataset_viewer.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎nemo_gym/openai_utils.py‎
Lines changed: 9 additions & 0 deletions b/‎nemo_gym/openai_utils.py‎
Lines changed: 9 additions & 0 deletions
@@ -1,5 +1,34 @@
 # Contributing To NeMo-Gym
 
+## Quality control
+This is a checklist for all verifier data submitted to NeMo Gym. Please follow this pipeline before submitting a merge request.
+
+1. Necessary information to be included in the merge request:
+   1. Corresponding dataset on the spreadsheet.
+   2. Description of the prompt. What is the source of the prompt, which domain is it covering?
+   3. Description of the environment, if there is any.
+   4. Description of the verifier. How is the data verified, and has the correctness of the verifier been checked?
+   5. Legal approval status. If the data was synthetically generated by us with open models, please note that here so that we know legal approval is not needed.
+2. Simple correctness check: After you finish implementing your own resources_servers (and/or your own customized code for more complicated tasks), please follow the guideline here to run the server, query the OpenAI gpt-4o model (or any other model you like), and collect 5 example rollouts with their corresponding rewards. Please include in your merge request:
+   1. The command you used to run the server for the uploaded data
+   2. The resulting rollouts and judges (include 5 examples here so that people can better understand the data samples and confirm that the rewards are correct).
+   3. Other additional notes for running the server properly with the new PR.
+3. Test: Please follow the guideline here to implement and run your own tests for your environment. Tests are strongly encouraged, and you must have at least one test for every server you make. Test coverage is not explicitly required, which means that YOU ARE RESPONSIBLE FOR YOUR OWN SERVER CORRECTNESS AND FUNCTION.
+4. Reward Profiling: Please run inference on your prompts and environments (a small subset of ~500 is OK) on two models:
+   1. Qwen 3 30B A3B
+   2. Qwen 3 235B Instruct (for agent / agentic coding / instruction following / game environments) or Qwen 3 235B Thinking (for math / competition coding)
+   3. Generate 16 responses for each prompt, and report the reward distribution (the percentage of prompts whose responses are all correct, all incorrect, or a mixture of correct and incorrect).
+   4. [If using tool calling] Please also provide metrics around the number of tool calls issued on average per prompt in the environment, and the correlation of the reward with the number of tool calls. 
+5. [After Nemo Gym + Nemo RL integration is done] Training-based correctness check: Please train the following models with GRPO and include both the training accuracy curve and the test benchmark accuracy curve:
+   1. Qwen 30B A3B Instruct
+   2. [With more compute available] Qwen 235B Instruct
+6. [PR Check and Review] Please assign another person on your team to reproduce and review the PR once it is ready. The reviewer needs to:
+   1. Verify the content of steps 1-5 above
+   2. Check the correctness of the 5 examples
+   3. Re-run the procedure provided in the README to ensure that one can generate the same dataset
+   4. After the reviewer confirms reproduction and gives the green light on the PR, please ping @banghuaz-nvidia @bxyu-nvidia.
+
+
 ## Signing Your Work
 
 * We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license.
 
@@ -20,10 +20,10 @@
     NeMoGymResponse,
     NeMoGymResponseCreateParamsNonStreaming,
 )
-from nemo_gym.server_utils import BaseRunServerConfig, BaseServer, SimpleServer
+from nemo_gym.server_utils import BaseRunServerInstanceConfig, BaseServer, SimpleServer
 
 
-class BaseResourcesServerConfig(BaseRunServerConfig):
+class BaseResourcesServerConfig(BaseRunServerInstanceConfig):
     pass
 
 
@@ -43,16 +43,30 @@ class BaseVerifyResponse(BaseVerifyRequest):
     reward: float
 
 
+class BaseSeedSessionRequest(BaseModel):
+    pass
+
+
+class BaseSeedSessionResponse(BaseModel):
+    pass
+
+
 class SimpleResourcesServer(BaseResourcesServer, SimpleServer):
     config: BaseResourcesServerConfig
 
     def setup_webserver(self) -> FastAPI:
         app = FastAPI()
 
+        self.setup_session_middleware(app)
+
+        app.post("/seed_session")(self.seed_session)
         app.post("/verify")(self.verify)
 
         return app
 
+    async def seed_session(self, body: BaseSeedSessionRequest) -> BaseSeedSessionResponse:
+        return BaseSeedSessionResponse()
+
     @abstractmethod
     async def verify(self, body: BaseVerifyRequest) -> BaseVerifyResponse:
         pass
@@ -20,10 +20,10 @@
     NeMoGymResponse,
     NeMoGymResponseCreateParamsNonStreaming,
 )
-from nemo_gym.server_utils import BaseRunServerConfig, BaseServer, SimpleServer
+from nemo_gym.server_utils import BaseRunServerInstanceConfig, BaseServer, SimpleServer
 
 
-class BaseResponsesAPIAgentConfig(BaseRunServerConfig):
+class BaseResponsesAPIAgentConfig(BaseRunServerInstanceConfig):
     pass
 
 
@@ -37,6 +37,8 @@ class SimpleResponsesAPIAgent(BaseResponsesAPIAgent, SimpleServer):
     def setup_webserver(self) -> FastAPI:
         app = FastAPI()
 
+        self.setup_session_middleware(app)
+
         app.post("/v1/responses")(self.responses)
         app.post("/run")(self.run)
 
 
@@ -21,10 +21,10 @@
     NeMoGymResponse,
     NeMoGymResponseCreateParamsNonStreaming,
 )
-from nemo_gym.server_utils import BaseRunServerConfig, BaseServer, SimpleServer
+from nemo_gym.server_utils import BaseRunServerInstanceConfig, BaseServer, SimpleServer
 
 
-class BaseResponsesAPIModelConfig(BaseRunServerConfig):
+class BaseResponsesAPIModelConfig(BaseRunServerInstanceConfig):
     pass
 
 
@@ -36,6 +36,8 @@ class SimpleResponsesAPIModel(BaseResponsesAPIModel, SimpleServer):
     def setup_webserver(self) -> FastAPI:
         app = FastAPI()
 
+        self.setup_session_middleware(app)
+
         app.post("/v1/chat/completions")(self.chat_completions)
 
         app.post("/v1/responses")(self.responses)
 
@@ -20,6 +20,7 @@
 from pathlib import Path
 from subprocess import Popen
 from threading import Thread
+from time import sleep
 from typing import Dict, List, Optional
 
 from devtools import pprint
@@ -35,7 +36,7 @@
     GlobalConfigDictParserConfig,
     get_global_config_dict,
 )
-from nemo_gym.server_utils import HeadServer
+from nemo_gym.server_utils import HEAD_SERVER_KEY_NAME, HeadServer, ServerClient, ServerStatus
 
 
 def _setup_env_command(dir_path: Path) -> str:  # pragma: no cover
@@ -71,7 +72,7 @@ def model_post_init(self, context):  # pragma: no cover
         return super().model_post_init(context)
 
 
-class ServerInstance(BaseModel):
+class ServerInstanceDisplayConfig(BaseModel):
     process_name: str
     server_type: str
     name: str
@@ -87,7 +88,8 @@ class ServerInstance(BaseModel):
 class RunHelper:  # pragma: no cover
     _head_server_thread: Thread
     _processes: Dict[str, Popen]
-    _server_instances: List[ServerInstance]
+    _server_instance_display_configs: List[ServerInstanceDisplayConfig]
+    _server_client: ServerClient
 
     def start(self, global_config_dict_parser_config: GlobalConfigDictParserConfig) -> None:
         global_config_dict = get_global_config_dict(global_config_dict_parser_config=global_config_dict_parser_config)
@@ -100,8 +102,8 @@ def start(self, global_config_dict_parser_config: GlobalConfigDictParserConfig)
 
         top_level_paths = [k for k in global_config_dict.keys() if k not in NEMO_GYM_RESERVED_TOP_LEVEL_KEYS]
 
-        processes: Dict[str, Popen] = dict()
-        server_instances: List[ServerInstance] = []
+        self._processes: Dict[str, Popen] = dict()
+        self._server_instance_display_configs: List[ServerInstanceDisplayConfig] = []
 
         # TODO there is a better way to resolve this that uses nemo_gym/global_config.py::ServerInstanceConfig
         for top_level_path in top_level_paths:
@@ -133,13 +135,13 @@ def start(self, global_config_dict_parser_config: GlobalConfigDictParserConfig)
     python {str(entrypoint_fpath)}"""
 
             process = _run_command(command, dir_path)
-            processes[top_level_path] = process
+            self._processes[top_level_path] = process
 
             host = server_config_dict.get("host")
             port = server_config_dict.get("port")
 
-            server_instances.append(
-                ServerInstance(
+            self._server_instance_display_configs.append(
+                ServerInstanceDisplayConfig(
                     process_name=top_level_path,
                     server_type=first_key,
                     name=second_key,
@@ -153,14 +155,25 @@ def start(self, global_config_dict_parser_config: GlobalConfigDictParserConfig)
                 )
             )
 
-        self._processes = processes
-        self._server_instances = server_instances
+        self._server_client = ServerClient(
+            head_server_config=ServerClient.load_head_server_config(),
+            global_config_dict=global_config_dict,
+        )
+
+        print("Waiting for head server to spin up")
+        while True:
+            status = self._server_client.poll_for_status(HEAD_SERVER_KEY_NAME)
+            if status == "success":
+                break
 
-        # TODO: Server block summaries may get cut off/interleaved by other process output(s)
-        self.display_server_instance_info()
+            print(f"Head server is not up yet (status `{status}`). Sleeping 3s")
+            sleep(3)
+
+        print("Waiting for servers to spin up")
+        self.wait_for_spinup()
 
     def display_server_instance_info(self) -> None:
-        if not getattr(self, "_server_instances", None):
+        if not self._server_instance_display_configs:
             print("No server instances to display.")
             return
 
@@ -172,7 +185,7 @@ def display_server_instance_info(self) -> None:
 {"#" * 100}
 """)
 
-        for i, inst in enumerate(self._server_instances, 1):
+        for i, inst in enumerate(self._server_instance_display_configs, 1):
             print(f"[{i}] {inst.process_name} ({inst.server_type}/{inst.name})")
             pprint(inst.model_dump())
         print(f"{'#' * 100}\n")
@@ -185,12 +198,37 @@ def poll(self) -> None:
             if process.poll() is not None:
                 raise RuntimeError(f"Process `{process_name}` finished unexpectedly!")
 
+    def wait_for_spinup(self) -> None:
+        sleep_interval = 3
+
+        # Until we spin up or error out.
+        while True:
+            self.poll()
+            statuses = self.check_http_server_statuses()
+
+            num_spun_up = statuses.count("success")
+            if len(statuses) != num_spun_up:
+                print(
+                    f"""{num_spun_up} / {len(statuses)} servers ready ({statuses.count("timeout")} timed out, {statuses.count("connection_error")} connection errored, {statuses.count("unknown_error")} had unknown errors).
+Waiting for servers to spin up. Sleeping {sleep_interval}s..."""
+                )
+            else:
+                print(f"All {num_spun_up} / {len(statuses)} servers ready! Polling every 60s")
+                self.display_server_instance_info()
+                return
+
+            sleep(sleep_interval)
+
     def run_forever(self) -> None:
         async def sleep():
             # Indefinitely
             while True:
                 self.poll()
-                await asyncio.sleep(60)  # Check every 60s.
+
+                statuses = self.check_http_server_statuses()
+                assert statuses.count("success") == len(statuses), "Found non-success statuses"
+
+                await asyncio.sleep(60)
 
         try:
             asyncio.run(sleep())
@@ -204,6 +242,18 @@ async def sleep():
 
             print("NeMo Gym finished!")
 
+    def check_http_server_statuses(self) -> List[ServerStatus]:
+        print(
+            "Checking for HTTP server statuses (you should see some HTTP requests to `/` that may 404. This is expected.)"
+        )
+        statuses = []
+        for server_instance_display_config in self._server_instance_display_configs:
+            name = server_instance_display_config.config_path
+            status = self._server_client.poll_for_status(name)
+            statuses.append(status)
+
+        return statuses
+
 
 def run(
     global_config_dict_parser_config: Optional[GlobalConfigDictParserConfig] = None,
@@ -447,7 +497,7 @@ def init_resources_server():  # pragma: no cover
         name: {server_type_name}_resources_server
       model_server:
         type: responses_api_models
-        name: openai_model
+        name: policy_model
       datasets:
       - name: train
         type: train
 
@@ -87,7 +87,14 @@ class DatasetConfig(BaseModel):
     jsonl_fpath: str
 
     gitlab_identifier: Optional[JsonlDatasetGitlabIdentifer] = None
-    license: Optional[Union[Literal["Apache 2.0"], Literal["TBD"]]] = None
+    license: Optional[
+        Union[
+            Literal["Apache 2.0"],
+            Literal["Creative Commons Attribution 4.0 International"],
+            Literal["Creative Commons Attribution-ShareAlike 4.0 International"],
+            Literal["TBD"],
+        ]
+    ] = None
 
     @model_validator(mode="after")
     def check_train_validation_sets(self) -> "DatasetConfig":
@@ -112,6 +119,10 @@ class BaseRunServerConfig(BaseServerConfig):
     entrypoint: str
 
 
+class BaseRunServerInstanceConfig(BaseRunServerConfig):
+    name: str  # This name is unique at runtime.
+
+
 ########################################
 # Server type and server instance configs
 ########################################
 
@@ -51,7 +51,7 @@ def format_function_call_output(m: FunctionCallOutput) -> List[ChatMessage]:
         ChatMessage(
             content=content,
             role="assistant",
-            metadata=MetadataDict(title="Function call output", status="done"),
+            metadata=MetadataDict(title=f"Function call output (tool call ID `{m['call_id']}`)", status="done"),
         )
     ]
 
@@ -67,7 +67,7 @@ def format_function_call(m: ResponseFunctionToolCallParam) -> List[ChatMessage]:
         ChatMessage(
             content=content,
             role="assistant",
-            metadata=MetadataDict(title=f"Function call - `{name}`", status="done"),
+            metadata=MetadataDict(title=f"Function call - `{name}` (tool call ID `{m['call_id']}`)", status="done"),
         )
     ]
 
 
@@ -197,6 +197,15 @@ class NeMoGymResponseReasoningItemForTraining(NeMoGymResponseReasoningItem, Toke
     pass
 
 
+RESPONSES_TO_TRAIN = {
+    NeMoGymEasyInputMessage: NeMoGymEasyInputMessageForTraining,
+    NeMoGymMessage: NeMoGymMessageForTraining,
+    NeMoGymResponseOutputMessage: NeMoGymResponseOutputMessageForTraining,
+    NeMoGymResponseFunctionToolCall: NeMoGymResponseFunctionToolCallForTraining,
+    NeMoGymResponseReasoningItem: NeMoGymResponseReasoningItemForTraining,
+}
+
+
 NeMoGymResponseInputItem = Union[
     NeMoGymEasyInputMessage,
     NeMoGymMessage,
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ def format_function_call_output(m: FunctionCallOutput) -> List[ChatMessage]:`
`51`	`51`	`ChatMessage(`
`52`	`52`	`content=content,`
`53`	`53`	`role="assistant",`
`54`		`- metadata=MetadataDict(title="Function call output", status="done"),`
	`54`	+ metadata=MetadataDict(title=f"Function call output (tool call ID `{m['call_id']}`)", status="done"),
`55`	`55`	`)`
`56`	`56`	`]`
`57`	`57`
`@@ -67,7 +67,7 @@ def format_function_call(m: ResponseFunctionToolCallParam) -> List[ChatMessage]:`
`67`	`67`	`ChatMessage(`
`68`	`68`	`content=content,`
`69`	`69`	`role="assistant",`
`70`		- metadata=MetadataDict(title=f"Function call - `{name}`", status="done"),
	`70`	+ metadata=MetadataDict(title=f"Function call - `{name}` (tool call ID `{m['call_id']}`)", status="done"),
`71`	`71`	`)`
`72`	`72`	`]`
`73`	`73`