Skip to content

Commit c70fea4

Browse files
bxyu-nvidia, abhibha-nvidia
authored and committed
VLLMModel fix whitespace stripping and unwarranted spaces (#70)
Signed-off-by: Brian Yu <bxyu@nvidia.com> Signed-off-by: Abhibha Gupta <abhibhag@nvidia.com>
1 parent 5923794 commit c70fea4

File tree

2 files changed

+129
-12
lines changed

2 files changed

+129
-12
lines changed

responses_api_models/vllm_model/app.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,7 @@ def _parse_think_tags(cls, content: str) -> Tuple[List[str], str]:
259259
# Extract reasoning content from between <think></think> tags.
260260
matches = cls.THINK_TAG_PATTERN.findall(content)
261261
# Remove reasoning from main content
262-
cleaned = cls.THINK_TAG_PATTERN.sub("", content).strip()
262+
cleaned = cls.THINK_TAG_PATTERN.sub("", content)
263263
return matches, cleaned
264264

265265
# =======================================================
@@ -375,7 +375,7 @@ def _format_message(
375375
# Handle reasoning
376376
final_content = ""
377377
if isinstance(m["content"], list):
378-
content_str = " ".join([part.get("text", "") for part in m["content"]])
378+
content_str = "".join([part.get("text", "") for part in m["content"]])
379379
final_content += content_str
380380
elif isinstance(m["content"], str):
381381
final_content += m["content"]
@@ -463,8 +463,7 @@ def postprocess_chat_response(self, choice: NeMoGymChoice) -> List[NeMoGymRespon
463463
id=f"rs_{uuid4().hex}",
464464
type="reasoning",
465465
summary=[
466-
NeMoGymSummary(text=reasoning_text.strip(), type="summary_text")
467-
for reasoning_text in reasoning_matches
466+
NeMoGymSummary(text=reasoning_text, type="summary_text") for reasoning_text in reasoning_matches
468467
],
469468
status="completed",
470469
)

responses_api_models/vllm_model/tests/test_app.py

Lines changed: 126 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from fastapi.testclient import TestClient
1919
from pytest import MonkeyPatch, mark
2020

21+
import nemo_gym.server_utils
2122
from nemo_gym import PARENT_DIR
2223
from nemo_gym.openai_utils import (
2324
NeMoGymAsyncOpenAI,
@@ -659,7 +660,7 @@ class FakeUUID:
659660

660661

661662
class TestApp:
662-
def _setup_server(self):
663+
def _setup_server(self, monkeypatch: MonkeyPatch):
663664
config = VLLMModelConfig(
664665
host="0.0.0.0",
665666
port=8081,
@@ -670,13 +671,18 @@ def _setup_server(self):
670671
name="",
671672
return_token_id_information=False,
672673
)
674+
675+
get_global_config_dict_mock = MagicMock()
676+
get_global_config_dict_mock.return_value = dict()
677+
monkeypatch.setattr(nemo_gym.server_utils, "get_global_config_dict", get_global_config_dict_mock)
678+
673679
return VLLMModel(config=config, server_client=MagicMock(spec=ServerClient))
674680

675-
async def test_sanity(self) -> None:
676-
self._setup_server()
681+
async def test_sanity(self, monkeypatch: MonkeyPatch) -> None:
682+
self._setup_server(monkeypatch)
677683

678684
def test_responses_multistep(self, monkeypatch: MonkeyPatch):
679-
server = self._setup_server()
685+
server = self._setup_server(monkeypatch)
680686
app = server.setup_webserver()
681687
client = TestClient(app)
682688

@@ -881,7 +887,7 @@ def _standardize(messages: list) -> list:
881887
assert expected_sent_tools == actual_sent_tools
882888

883889
def test_responses_multiturn(self, monkeypatch: MonkeyPatch):
884-
server = self._setup_server()
890+
server = self._setup_server(monkeypatch)
885891
app = server.setup_webserver()
886892
client = TestClient(app)
887893

@@ -1019,7 +1025,7 @@ def test_responses_multiturn(self, monkeypatch: MonkeyPatch):
10191025
assert expected_sent_messages == sent_messages
10201026

10211027
def test_responses_multistep_multiturn(self, monkeypatch: MonkeyPatch):
1022-
server = self._setup_server()
1028+
server = self._setup_server(monkeypatch)
10231029
app = server.setup_webserver()
10241030
client = TestClient(app)
10251031

@@ -1375,7 +1381,7 @@ def test_responses_e2e(
13751381
Test entire pipeline from api endpoint -> final output:
13761382
Response Create Params -> Response
13771383
"""
1378-
server = self._setup_server()
1384+
server = self._setup_server(monkeypatch)
13791385
app = server.setup_webserver()
13801386
client = TestClient(app)
13811387

@@ -1425,7 +1431,7 @@ def test_responses_to_chat_completion_create_params(
14251431
Tests conversion from api endpoint -> internal request schema
14261432
Response Params -> Chat Completion Params
14271433
"""
1428-
server = self._setup_server()
1434+
server = self._setup_server(monkeypatch)
14291435
app = server.setup_webserver()
14301436
client = TestClient(app)
14311437

@@ -2026,3 +2032,115 @@ def test_round_trip_chat_completions_return_token_id_information(self) -> None:
20262032

20272033
expected_output = test_data["expected_output_return_token_id_information"]
20282034
assert expected_output == chat_completion_create_params.model_dump()
2035+
2036+
def test_whitespace_round_trip_chat_completions(self, monkeypatch: MonkeyPatch) -> None:
2037+
monkeypatch.setattr("responses_api_models.vllm_model.app.uuid4", lambda: FakeUUID())
2038+
2039+
message = NeMoGymChatCompletionMessage(
2040+
content="<think> \n \n I'm thinking \n \n </think> \n \n I'm chatting! \n \n ",
2041+
role="assistant",
2042+
tool_calls=[
2043+
NeMoGymChatCompletionMessageToolCall(
2044+
id="tool call 1",
2045+
function=NeMoGymFunction(name="get_weather", arguments='{"city_name": "new york"}'),
2046+
type="function",
2047+
),
2048+
NeMoGymChatCompletionMessageToolCall(
2049+
id="tool call 2",
2050+
function=NeMoGymFunction(name="get_weather", arguments='{"city_name": "boston"}'),
2051+
type="function",
2052+
),
2053+
],
2054+
)
2055+
actual_response_output_items = self.converter.postprocess_chat_response(
2056+
choice=NeMoGymChoice(
2057+
finish_reason="tool_calls",
2058+
index=0,
2059+
message=message,
2060+
)
2061+
)
2062+
expected_response_output_items = [
2063+
NeMoGymResponseReasoningItem(
2064+
id="rs_123",
2065+
summary=[NeMoGymSummary(text=" \n \n I'm thinking \n \n ", type="summary_text")],
2066+
type="reasoning",
2067+
encrypted_content=None,
2068+
),
2069+
NeMoGymResponseOutputMessage(
2070+
id="msg_123",
2071+
content=[
2072+
NeMoGymResponseOutputText(
2073+
annotations=[], text=" \n \n I'm chatting! \n \n ", type="output_text", logprobs=None
2074+
)
2075+
],
2076+
role="assistant",
2077+
status="completed",
2078+
type="message",
2079+
),
2080+
NeMoGymResponseFunctionToolCall(
2081+
arguments='{"city_name": "new york"}',
2082+
call_id="tool call 1",
2083+
name="get_weather",
2084+
type="function_call",
2085+
id="tool call 1",
2086+
status="completed",
2087+
),
2088+
NeMoGymResponseFunctionToolCall(
2089+
arguments='{"city_name": "boston"}',
2090+
call_id="tool call 2",
2091+
name="get_weather",
2092+
type="function_call",
2093+
id="tool call 2",
2094+
status="completed",
2095+
),
2096+
]
2097+
assert expected_response_output_items == actual_response_output_items
2098+
2099+
chat_completion_create_params = self.converter.responses_to_chat_completion_create_params(
2100+
responses_create_params=NeMoGymResponseCreateParamsNonStreaming(
2101+
input=[
2102+
NeMoGymEasyInputMessage(
2103+
content=" \n \n system \n \n ",
2104+
role="system",
2105+
),
2106+
NeMoGymEasyInputMessage(
2107+
content=" \n \n hello! \n \n ",
2108+
role="user",
2109+
),
2110+
*actual_response_output_items,
2111+
],
2112+
)
2113+
)
2114+
actual_messages = chat_completion_create_params.messages
2115+
2116+
expected_messages = [
2117+
NeMoGymChatCompletionSystemMessageParam(
2118+
content=" \n \n system \n \n ",
2119+
role="system",
2120+
),
2121+
NeMoGymChatCompletionUserMessageParam(
2122+
content=" \n \n hello! \n \n ",
2123+
role="user",
2124+
),
2125+
NeMoGymChatCompletionAssistantMessageParam(
2126+
role="assistant",
2127+
content="<think> \n \n I'm thinking \n \n </think> \n \n I'm chatting! \n \n ",
2128+
tool_calls=[
2129+
NeMoGymChatCompletionMessageToolCallParam(
2130+
id="tool call 1",
2131+
function=NeMoGymChatCompletionMessageToolCallFunctionParam(
2132+
name="get_weather", arguments='{"city_name": "new york"}'
2133+
),
2134+
type="function",
2135+
),
2136+
NeMoGymChatCompletionMessageToolCallParam(
2137+
id="tool call 2",
2138+
function=NeMoGymChatCompletionMessageToolCallFunctionParam(
2139+
name="get_weather", arguments='{"city_name": "boston"}'
2140+
),
2141+
type="function",
2142+
),
2143+
],
2144+
),
2145+
]
2146+
assert expected_messages == actual_messages

0 commit comments

Comments
 (0)