From baea83cb3d7c26d8607da803175d65b4d8c2370f Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 08:41:59 -0700 Subject: [PATCH 01/12] fix: use agent's configured model for eval simulations instead of gpt-4.1-mini default The LLM mocker and input mocker for eval simulations were falling back to the chat_completions default model (gpt-4.1-mini) when no simulation-specific model was configured. This caused 417 errors in EU regions where gpt-4.1-mini isn't provisioned for the agentsplayground product. Now threads the agent's own model through MockingContext so simulations use the same model the agent is configured with. The model is read from agent.json (server/debug path) or from the runtime schema (CLI eval path). Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/src/uipath/_cli/cli_eval.py | 6 +++++- .../src/uipath/eval/mocks/_input_mocker.py | 5 +++++ .../src/uipath/eval/mocks/_llm_mocker.py | 4 ++++ .../src/uipath/eval/mocks/_mock_runtime.py | 18 ++++++++++++++++++ .../uipath/src/uipath/eval/mocks/_types.py | 1 + .../uipath/src/uipath/eval/runtime/context.py | 1 + .../uipath/src/uipath/eval/runtime/runtime.py | 2 ++ 7 files changed, 36 insertions(+), 1 deletion(-) diff --git a/packages/uipath/src/uipath/_cli/cli_eval.py b/packages/uipath/src/uipath/_cli/cli_eval.py index ef1edb200..e82c33fa9 100644 --- a/packages/uipath/src/uipath/_cli/cli_eval.py +++ b/packages/uipath/src/uipath/_cli/cli_eval.py @@ -428,10 +428,14 @@ async def execute_eval(): eval_context.runtime_schema = await runtime.get_schema() + eval_context.agent_model = _get_agent_model( + eval_context.runtime_schema + ) + eval_context.evaluators = await EvalHelpers.load_evaluators( resolved_eval_set_path, eval_context.evaluation_set, - _get_agent_model(eval_context.runtime_schema), + eval_context.agent_model, ) # Runtime is not required anymore. diff --git a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py index f1d253ba8..89a57a091 100644 --- a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py @@ -65,6 +65,7 @@ async def generate_llm_input( input_schema: dict[str, Any], expected_behavior: str, expected_output: dict[str, Any], + agent_model: str | None = None, ) -> dict[str, Any]: """Generate synthetic input using an LLM based on the evaluation context.""" # Set custom span attributes to match agents repo pattern @@ -117,6 +118,10 @@ async def generate_llm_input( else {} ) + # Use the agent's configured model when no simulation-specific model is set + if "model" not in completion_kwargs and agent_model: + completion_kwargs["model"] = agent_model + if cache_manager is not None: cache_key_data = { "response_format": response_format, diff --git a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py index 194aa6c09..1c5ac5009 100644 --- a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py @@ -182,6 +182,10 @@ async def response( else {} ) + # Use the agent's configured model when no simulation-specific model is set + if "model" not in completion_kwargs and self.context.agent_model: + completion_kwargs["model"] = self.context.agent_model + formatted_prompt = PROMPT.format(**prompt_generation_args) cache_key_data = { diff --git a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py index 512d8d6ee..5ba404650 100644 --- a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py +++ b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py @@ -34,6 +34,20 @@ logger = logging.getLogger(__name__) +def _read_agent_model() -> str | None: + """Read the agent's configured model from agent.json in current directory.""" + agent_path = Path.cwd() / "agent.json" + if not agent_path.exists(): + return None + try: + with open(agent_path, "r", encoding="utf-8") as f: + agent_data = json.load(f) + return agent_data.get("settings", {}).get("model") + except Exception as e: + logger.warning(f"Failed to read agent model from agent.json: {e}") + return None + + def load_simulation_config() -> MockingContext | None: """Load simulation.json from current directory and convert to MockingContext. @@ -70,11 +84,15 @@ def load_simulation_config() -> MockingContext | None: tools_to_simulate=tools_to_simulate, ) + # Read agent model so simulations use the same model as the agent + agent_model = _read_agent_model() + # Create MockingContext for debugging mocking_context = MockingContext( strategy=mocking_strategy, name="debug-simulation", inputs={}, + agent_model=agent_model, ) logger.info(f"Loaded simulation config for {len(tools_to_simulate)} tool(s)") diff --git a/packages/uipath/src/uipath/eval/mocks/_types.py b/packages/uipath/src/uipath/eval/mocks/_types.py index 827569879..043613c44 100644 --- a/packages/uipath/src/uipath/eval/mocks/_types.py +++ b/packages/uipath/src/uipath/eval/mocks/_types.py @@ -127,6 +127,7 @@ class MockingContext(BaseModel): strategy: MockingStrategy | None inputs: dict[str, Any] = Field(default_factory=lambda: {}) name: str = Field(default="debug") + agent_model: str | None = Field(default=None) class ExampleCall(BaseModel): diff --git a/packages/uipath/src/uipath/eval/runtime/context.py b/packages/uipath/src/uipath/eval/runtime/context.py index b8224718c..81263c54d 100644 --- a/packages/uipath/src/uipath/eval/runtime/context.py +++ b/packages/uipath/src/uipath/eval/runtime/context.py @@ -27,3 +27,4 @@ class UiPathEvalContext: input_overrides: dict[str, Any] | None = None resume: bool = False job_id: str | None = None + agent_model: str | None = None diff --git a/packages/uipath/src/uipath/eval/runtime/runtime.py b/packages/uipath/src/uipath/eval/runtime/runtime.py index 1c32b9efe..ffba867b4 100644 --- a/packages/uipath/src/uipath/eval/runtime/runtime.py +++ b/packages/uipath/src/uipath/eval/runtime/runtime.py @@ -534,6 +534,7 @@ async def _execute_eval( strategy=eval_item.mocking_strategy, name=eval_item.name, inputs=eval_item.inputs, + agent_model=self.context.agent_model, ), eval_set_run_id=self.context.eval_set_run_id, ) @@ -816,6 +817,7 @@ async def _generate_input_for_eval( (await self.get_schema()).input, expected_behavior=eval_item.expected_agent_behavior or "", expected_output=expected_output, + agent_model=self.context.agent_model, ) updated_eval_item = eval_item.model_copy(update={"inputs": generated_input}) return updated_eval_item From b336544c1349f3f0a7c28b0e247559a8958557cd Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 08:49:53 -0700 Subject: [PATCH 02/12] refactor: pass agent_model from runtime schema instead of reading agent.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove _read_agent_model() file-reading hack. The model is already available from the runtime schema — pass it through UiPathMockRuntime from the callers (cli_debug.py and eval runtime) who have access to it. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/src/uipath/_cli/cli_debug.py | 7 +++++ .../src/uipath/eval/mocks/_mock_runtime.py | 29 +++++++------------ .../uipath/src/uipath/eval/runtime/runtime.py | 1 + 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/packages/uipath/src/uipath/_cli/cli_debug.py b/packages/uipath/src/uipath/_cli/cli_debug.py index 44415dd26..eb2ebc160 100644 --- a/packages/uipath/src/uipath/_cli/cli_debug.py +++ b/packages/uipath/src/uipath/_cli/cli_debug.py @@ -163,8 +163,15 @@ async def execute_debug_runtime(): trigger_poll_interval=trigger_poll_interval, ) + # Get agent model from runtime schema for simulations + schema = await runtime.get_schema() + agent_model = None + if schema.metadata and "settings" in schema.metadata: + agent_model = schema.metadata["settings"].get("model") + mock_runtime = UiPathMockRuntime( delegate=debug_runtime, + agent_model=agent_model, ) try: diff --git a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py index 5ba404650..63814b13d 100644 --- a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py +++ b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py @@ -34,21 +34,7 @@ logger = logging.getLogger(__name__) -def _read_agent_model() -> str | None: - """Read the agent's configured model from agent.json in current directory.""" - agent_path = Path.cwd() / "agent.json" - if not agent_path.exists(): - return None - try: - with open(agent_path, "r", encoding="utf-8") as f: - agent_data = json.load(f) - return agent_data.get("settings", {}).get("model") - except Exception as e: - logger.warning(f"Failed to read agent model from agent.json: {e}") - return None - - -def load_simulation_config() -> MockingContext | None: +def load_simulation_config(agent_model: str | None = None) -> MockingContext | None: """Load simulation.json from current directory and convert to MockingContext. Returns: @@ -84,9 +70,6 @@ def load_simulation_config() -> MockingContext | None: tools_to_simulate=tools_to_simulate, ) - # Read agent model so simulations use the same model as the agent - agent_model = _read_agent_model() - # Create MockingContext for debugging mocking_context = MockingContext( strategy=mocking_strategy, @@ -151,9 +134,17 @@ def __init__( span_collector: ExecutionSpanCollector | None = None, execution_id: str | None = None, eval_set_run_id: str | None = None, + agent_model: str | None = None, ): self.delegate = delegate - self._mocking_context = mocking_context or load_simulation_config() + self._mocking_context = mocking_context or load_simulation_config( + agent_model=agent_model + ) + # If mocking_context was passed without agent_model, inject it + if self._mocking_context and not self._mocking_context.agent_model and agent_model: + self._mocking_context = self._mocking_context.model_copy( + update={"agent_model": agent_model} + ) self._span_collector = span_collector or ExecutionSpanCollector() self._execution_id = execution_id or str(uuid.uuid4()) self._eval_set_run_id = eval_set_run_id diff --git a/packages/uipath/src/uipath/eval/runtime/runtime.py b/packages/uipath/src/uipath/eval/runtime/runtime.py index ffba867b4..8b25b01eb 100644 --- a/packages/uipath/src/uipath/eval/runtime/runtime.py +++ b/packages/uipath/src/uipath/eval/runtime/runtime.py @@ -870,6 +870,7 @@ async def execute_runtime( span_collector=self.span_collector, execution_id=execution_id, eval_set_run_id=eval_set_run_id, + agent_model=self.context.agent_model, ) execution_runtime = UiPathExecutionRuntime( From cd5aeacd01babb2602cdbce52ca0bf294f12968f Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 09:08:16 -0700 Subject: [PATCH 03/12] fix: ruff format and update test mocks for get_schema call - Fix ruff formatting in _mock_runtime.py - Add get_schema AsyncMock to mock_runtime in debug simulation tests, since cli_debug.py now calls runtime.get_schema() to extract the agent model before creating UiPathMockRuntime Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/src/uipath/eval/mocks/_mock_runtime.py | 6 +++++- packages/uipath/tests/cli/test_debug_simulation.py | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py index 63814b13d..bf2dae649 100644 --- a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py +++ b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py @@ -141,7 +141,11 @@ def __init__( agent_model=agent_model ) # If mocking_context was passed without agent_model, inject it - if self._mocking_context and not self._mocking_context.agent_model and agent_model: + if ( + self._mocking_context + and not self._mocking_context.agent_model + and agent_model + ): self._mocking_context = self._mocking_context.model_copy( update={"agent_model": agent_model} ) diff --git a/packages/uipath/tests/cli/test_debug_simulation.py b/packages/uipath/tests/cli/test_debug_simulation.py index b2d795c79..d9266327b 100644 --- a/packages/uipath/tests/cli/test_debug_simulation.py +++ b/packages/uipath/tests/cli/test_debug_simulation.py @@ -241,6 +241,9 @@ def test_debug_always_wraps_with_mock_runtime( ) as mock_factory_get: mock_runtime = Mock() mock_runtime.dispose = AsyncMock() + mock_runtime.get_schema = AsyncMock( + return_value=Mock(metadata=None) + ) mock_factory = Mock() mock_factory.new_runtime = AsyncMock(return_value=mock_runtime) @@ -305,6 +308,9 @@ def test_debug_wraps_with_mock_runtime_on_error( ) as mock_factory_get: mock_runtime = Mock() mock_runtime.dispose = AsyncMock() + mock_runtime.get_schema = AsyncMock( + return_value=Mock(metadata=None) + ) mock_factory = Mock() mock_factory.new_runtime = AsyncMock(return_value=mock_runtime) From b093568d817b75c9b52285be6b554dfa0e957c7b Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 09:11:55 -0700 Subject: [PATCH 04/12] chore: bump uipath version to 2.10.47 Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/uipath/pyproject.toml b/packages/uipath/pyproject.toml index 7eb42ce59..25c704503 100644 --- a/packages/uipath/pyproject.toml +++ b/packages/uipath/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath" -version = "2.10.46" +version = "2.10.47" description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools." readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" From 5eaea72b3e2979906473175622324eb67378dab2 Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 09:19:10 -0700 Subject: [PATCH 05/12] fix: update uv.lock and add simulation model logging - Regenerate uv.lock after version bump to 2.10.47 - Log the model being used when running simulation mocking Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/src/uipath/eval/mocks/_input_mocker.py | 6 ++++++ packages/uipath/src/uipath/eval/mocks/_llm_mocker.py | 5 +++++ packages/uipath/uv.lock | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py index 89a57a091..2bf6e2efb 100644 --- a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py @@ -1,6 +1,7 @@ """LLM Input Mocker implementation.""" import json +import logging from datetime import datetime from typing import Any @@ -17,6 +18,8 @@ InputMockingStrategy, ) +logger = logging.getLogger(__name__) + def get_input_mocking_prompt( input_schema: str, @@ -122,6 +125,9 @@ async def generate_llm_input( if "model" not in completion_kwargs and agent_model: completion_kwargs["model"] = agent_model + simulation_model = completion_kwargs.get("model", "default") + logger.info(f"Simulating input generation using model: {simulation_model}") + if cache_manager is not None: cache_key_data = { "response_format": response_format, diff --git a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py index 1c5ac5009..e9e797f1c 100644 --- a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py @@ -186,6 +186,11 @@ async def response( if "model" not in completion_kwargs and self.context.agent_model: completion_kwargs["model"] = self.context.agent_model + simulation_model = completion_kwargs.get("model", "default") + logger.info( + f"Simulating tool '{function_name}' using model: {simulation_model}" + ) + formatted_prompt = PROMPT.format(**prompt_generation_args) cache_key_data = { diff --git a/packages/uipath/uv.lock b/packages/uipath/uv.lock index 3c06d2318..f956b3d82 100644 --- a/packages/uipath/uv.lock +++ b/packages/uipath/uv.lock @@ -2543,7 +2543,7 @@ wheels = [ [[package]] name = "uipath" -version = "2.10.46" +version = "2.10.47" source = { editable = "." } dependencies = [ { name = "applicationinsights" }, From 075b14a4c9f7b93c0389a3db5d4818ccb59060e3 Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 09:53:12 -0700 Subject: [PATCH 06/12] fix: log actual default model name instead of 'default' Show gpt-4.1-mini-2025-04-14 in logs when no agent model is set, so it's clear which model the simulation falls back to. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/src/uipath/eval/mocks/_input_mocker.py | 5 ++++- packages/uipath/src/uipath/eval/mocks/_llm_mocker.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py index 2bf6e2efb..dc9fb74b4 100644 --- a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py @@ -10,6 +10,7 @@ from uipath.core.tracing import traced from uipath.platform import UiPath from uipath.platform.chat import UiPathLlmChatService +from uipath.platform.chat._llm_gateway_service import ChatModels from .._execution_context import eval_set_run_id_context from ._mock_context import cache_manager_context @@ -125,7 +126,9 @@ async def generate_llm_input( if "model" not in completion_kwargs and agent_model: completion_kwargs["model"] = agent_model - simulation_model = completion_kwargs.get("model", "default") + simulation_model = completion_kwargs.get( + "model", ChatModels.gpt_4_1_mini_2025_04_14 + ) logger.info(f"Simulating input generation using model: {simulation_model}") if cache_manager is not None: diff --git a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py index e9e797f1c..83e3498fb 100644 --- a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py @@ -10,7 +10,7 @@ from uipath.core.tracing import traced from uipath.platform import UiPath from uipath.platform.chat import UiPathLlmChatService -from uipath.platform.chat._llm_gateway_service import _cleanup_schema +from uipath.platform.chat._llm_gateway_service import ChatModels, _cleanup_schema from .._execution_context import ( eval_set_run_id_context, @@ -186,7 +186,9 @@ async def response( if "model" not in completion_kwargs and self.context.agent_model: completion_kwargs["model"] = self.context.agent_model - simulation_model = completion_kwargs.get("model", "default") + simulation_model = completion_kwargs.get( + "model", ChatModels.gpt_4_1_mini_2025_04_14 + ) logger.info( f"Simulating tool '{function_name}' using model: {simulation_model}" ) From 77d456582bd052c232fcdab22c02095af34e0b84 Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 10:05:02 -0700 Subject: [PATCH 07/12] refactor: set model on strategy instead of threading agent_model through context Per code review feedback: the cleanest approach is to set the correct model on LLMMockingStrategy/InputMockingStrategy before they reach the mocker, rather than adding agent_model to MockingContext. - runtime.py: set strategy.model from agent_model when strategy.model is None - _mock_runtime.py: pass agent_model to ModelSettings on strategy in load_simulation_config - Revert agent_model from MockingContext, LLMMocker fallback, and generate_llm_input param - Keep logging that shows the actual model being used Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/uipath/eval/mocks/_input_mocker.py | 5 ---- .../src/uipath/eval/mocks/_llm_mocker.py | 4 --- .../src/uipath/eval/mocks/_mock_runtime.py | 14 ++------- .../uipath/src/uipath/eval/mocks/_types.py | 1 - .../uipath/src/uipath/eval/runtime/runtime.py | 30 +++++++++++++++---- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py index dc9fb74b4..57a727ec1 100644 --- a/packages/uipath/src/uipath/eval/mocks/_input_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_input_mocker.py @@ -69,7 +69,6 @@ async def generate_llm_input( input_schema: dict[str, Any], expected_behavior: str, expected_output: dict[str, Any], - agent_model: str | None = None, ) -> dict[str, Any]: """Generate synthetic input using an LLM based on the evaluation context.""" # Set custom span attributes to match agents repo pattern @@ -122,10 +121,6 @@ async def generate_llm_input( else {} ) - # Use the agent's configured model when no simulation-specific model is set - if "model" not in completion_kwargs and agent_model: - completion_kwargs["model"] = agent_model - simulation_model = completion_kwargs.get( "model", ChatModels.gpt_4_1_mini_2025_04_14 ) diff --git a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py index 83e3498fb..3715ac226 100644 --- a/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py +++ b/packages/uipath/src/uipath/eval/mocks/_llm_mocker.py @@ -182,10 +182,6 @@ async def response( else {} ) - # Use the agent's configured model when no simulation-specific model is set - if "model" not in completion_kwargs and self.context.agent_model: - completion_kwargs["model"] = self.context.agent_model - simulation_model = completion_kwargs.get( "model", ChatModels.gpt_4_1_mini_2025_04_14 ) diff --git a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py index bf2dae649..03c2c8aa7 100644 --- a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py +++ b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py @@ -28,6 +28,7 @@ LLMMockingStrategy, MockingContext, MockingStrategyType, + ModelSettings, ToolSimulation, ) @@ -63,11 +64,12 @@ def load_simulation_config(agent_model: str | None = None) -> MockingContext | N if not tools_to_simulate: return None - # Create LLM mocking strategy + # Create LLM mocking strategy with the agent's model mocking_strategy = LLMMockingStrategy( type=MockingStrategyType.LLM, prompt=simulation_data.get("instructions", ""), tools_to_simulate=tools_to_simulate, + model=ModelSettings(model=agent_model) if agent_model else None, ) # Create MockingContext for debugging @@ -75,7 +77,6 @@ def load_simulation_config(agent_model: str | None = None) -> MockingContext | N strategy=mocking_strategy, name="debug-simulation", inputs={}, - agent_model=agent_model, ) logger.info(f"Loaded simulation config for {len(tools_to_simulate)} tool(s)") @@ -140,15 +141,6 @@ def __init__( self._mocking_context = mocking_context or load_simulation_config( agent_model=agent_model ) - # If mocking_context was passed without agent_model, inject it - if ( - self._mocking_context - and not self._mocking_context.agent_model - and agent_model - ): - self._mocking_context = self._mocking_context.model_copy( - update={"agent_model": agent_model} - ) self._span_collector = span_collector or ExecutionSpanCollector() self._execution_id = execution_id or str(uuid.uuid4()) self._eval_set_run_id = eval_set_run_id diff --git a/packages/uipath/src/uipath/eval/mocks/_types.py b/packages/uipath/src/uipath/eval/mocks/_types.py index 043613c44..827569879 100644 --- a/packages/uipath/src/uipath/eval/mocks/_types.py +++ b/packages/uipath/src/uipath/eval/mocks/_types.py @@ -127,7 +127,6 @@ class MockingContext(BaseModel): strategy: MockingStrategy | None inputs: dict[str, Any] = Field(default_factory=lambda: {}) name: str = Field(default="debug") - agent_model: str | None = Field(default=None) class ExampleCall(BaseModel): diff --git a/packages/uipath/src/uipath/eval/runtime/runtime.py b/packages/uipath/src/uipath/eval/runtime/runtime.py index 8b25b01eb..73385c141 100644 --- a/packages/uipath/src/uipath/eval/runtime/runtime.py +++ b/packages/uipath/src/uipath/eval/runtime/runtime.py @@ -53,7 +53,7 @@ ) from ..mocks._mock_context import cache_manager_context from ..mocks._mock_runtime import UiPathMockRuntime -from ..mocks._types import MockingContext +from ..mocks._types import LLMMockingStrategy, MockingContext, ModelSettings from ..models import EvaluationResult from ..models.evaluation_set import ( EvaluationItem, @@ -526,15 +526,28 @@ async def _execute_eval( eval_item=eval_item, ), ) + # Set agent model on the mocking strategy if not already set + mocking_strategy = eval_item.mocking_strategy + if ( + mocking_strategy + and isinstance(mocking_strategy, LLMMockingStrategy) + and not mocking_strategy.model + and self.context.agent_model + ): + mocking_strategy = mocking_strategy.model_copy( + update={ + "model": ModelSettings(model=self.context.agent_model) + } + ) + agent_execution_output = await self.execute_runtime( eval_item, execution_id, input_overrides=self.context.input_overrides, mocking_context=MockingContext( - strategy=eval_item.mocking_strategy, + strategy=mocking_strategy, name=eval_item.name, inputs=eval_item.inputs, - agent_model=self.context.agent_model, ), eval_set_run_id=self.context.eval_set_run_id, ) @@ -812,12 +825,18 @@ async def _generate_input_for_eval( or getattr(eval_item, "expected_output", None) or {} ) + # Set agent model on the input mocking strategy if not already set + input_strategy = eval_item.input_mocking_strategy + if input_strategy and not input_strategy.model and self.context.agent_model: + input_strategy = input_strategy.model_copy( + update={"model": ModelSettings(model=self.context.agent_model)} + ) + generated_input = await generate_llm_input( - eval_item.input_mocking_strategy, + input_strategy, (await self.get_schema()).input, expected_behavior=eval_item.expected_agent_behavior or "", expected_output=expected_output, - agent_model=self.context.agent_model, ) updated_eval_item = eval_item.model_copy(update={"inputs": generated_input}) return updated_eval_item @@ -870,7 +889,6 @@ async def execute_runtime( span_collector=self.span_collector, execution_id=execution_id, eval_set_run_id=eval_set_run_id, - agent_model=self.context.agent_model, ) execution_runtime = UiPathExecutionRuntime( From 81c9858cac5c54e226cc01dbb2953af11a2fe83a Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 10:51:04 -0700 Subject: [PATCH 08/12] =?UTF-8?q?refactor:=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20build=20mocking=5Fcontext=20in=20cli=5Fdebug,=20kee?= =?UTF-8?q?p=20cli=5Feval=20minimal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per @akshaylive review: - cli_debug.py: build MockingContext via load_simulation_config(agent_model) and pass it directly, instead of relying on UiPathMockRuntime's fallback - cli_eval.py: keep _get_agent_model() call for load_evaluators unchanged, only store result on eval_context for downstream use Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/src/uipath/_cli/cli_debug.py | 9 +++++++-- packages/uipath/src/uipath/_cli/cli_eval.py | 7 +++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/packages/uipath/src/uipath/_cli/cli_debug.py b/packages/uipath/src/uipath/_cli/cli_debug.py index eb2ebc160..d2e08353b 100644 --- a/packages/uipath/src/uipath/_cli/cli_debug.py +++ b/packages/uipath/src/uipath/_cli/cli_debug.py @@ -9,6 +9,7 @@ from uipath._cli._utils._studio_project import StudioClient from uipath.core.tracing import UiPathTraceManager from uipath.eval.mocks import UiPathMockRuntime +from uipath.eval.mocks._mock_runtime import load_simulation_config from uipath.platform.common import ResourceOverwritesContext, UiPathConfig from uipath.runtime import ( UiPathExecuteOptions, @@ -163,15 +164,19 @@ async def execute_debug_runtime(): trigger_poll_interval=trigger_poll_interval, ) - # Get agent model from runtime schema for simulations + # Build mocking context with agent model for simulations schema = await runtime.get_schema() agent_model = None if schema.metadata and "settings" in schema.metadata: agent_model = schema.metadata["settings"].get("model") + mocking_context = load_simulation_config( + agent_model=agent_model + ) + mock_runtime = UiPathMockRuntime( delegate=debug_runtime, - agent_model=agent_model, + mocking_context=mocking_context, ) try: diff --git a/packages/uipath/src/uipath/_cli/cli_eval.py b/packages/uipath/src/uipath/_cli/cli_eval.py index e82c33fa9..b1e6ad495 100644 --- a/packages/uipath/src/uipath/_cli/cli_eval.py +++ b/packages/uipath/src/uipath/_cli/cli_eval.py @@ -428,14 +428,13 @@ async def execute_eval(): eval_context.runtime_schema = await runtime.get_schema() - eval_context.agent_model = _get_agent_model( - eval_context.runtime_schema - ) + agent_model = _get_agent_model(eval_context.runtime_schema) + eval_context.agent_model = agent_model eval_context.evaluators = await EvalHelpers.load_evaluators( resolved_eval_set_path, eval_context.evaluation_set, - eval_context.agent_model, + agent_model, ) # Runtime is not required anymore. From ae6f134dba00acfd395e0281cc5a85a73ac93908 Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 10:55:48 -0700 Subject: [PATCH 09/12] fix: honor UI-specified model in simulation.json over agent model load_simulation_config now checks simulation.json for a model field first. If the UI specified a model, it's honored. Otherwise falls back to the agent's configured model. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../uipath/src/uipath/eval/mocks/_mock_runtime.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py index 03c2c8aa7..30a56126a 100644 --- a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py +++ b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py @@ -64,12 +64,21 @@ def load_simulation_config(agent_model: str | None = None) -> MockingContext | N if not tools_to_simulate: return None - # Create LLM mocking strategy with the agent's model + # Honor model from simulation config if specified, otherwise use the agent model + simulation_model = simulation_data.get("model") + model = ( + ModelSettings(model=simulation_model) + if simulation_model + else ModelSettings(model=agent_model) + if agent_model + else None + ) + mocking_strategy = LLMMockingStrategy( type=MockingStrategyType.LLM, prompt=simulation_data.get("instructions", ""), tools_to_simulate=tools_to_simulate, - model=ModelSettings(model=agent_model) if agent_model else None, + model=model, ) # Create MockingContext for debugging From 9bad552a18a886231f740b35438f97c2deafc4ea Mon Sep 17 00:00:00 2001 From: Akshaya Shanbhogue Date: Thu, 9 Apr 2026 11:34:56 -0700 Subject: [PATCH 10/12] fix: resolve circular import between cli_eval and eval.runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move _get_agent_model import from top-level in runtime.py to local imports at the two call sites, breaking the cycle: eval.runtime.__init__ → _evaluate → runtime → cli_eval → eval.runtime Also removes agent_model from UiPathEvalContext since the model is now extracted lazily from runtime_schema where needed. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/src/uipath/_cli/cli_eval.py | 5 +--- .../uipath/src/uipath/eval/runtime/context.py | 1 - .../uipath/src/uipath/eval/runtime/runtime.py | 26 ++++++++++++------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/packages/uipath/src/uipath/_cli/cli_eval.py b/packages/uipath/src/uipath/_cli/cli_eval.py index b1e6ad495..ef1edb200 100644 --- a/packages/uipath/src/uipath/_cli/cli_eval.py +++ b/packages/uipath/src/uipath/_cli/cli_eval.py @@ -428,13 +428,10 @@ async def execute_eval(): eval_context.runtime_schema = await runtime.get_schema() - agent_model = _get_agent_model(eval_context.runtime_schema) - eval_context.agent_model = agent_model - eval_context.evaluators = await EvalHelpers.load_evaluators( resolved_eval_set_path, eval_context.evaluation_set, - agent_model, + _get_agent_model(eval_context.runtime_schema), ) # Runtime is not required anymore. diff --git a/packages/uipath/src/uipath/eval/runtime/context.py b/packages/uipath/src/uipath/eval/runtime/context.py index 81263c54d..b8224718c 100644 --- a/packages/uipath/src/uipath/eval/runtime/context.py +++ b/packages/uipath/src/uipath/eval/runtime/context.py @@ -27,4 +27,3 @@ class UiPathEvalContext: input_overrides: dict[str, Any] | None = None resume: bool = False job_id: str | None = None - agent_model: str | None = None diff --git a/packages/uipath/src/uipath/eval/runtime/runtime.py b/packages/uipath/src/uipath/eval/runtime/runtime.py index 73385c141..ceb44d14b 100644 --- a/packages/uipath/src/uipath/eval/runtime/runtime.py +++ b/packages/uipath/src/uipath/eval/runtime/runtime.py @@ -532,13 +532,14 @@ async def _execute_eval( mocking_strategy and isinstance(mocking_strategy, LLMMockingStrategy) and not mocking_strategy.model - and self.context.agent_model ): - mocking_strategy = mocking_strategy.model_copy( - update={ - "model": ModelSettings(model=self.context.agent_model) - } - ) + from ..._cli.cli_eval import _get_agent_model + + mocking_model = _get_agent_model(self.context.runtime_schema) + if mocking_model: + mocking_strategy = mocking_strategy.model_copy( + update={"model": ModelSettings(model=mocking_model)} + ) agent_execution_output = await self.execute_runtime( eval_item, @@ -827,10 +828,15 @@ async def _generate_input_for_eval( ) # Set agent model on the input mocking strategy if not already set input_strategy = eval_item.input_mocking_strategy - if input_strategy and not input_strategy.model and self.context.agent_model: - input_strategy = input_strategy.model_copy( - update={"model": ModelSettings(model=self.context.agent_model)} - ) + # If input strategy does not specify a model, extract it + if input_strategy and not input_strategy.model: + from ..._cli.cli_eval import _get_agent_model + + input_generation_model = _get_agent_model(self.context.runtime_schema) + if input_generation_model: + input_strategy = input_strategy.model_copy( + update={"model": ModelSettings(model=input_generation_model)} + ) generated_input = await generate_llm_input( input_strategy, From 8d0532dfc7a50ec05ccd40fc1458571271ee82e6 Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 12:05:50 -0700 Subject: [PATCH 11/12] refactor: move get_agent_model to eval/helpers to avoid circular import Move _get_agent_model from cli_eval.py to eval/helpers.py as get_agent_model. This eliminates the inline imports in runtime.py that were needed to avoid the circular dependency between eval/runtime/runtime.py and _cli/cli_eval.py. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/uipath/src/uipath/_cli/cli_eval.py | 23 ------------------- packages/uipath/src/uipath/eval/helpers.py | 23 +++++++++++++++++++ .../src/uipath/eval/mocks/_mock_runtime.py | 5 +--- .../uipath/src/uipath/eval/runtime/runtime.py | 9 +++----- 4 files changed, 27 insertions(+), 33 deletions(-) diff --git a/packages/uipath/src/uipath/_cli/cli_eval.py b/packages/uipath/src/uipath/_cli/cli_eval.py index ef1edb200..9ebe275dc 100644 --- a/packages/uipath/src/uipath/_cli/cli_eval.py +++ b/packages/uipath/src/uipath/_cli/cli_eval.py @@ -25,7 +25,6 @@ from uipath.runtime import ( UiPathRuntimeContext, UiPathRuntimeFactoryRegistry, - UiPathRuntimeSchema, ) from uipath.telemetry._track import flush_events from uipath.tracing import ( @@ -65,27 +64,6 @@ def setup_reporting_prereq(no_report: bool) -> bool: return True -def _get_agent_model(schema: UiPathRuntimeSchema) -> str | None: - """Get agent model from the runtime schema metadata. - - The model is read from schema.metadata["settings"]["model"] which is - populated by the low-code agents runtime from agent.json. - - Returns: - The model name from agent settings, or None if not found. - """ - try: - if schema.metadata and "settings" in schema.metadata: - settings = schema.metadata["settings"] - model = settings.get("model") - if model: - logger.debug(f"Got agent model from schema.metadata: {model}") - return model - return None - except Exception: - return None - - def _resolve_model_settings_override( model_settings_id: str, evaluation_set: EvaluationSet ) -> dict[str, Any] | None: @@ -431,7 +409,6 @@ async def execute_eval(): eval_context.evaluators = await EvalHelpers.load_evaluators( resolved_eval_set_path, eval_context.evaluation_set, - _get_agent_model(eval_context.runtime_schema), ) # Runtime is not required anymore. diff --git a/packages/uipath/src/uipath/eval/helpers.py b/packages/uipath/src/uipath/eval/helpers.py index 0a0a0ca7f..8405e4a7a 100644 --- a/packages/uipath/src/uipath/eval/helpers.py +++ b/packages/uipath/src/uipath/eval/helpers.py @@ -7,6 +7,8 @@ from pydantic import ValidationError +from uipath.runtime.schema import UiPathRuntimeSchema + from .evaluators.base_evaluator import GenericBaseEvaluator from .evaluators.evaluator_factory import EvaluatorFactory from .mocks._types import InputMockingStrategy, LLMMockingStrategy @@ -277,3 +279,24 @@ async def load_evaluators( ) return evaluators + + +def get_agent_model(schema: UiPathRuntimeSchema) -> str | None: + """Get agent model from the runtime schema metadata. + + The model is read from schema.metadata["settings"]["model"] which is + populated by the low-code agents runtime from agent.json. + + Returns: + The model name from agent settings, or None if not found. + """ + try: + if schema.metadata and "settings" in schema.metadata: + settings = schema.metadata["settings"] + model = settings.get("model") + if model: + logger.debug(f"Got agent model from schema.metadata: {model}") + return model + return None + except Exception: + return None diff --git a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py index 30a56126a..df41dadeb 100644 --- a/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py +++ b/packages/uipath/src/uipath/eval/mocks/_mock_runtime.py @@ -144,12 +144,9 @@ def __init__( span_collector: ExecutionSpanCollector | None = None, execution_id: str | None = None, eval_set_run_id: str | None = None, - agent_model: str | None = None, ): self.delegate = delegate - self._mocking_context = mocking_context or load_simulation_config( - agent_model=agent_model - ) + self._mocking_context = mocking_context or load_simulation_config() self._span_collector = span_collector or ExecutionSpanCollector() self._execution_id = execution_id or str(uuid.uuid4()) self._eval_set_run_id = eval_set_run_id diff --git a/packages/uipath/src/uipath/eval/runtime/runtime.py b/packages/uipath/src/uipath/eval/runtime/runtime.py index ceb44d14b..7f7614446 100644 --- a/packages/uipath/src/uipath/eval/runtime/runtime.py +++ b/packages/uipath/src/uipath/eval/runtime/runtime.py @@ -47,6 +47,7 @@ from .._execution_context import ExecutionSpanCollector from ..evaluators.base_evaluator import GenericBaseEvaluator from ..evaluators.output_evaluator import OutputEvaluationCriteria +from ..helpers import get_agent_model from ..mocks._cache_manager import CacheManager from ..mocks._input_mocker import ( generate_llm_input, @@ -533,9 +534,7 @@ async def _execute_eval( and isinstance(mocking_strategy, LLMMockingStrategy) and not mocking_strategy.model ): - from ..._cli.cli_eval import _get_agent_model - - mocking_model = _get_agent_model(self.context.runtime_schema) + mocking_model = get_agent_model(self.context.runtime_schema) if mocking_model: mocking_strategy = mocking_strategy.model_copy( update={"model": ModelSettings(model=mocking_model)} @@ -830,9 +829,7 @@ async def _generate_input_for_eval( input_strategy = eval_item.input_mocking_strategy # If input strategy does not specify a model, extract it if input_strategy and not input_strategy.model: - from ..._cli.cli_eval import _get_agent_model - - input_generation_model = _get_agent_model(self.context.runtime_schema) + input_generation_model = get_agent_model(self.context.runtime_schema) if input_generation_model: input_strategy = input_strategy.model_copy( update={"model": ModelSettings(model=input_generation_model)} From 3bdf2fc4dd2aee375cacbe62470d95574810c217 Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 9 Apr 2026 13:24:28 -0700 Subject: [PATCH 12/12] fix: update test to import get_agent_model from helpers Co-Authored-By: Claude Opus 4.6 (1M context) --- .../cli/eval/test_eval_runtime_metadata.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/packages/uipath/tests/cli/eval/test_eval_runtime_metadata.py b/packages/uipath/tests/cli/eval/test_eval_runtime_metadata.py index 112f8774b..07042cc12 100644 --- a/packages/uipath/tests/cli/eval/test_eval_runtime_metadata.py +++ b/packages/uipath/tests/cli/eval/test_eval_runtime_metadata.py @@ -1,7 +1,7 @@ """Tests for UiPathEvalRuntime metadata loading functionality. This module tests: -- _get_agent_model() - cached agent model retrieval +- get_agent_model() - cached agent model retrieval - get_schema() - cached schema retrieval """ @@ -10,11 +10,9 @@ import pytest -from uipath._cli.cli_eval import ( - _get_agent_model, -) from uipath.core.events import EventBus from uipath.core.tracing import UiPathTraceManager +from uipath.eval.helpers import get_agent_model from uipath.eval.runtime import UiPathEvalContext, UiPathEvalRuntime from uipath.runtime import ( UiPathExecuteOptions, @@ -119,34 +117,34 @@ async def dispose(self) -> None: class TestGetAgentModel: - """Tests for _get_agent_model function.""" + """Tests for get_agent_model function.""" @pytest.mark.asyncio async def test_returns_agent_model(self): - """Test that _get_agent_model returns the correct model from schema.""" + """Test that get_agent_model returns the correct model from schema.""" schema = MockRuntimeSchema() schema.metadata = {"settings": {"model": "gpt-4o-2024-11-20"}} - model = _get_agent_model(schema) + model = get_agent_model(schema) assert model == "gpt-4o-2024-11-20" @pytest.mark.asyncio async def test_returns_none_when_no_model(self): - """Test that _get_agent_model returns None when runtime has no model.""" + """Test that get_agent_model returns None when runtime has no model.""" schema = MockRuntimeSchema() - model = _get_agent_model(schema) + model = get_agent_model(schema) assert model is None @pytest.mark.asyncio async def test_returns_model_consistently(self): - """Test that _get_agent_model returns consistent results.""" + """Test that get_agent_model returns consistent results.""" schema = MockRuntimeSchema() schema.metadata = {"settings": {"model": "consistent-model"}} # Multiple calls should return the same value - model1 = _get_agent_model(schema) - model2 = _get_agent_model(schema) + model1 = get_agent_model(schema) + model2 = get_agent_model(schema) assert model1 == model2 == "consistent-model"