diff --git a/frontend/index.html b/frontend/index.html index dc272b0..35a0a0a 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -7645,21 +7645,13 @@

Sisyphus

} function buildRoutinePrompt(routine) { - // The agent receives the Routine markdown verbatim plus a one-line - // instruction. Keeping the Routine fully visible in the prompt is - // intentional: the OpenBrowser agent's tools (highlight/click/etc) - // are exactly the vocabulary the Routine was compiled in. - const goal = (routine.goal || routine.name || '').trim(); - const goalLine = goal - ? `Goal: ${goal}` - : `Goal: run the saved routine "${routine.name}".`; - return [ - `Run the saved routine "${routine.name}".`, - goalLine, - 'Follow this Routine step by step:', - '', - routine.routine_markdown || '', - ].join('\n'); + // In routine_replay mode the user message carries only the SOP + // markdown verbatim. The routine name, goal, and "follow step + // by step" framing live in the routine-replay system prompt + // block, not here — repeating them in the user message gave the + // model a redundant identifier to lose track of when long + // contexts decayed (session d1395b5d). + return routine.routine_markdown || ''; } async function refreshRoutines() { diff --git a/pyproject.toml b/pyproject.toml index 69ae578..d705a4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,5 +76,5 @@ override-dependencies = [ ] [tool.uv.sources] -openhands-sdk = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-sdk", rev = "bd4cb296355c3d03dd411883e78527b1915fa8c4" } -openhands-tools = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-tools", rev = "bd4cb296355c3d03dd411883e78527b1915fa8c4" } +openhands-sdk = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-sdk", rev = "c92a185a" } +openhands-tools = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-tools", rev = "c92a185a" } diff --git a/server/agent/browser_condenser.py b/server/agent/browser_condenser.py index 298d1d2..40a62e0 100644 --- a/server/agent/browser_condenser.py +++ b/server/agent/browser_condenser.py @@ -11,9 +11,38 @@ DEFAULT_BROWSER_CONDENSER_MAX_SIZE = 1000 DEFAULT_BROWSER_CONDENSER_TOKEN_RATIO = 0.7 +# Per-model token caps for models with known long-context attention decay. +# Matched by case-insensitive substring against llm.model so provider +# prefixes (e.g. "dashscope/qwen3.5-flash") and variant suffixes still +# trigger the cap. Session d1395b5d saw qwen3.5-flash lose the original +# user message after ~100 browser events because the condenser's +# context-window-ratio threshold (~700k for a 1M-token model) never fired. +SMALL_MODEL_TOKEN_OVERRIDES: dict[str, int] = { + "qwen3.5-flash": 100_000, +} + + +def _small_model_token_override(model: str | None) -> int | None: + if not model: + return None + needle = model.lower() + for fragment, token_cap in SMALL_MODEL_TOKEN_OVERRIDES.items(): + if fragment.lower() in needle: + return token_cap + return None + def derive_browser_condenser_max_tokens(llm: LLM) -> int | None: - """Derive a token threshold for browser-heavy conversations.""" + """Derive a token threshold for browser-heavy conversations. + + For models listed in ``SMALL_MODEL_TOKEN_OVERRIDES`` the cap is + returned directly, regardless of the model's advertised context + window. Otherwise the threshold is a fraction of the context window. + """ + + override = _small_model_token_override(llm.model) + if override is not None: + return override max_input_tokens = llm.max_input_tokens if not max_input_tokens or max_input_tokens <= 0: diff --git a/server/tests/unit/test_browser_condenser.py b/server/tests/unit/test_browser_condenser.py index ca44d31..7a4094c 100644 --- a/server/tests/unit/test_browser_condenser.py +++ b/server/tests/unit/test_browser_condenser.py @@ -37,6 +37,64 @@ def test_configure_browser_condenser_prefers_token_limit() -> None: assert condenser.max_tokens is None +def test_derive_browser_condenser_max_tokens_uses_small_model_override() -> None: + """Small models with known long-context attention decay get a stricter + token budget than the 0.7×context_window derivation, even when their + advertised context window is much larger. + + Rationale: session d1395b5d ran qwen3.5-flash past ~100 events with no + condensation and watched the model lose track of the original user + message. The override forces the condenser to kick in earlier for + these models regardless of advertised context size. + """ + llm = LLM.model_construct( + model="dashscope/qwen3.5-flash", max_input_tokens=1_000_000 + ) + + assert derive_browser_condenser_max_tokens(llm) == 100_000 + + +def test_derive_browser_condenser_max_tokens_override_matches_model_substring() -> None: + """The override matches by substring so provider prefixes (litellm + style like ``dashscope/qwen3.5-flash`` or ``openai/qwen3.5-flash``) + still trigger the small-model cap. + """ + for model_name in ( + "qwen3.5-flash", + "dashscope/qwen3.5-flash", + "openai/qwen3.5-flash-preview", + ): + llm = LLM.model_construct(model=model_name, max_input_tokens=1_000_000) + assert derive_browser_condenser_max_tokens(llm) == 100_000, model_name + + +def test_derive_browser_condenser_max_tokens_override_ignores_unrelated_models() -> ( + None +): + """Models not in the override map keep the 0.7×context_window + derivation.""" + llm = LLM.model_construct( + model="dashscope/qwen3.5-plus", max_input_tokens=1_000_000 + ) + + assert derive_browser_condenser_max_tokens(llm) == 700_000 + + +def test_configure_browser_condenser_applies_small_model_override() -> None: + """When the LLM matches a small-model override, ``configure`` must use + the override value rather than the context-window derivation, even if + the derivation would give a higher threshold.""" + llm = LLM.model_construct( + model="dashscope/qwen3.5-flash", max_input_tokens=1_000_000 + ) + condenser = LLMSummarizingCondenser(llm=llm, max_size=80, keep_first=4) + + configured = configure_browser_condenser(condenser, llm) + + assert isinstance(configured, LLMSummarizingCondenser) + assert configured.max_tokens == 100_000 + + def test_configure_browser_condenser_preserves_explicit_token_limit() -> None: llm = LLM.model_construct(model="test-model", max_input_tokens=100_000) condenser = LLMSummarizingCondenser( diff --git a/uv.lock b/uv.lock index 36f3fc5..2104838 100644 --- a/uv.lock +++ b/uv.lock @@ -1678,8 +1678,8 @@ requires-dist = [ { name = "litellm", git = "https://github.com/softpudding/litellm.git?rev=2eb7db59461e9117b1e3e0519616b39f1497c0f9" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.7.0" }, { name = "numpy", specifier = ">=1.24.0" }, - { name = "openhands-sdk", git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-sdk&rev=bd4cb296355c3d03dd411883e78527b1915fa8c4" }, - { name = "openhands-tools", git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-tools&rev=bd4cb296355c3d03dd411883e78527b1915fa8c4" }, + { name = "openhands-sdk", git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-sdk&rev=c92a185a" }, + { name = "openhands-tools", git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-tools&rev=c92a185a" }, { name = "pillow", specifier = ">=10.0.0" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.0.0" }, { name = "pydantic", specifier = ">=2.5.0" }, @@ -2224,7 +2224,7 @@ wheels = [ [[package]] name = "openhands-sdk" version = "1.12.0" -source = { git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-sdk&rev=bd4cb296355c3d03dd411883e78527b1915fa8c4#bd4cb296355c3d03dd411883e78527b1915fa8c4" } +source = { git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-sdk&rev=c92a185a#c92a185a00aa7ae58547d794835575742f1ed27e" } dependencies = [ { name = "agent-client-protocol" }, { name = "deprecation" }, @@ -2244,7 +2244,7 @@ dependencies = [ [[package]] name = "openhands-tools" version = "1.12.0" -source = { git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-tools&rev=bd4cb296355c3d03dd411883e78527b1915fa8c4#bd4cb296355c3d03dd411883e78527b1915fa8c4" } +source = { git = "https://github.com/softpudding/agent-sdk.git?subdirectory=openhands-tools&rev=c92a185a#c92a185a00aa7ae58547d794835575742f1ed27e" } dependencies = [ { name = "bashlex" }, { name = "binaryornot" },