Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 7 additions & 15 deletions frontend/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -7645,21 +7645,13 @@ <h1>Sisyphus</h1>
}

// Build the user-message prompt text for a saved routine.
//
// NOTE(review): this body appears to contain both the removed and the added
// side of a diff hunk. As written, the first `return` (the array join)
// ends the function, so the second comment block and the final
// `return routine.routine_markdown || '';` are unreachable. The two
// comment blocks also describe contradictory designs (verbose prompt vs.
// markdown-only prompt) — confirm which variant is intended.
function buildRoutinePrompt(routine) {
// The agent receives the Routine markdown verbatim plus a one-line
// instruction. Keeping the Routine fully visible in the prompt is
// intentional: the OpenBrowser agent's tools (highlight/click/etc)
// are exactly the vocabulary the Routine was compiled in.
const goal = (routine.goal || routine.name || '').trim();
const goalLine = goal
? `Goal: ${goal}`
: `Goal: run the saved routine "${routine.name}".`;
return [
`Run the saved routine "${routine.name}".`,
goalLine,
'Follow this Routine step by step:',
'',
routine.routine_markdown || '',
].join('\n');
// In routine_replay mode the user message carries only the SOP
// markdown verbatim. The routine name, goal, and "follow step
// by step" framing live in the routine-replay system prompt
// block, not here — repeating them in the user message gave the
// model a redundant identifier to lose track of when long
// contexts decayed (session d1395b5d).
return routine.routine_markdown || '';
}

async function refreshRoutines() {
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,5 @@ override-dependencies = [
]

[tool.uv.sources]
openhands-sdk = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-sdk", rev = "bd4cb296355c3d03dd411883e78527b1915fa8c4" }
openhands-tools = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-tools", rev = "bd4cb296355c3d03dd411883e78527b1915fa8c4" }
openhands-sdk = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-sdk", rev = "c92a185a" }
openhands-tools = { git = "https://github.com/softpudding/agent-sdk.git", subdirectory = "openhands-tools", rev = "c92a185a" }
31 changes: 30 additions & 1 deletion server/agent/browser_condenser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,38 @@
# NOTE(review): presumably the default event-count ceiling for the browser
# condenser; its use is not visible in this hunk — confirm.
DEFAULT_BROWSER_CONDENSER_MAX_SIZE = 1000
# NOTE(review): presumably the fraction of the model's context window used as
# the token threshold (the unit tests expect 700_000 for a 1M-token model);
# the multiplication itself is outside this view — confirm.
DEFAULT_BROWSER_CONDENSER_TOKEN_RATIO = 0.7

# Per-model token caps for models with known long-context attention decay.
# Matched by case-insensitive substring against llm.model so provider
# prefixes (e.g. "dashscope/qwen3.5-flash") and variant suffixes still
# trigger the cap. Session d1395b5d saw qwen3.5-flash lose the original
# user message after ~100 browser events because the condenser's
# context-window-ratio threshold (~700k for a 1M-token model) never fired.
#
# Keys are model-name fragments; values are the token cap returned for any
# model whose name contains that fragment. Lookup walks the dict in insertion
# order and returns the first match, so list more specific fragments first
# if overlapping entries are ever added.
SMALL_MODEL_TOKEN_OVERRIDES: dict[str, int] = {
"qwen3.5-flash": 100_000,
}


def _small_model_token_override(model: str | None) -> int | None:
    """Return the token cap configured for ``model``, if any.

    The model name is lower-cased and compared against each fragment in
    ``SMALL_MODEL_TOKEN_OVERRIDES`` by substring containment, so provider
    prefixes and variant suffixes do not prevent a match. The first
    matching entry (in dict order) wins.

    Args:
        model: The LLM model identifier; may be ``None`` or empty.

    Returns:
        The matching token cap, or ``None`` when ``model`` is falsy or no
        fragment is contained in it.
    """
    if not model:
        return None

    lowered = model.lower()
    matching_caps = (
        cap
        for key, cap in SMALL_MODEL_TOKEN_OVERRIDES.items()
        if key.lower() in lowered
    )
    # ``next`` with a default yields the first hit or None, mirroring an
    # early-return loop.
    return next(matching_caps, None)


def derive_browser_condenser_max_tokens(llm: LLM) -> int | None:
"""Derive a token threshold for browser-heavy conversations."""
"""Derive a token threshold for browser-heavy conversations.

For models listed in ``SMALL_MODEL_TOKEN_OVERRIDES`` the cap is
returned directly, regardless of the model's advertised context
window. Otherwise the threshold is a fraction of the context window.
"""

override = _small_model_token_override(llm.model)
if override is not None:
return override

max_input_tokens = llm.max_input_tokens
if not max_input_tokens or max_input_tokens <= 0:
Expand Down
58 changes: 58 additions & 0 deletions server/tests/unit/test_browser_condenser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,64 @@ def test_configure_browser_condenser_prefers_token_limit() -> None:
assert condenser.max_tokens is None


def test_derive_browser_condenser_max_tokens_uses_small_model_override() -> None:
    """Small-model overrides win over the 0.7×context_window derivation.

    A model with known long-context attention decay must get the stricter
    override budget even though it advertises a much larger context window.

    Background: in session d1395b5d, qwen3.5-flash ran past ~100 events
    with no condensation and lost track of the original user message. The
    override makes the condenser kick in earlier for such models.
    """
    flash_llm = LLM.model_construct(
        model="dashscope/qwen3.5-flash",
        max_input_tokens=1_000_000,
    )

    derived = derive_browser_condenser_max_tokens(flash_llm)

    assert derived == 100_000


def test_derive_browser_condenser_max_tokens_override_matches_model_substring() -> None:
    """Substring matching keeps the cap active across name variants.

    Bare names, litellm-style provider prefixes (``dashscope/...``,
    ``openai/...``) and variant suffixes must all resolve to the
    small-model cap.
    """
    name_variants = (
        "qwen3.5-flash",
        "dashscope/qwen3.5-flash",
        "openai/qwen3.5-flash-preview",
    )

    for candidate in name_variants:
        candidate_llm = LLM.model_construct(
            model=candidate,
            max_input_tokens=1_000_000,
        )
        derived = derive_browser_condenser_max_tokens(candidate_llm)
        # The model name is the assertion message so a failure names the variant.
        assert derived == 100_000, candidate


def test_derive_browser_condenser_max_tokens_override_ignores_unrelated_models() -> (
    None
):
    """A model absent from the override map keeps the 0.7×context_window
    derivation."""
    plus_llm = LLM.model_construct(
        model="dashscope/qwen3.5-plus",
        max_input_tokens=1_000_000,
    )

    derived = derive_browser_condenser_max_tokens(plus_llm)

    assert derived == 700_000


def test_configure_browser_condenser_applies_small_model_override() -> None:
    """``configure_browser_condenser`` must honour the small-model override.

    For an LLM that matches an override entry, the configured condenser's
    ``max_tokens`` must be the override value, not the (higher) threshold
    the context-window derivation would produce.
    """
    flash_llm = LLM.model_construct(
        model="dashscope/qwen3.5-flash",
        max_input_tokens=1_000_000,
    )
    base_condenser = LLMSummarizingCondenser(
        llm=flash_llm,
        max_size=80,
        keep_first=4,
    )

    result = configure_browser_condenser(base_condenser, flash_llm)

    assert isinstance(result, LLMSummarizingCondenser)
    assert result.max_tokens == 100_000


def test_configure_browser_condenser_preserves_explicit_token_limit() -> None:
llm = LLM.model_construct(model="test-model", max_input_tokens=100_000)
condenser = LLMSummarizingCondenser(
Expand Down
8 changes: 4 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading