From d9ff186cada59fdb8166ca25f94abe5f01e820e0 Mon Sep 17 00:00:00 2001 From: alcholiclg Date: Tue, 30 Jun 2026 15:12:28 +0800 Subject: [PATCH] support template agents --- .gitignore | 2 + MANIFEST.in | 3 + ms_agent/agent/loader.py | 3 + ms_agent/agent/templates/__init__.py | 24 +++ ms_agent/agent/templates/build/agent.yaml | 43 +++++ ms_agent/agent/templates/compose_prompt.py | 48 ++++++ ms_agent/agent/templates/explore/agent.yaml | 39 +++++ ms_agent/agent/templates/general/agent.yaml | 52 ++++++ ms_agent/agent/templates/harness/__init__.py | 52 ++++++ .../agent/templates/harness/loop_guard.py | 156 ++++++++++++++++++ .../agent/templates/harness/plan_check.py | 124 ++++++++++++++ .../agent/templates/harness/quality_check.py | 88 ++++++++++ .../agent/templates/harness/round_reminder.py | 74 +++++++++ .../agent/templates/harness/state_inject.py | 63 +++++++ ms_agent/agent/templates/harness/stop_gate.py | 149 +++++++++++++++++ .../agent/templates/harness/subagent_limit.py | 82 +++++++++ ms_agent/agent/templates/harness/todo_gate.py | 88 ++++++++++ ms_agent/agent/templates/plan/agent.yaml | 51 ++++++ .../agent/templates/prompts/base/general.md | 19 +++ .../agent/templates/prompts/base/worker.md | 9 + ms_agent/agent/templates/registry.py | 133 +++++++++++++++ ms_agent/agent/templates/registry.yaml | 47 ++++++ ms_agent/agent/templates/research/agent.yaml | 65 ++++++++ ms_agent/agent/templates/subagent_expand.py | 103 ++++++++++++ ms_agent/cli/run.py | 12 +- ms_agent/config/config.py | 24 +++ ms_agent/tools/agent_tool.py | 53 +++++- requirements/framework.txt | 1 + setup.py | 1 + tests/agent/test_harness_callbacks.py | 121 ++++++++++++++ tests/agent/test_loop_guard.py | 100 +++++++++++ tests/agent/test_plan_check.py | 106 ++++++++++++ tests/agent/test_prompt_layering.py | 91 ++++++++++ tests/agent/test_subagent_depth.py | 38 +++++ tests/agent/test_subagent_expand.py | 62 +++++++ tests/agent/test_subagent_limit.py | 60 +++++++ tests/agent/test_template_registry.py | 42 
+++++ tests/agent/test_templates_smoke.py | 45 +++++ tests/agent/test_todo_gate.py | 63 +++++++ 39 files changed, 2328 insertions(+), 8 deletions(-) create mode 100644 ms_agent/agent/templates/__init__.py create mode 100644 ms_agent/agent/templates/build/agent.yaml create mode 100644 ms_agent/agent/templates/compose_prompt.py create mode 100644 ms_agent/agent/templates/explore/agent.yaml create mode 100644 ms_agent/agent/templates/general/agent.yaml create mode 100644 ms_agent/agent/templates/harness/__init__.py create mode 100644 ms_agent/agent/templates/harness/loop_guard.py create mode 100644 ms_agent/agent/templates/harness/plan_check.py create mode 100644 ms_agent/agent/templates/harness/quality_check.py create mode 100644 ms_agent/agent/templates/harness/round_reminder.py create mode 100644 ms_agent/agent/templates/harness/state_inject.py create mode 100644 ms_agent/agent/templates/harness/stop_gate.py create mode 100644 ms_agent/agent/templates/harness/subagent_limit.py create mode 100644 ms_agent/agent/templates/harness/todo_gate.py create mode 100644 ms_agent/agent/templates/plan/agent.yaml create mode 100644 ms_agent/agent/templates/prompts/base/general.md create mode 100644 ms_agent/agent/templates/prompts/base/worker.md create mode 100644 ms_agent/agent/templates/registry.py create mode 100644 ms_agent/agent/templates/registry.yaml create mode 100644 ms_agent/agent/templates/research/agent.yaml create mode 100644 ms_agent/agent/templates/subagent_expand.py create mode 100644 tests/agent/test_harness_callbacks.py create mode 100644 tests/agent/test_loop_guard.py create mode 100644 tests/agent/test_plan_check.py create mode 100644 tests/agent/test_prompt_layering.py create mode 100644 tests/agent/test_subagent_depth.py create mode 100644 tests/agent/test_subagent_expand.py create mode 100644 tests/agent/test_subagent_limit.py create mode 100644 tests/agent/test_template_registry.py create mode 100644 tests/agent/test_templates_smoke.py create mode 100644 
tests/agent/test_todo_gate.py diff --git a/.gitignore b/.gitignore index 58fd44f05..7c518dd24 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ test/ # Distribution / packaging .Python build/ +!ms_agent/agent/templates/build/ +!ms_agent/agent/templates/build/** develop-eggs/ dist/ downloads/ diff --git a/MANIFEST.in b/MANIFEST.in index b7ac745da..6a142391c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,6 +4,9 @@ include requirements.txt recursive-include requirements *.txt recursive-include ms_agent/ *.yaml +# Include built-in agent templates (yaml + prompt files) +recursive-include ms_agent/agent/templates * + # Include projects recursive-include projects * diff --git a/ms_agent/agent/loader.py b/ms_agent/agent/loader.py index 48ed74d91..5c9fed7f7 100644 --- a/ms_agent/agent/loader.py +++ b/ms_agent/agent/loader.py @@ -23,6 +23,9 @@ def build(cls, **kwargs) -> Agent: agent_config: Optional[DictConfig] = None if config_dir_or_id is not None: + from ms_agent.agent.templates.registry import \ + resolve_template_source + config_dir_or_id = resolve_template_source(config_dir_or_id) if not os.path.exists(config_dir_or_id): from modelscope import snapshot_download config_dir_or_id = snapshot_download(config_dir_or_id) diff --git a/ms_agent/agent/templates/__init__.py b/ms_agent/agent/templates/__init__.py new file mode 100644 index 000000000..13a7fc25b --- /dev/null +++ b/ms_agent/agent/templates/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +"""Built-in template agents (general / plan / explore / build / research). + +Importing this package also registers the built-in harness callbacks +(``stop_gate``, ``round_reminder``, ``model_quality_check``) into +``ms_agent.callbacks.callbacks_mapping`` so templates can reference them in +``callbacks:`` without ``trust_remote_code``. The import is best-effort: a +failure here must never break config loading. 
+""" +from .registry import (get_when_to_use, list_templates, load_manifest, + resolve_template_dir, resolve_template_source) + +try: # best-effort harness registration; see templates/harness/__init__.py + from . import harness # noqa: F401 +except Exception: # pragma: no cover - harness is optional + pass + +__all__ = [ + 'resolve_template_source', + 'resolve_template_dir', + 'load_manifest', + 'list_templates', + 'get_when_to_use', +] diff --git a/ms_agent/agent/templates/build/agent.yaml b/ms_agent/agent/templates/build/agent.yaml new file mode 100644 index 000000000..7a9230659 --- /dev/null +++ b/ms_agent/agent/templates/build/agent.yaml @@ -0,0 +1,43 @@ +# Built-in template: build (model_tier: strong) +# Coding sub-agent: implement, edit, debug, verify. +llm: + service: openai + model: qwen3.7-plus # strong tier; bump to qwen3-max if desired + openai_api_key: + openai_base_url: + +generation_config: + stream: true + +prompt: + base: worker # shared sub-agent base prompt (prompts/base/worker.md) + system: | # specialization only + You are a coding agent. For each task: + 1) Analyze the requirements first, then implement. + 2) Prefer minimal, correct edits that match the surrounding code style. + 3) Run code / tests to verify your changes; read errors, diagnose, and fix + them -- don't give up after one attempt. + 4) Stay in scope: don't refactor unrelated code. Never run destructive + commands without explicit instruction. + 5) Report what you changed and how you verified it. + +# No interactive input_callback: build is a focused, run-once / sub-agent +# template (input_callback would block on stdin when invoked as a sub-agent). 
# Directory that holds the shared base prompts (``<name>.md`` files).
_BASE_DIR = Path(__file__).resolve().parent / 'prompts' / 'base'


def _base_path(name: str) -> Path:
    """Return the path of the base prompt file for ``name`` inside ``_BASE_DIR``."""
    return _BASE_DIR / f'{name}.md'


def compose_system_prompt(config):
    """Prepend the selected base prompt to ``config.prompt.system``.

    The base prompt is chosen by ``config.prompt.base`` and read from
    ``_BASE_DIR``. The composed text is the base prompt (trailing whitespace
    removed), a blank line, then the stripped ``prompt.system``
    specialization; when the specialization is empty only the base text is
    used.

    Args:
        config: Any object exposing an optional ``prompt`` attribute with
            optional ``base`` and ``system`` attributes.

    Returns:
        The same ``config`` object. It is returned unchanged when there is no
        ``prompt``, no ``prompt.base`` (or it is ``none``), the base name is
        not a plain file name, or the base file is missing or unreadable.
    """
    prompt = getattr(config, 'prompt', None)
    if prompt is None:
        return config

    base_name = getattr(prompt, 'base', None)
    if not base_name or str(base_name).lower() == 'none':
        return config

    name = str(base_name)
    # A base must be a bare file name: anything carrying a directory part
    # (e.g. '../x') would resolve outside the bundled prompt directory.
    if Path(name).name != name:
        return config

    path = _base_path(name)
    try:
        if not path.is_file():
            return config
        base_text = path.read_text(encoding='utf-8').rstrip()
    except (OSError, ValueError):
        # Unreadable or undecodable file (UnicodeDecodeError is a ValueError):
        # composition is best-effort, so leave the config untouched.
        return config

    specialization = str(getattr(prompt, 'system', '') or '').strip()
    if specialization:
        config.prompt.system = base_text + '\n\n' + specialization
    else:
        config.prompt.system = base_text
    return config
+callbacks: + - state_inject + - loop_guard # break invalid tool-call loops + +tools: + file_system: + mcp: false + include: [read_file, grep, glob] + web_search: + mcp: false + engine: exa + fetch_content: true + max_results: 5 + +max_chat_round: 30 diff --git a/ms_agent/agent/templates/general/agent.yaml b/ms_agent/agent/templates/general/agent.yaml new file mode 100644 index 000000000..f9f07c187 --- /dev/null +++ b/ms_agent/agent/templates/general/agent.yaml @@ -0,0 +1,52 @@ +# Built-in template: general (model_tier: strong) +# Default entry point: full analyze -> plan -> execute; can delegate to sub-agents. +llm: + service: openai + model: qwen3.7-plus # strong tier; bump to qwen3-max if desired + openai_api_key: + openai_base_url: + +generation_config: + stream: true + +prompt: + base: general # shared base prompt (prompts/base/general.md) + system: | # specialization only + Delegate a subtask to a sub-agent when it is genuinely better handled in + isolation (do not delegate trivial work): + - `explore` : read-only search / locate across many files or sources, + - `build` : a focused coding / debugging subtask, + - `research` : an in-depth, citation-backed research subtask. + For non-trivial work, sketch a short plan with the todo tools before executing. + +callbacks: + - input_callback + - state_inject + - loop_guard # break invalid tool-call loops + - subagent_limit # cap parallel sub-agent delegations per turn + +subagent_limit: + max_parallel: 4 + +tools: + file_system: + mcp: false + include: [read_file, write_file, edit_file, grep, glob] + code_executor: + mcp: false + implementation: python_env + web_search: + mcp: false + engine: exa + fetch_content: true + max_results: 5 + todo_list: + mcp: false + auto_render_md: true + include: [todo_write, todo_read] + +# Sub-agent delegation. Expanded into tools.agent_tools.definitions at load time +# by expand_subagents(); descriptions come from templates/registry.yaml. 
def register_harness_callbacks() -> None:
    """Idempotently register harness callbacks into the global mapping.

    Every entry of ``_HARNESS_CALLBACKS`` is added to
    ``ms_agent.callbacks.callbacks_mapping`` with ``setdefault``, so an entry
    already registered under the same name is left as it is.

    Registration is best-effort: any failure is logged at debug level and
    swallowed, so importing this module never raises because of it.
    """
    try:
        from ms_agent.callbacks import callbacks_mapping
        for name, cls in _HARNESS_CALLBACKS.items():
            callbacks_mapping.setdefault(name, cls)
    except Exception as exc:  # pragma: no cover - registration is best-effort
        # Keep the failure visible to anyone debugging a "callback not found"
        # error instead of discarding it silently.
        import logging
        logging.getLogger(__name__).debug(
            'harness callback registration skipped: %s', exc)
'register_harness_callbacks', +] diff --git a/ms_agent/agent/templates/harness/loop_guard.py b/ms_agent/agent/templates/harness/loop_guard.py new file mode 100644 index 000000000..5207cd59f --- /dev/null +++ b/ms_agent/agent/templates/harness/loop_guard.py @@ -0,0 +1,156 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +"""LoopGuardCallback -- detect and break invalid tool-call loops. + +Generalized from deer-flow's LoopDetectionMiddleware (same thresholds and +two-layer design): + - repeated-signature: the same (tool, stable-key) appears >= warn / hard times + within a sliding window; + - frequency: the same tool is called >= freq_warn / freq_hard times overall. + +Detection runs in ``on_tool_call`` (fresh assistant tool_calls) but any injected +*user* message is emitted in ``after_tool_call`` -- injecting a user message +between an assistant's tool_calls and their tool results would be malformed. +""" +from __future__ import annotations + +import hashlib +import json +from collections import Counter, deque +from typing import List + +from omegaconf import OmegaConf + +from ms_agent.agent.runtime import Runtime +from ms_agent.callbacks.base import Callback +from ms_agent.llm.utils import Message +from ms_agent.utils import get_logger + +logger = get_logger() + +_WARN_MSG = ( + '[LOOP_GUARD] You are repeating the same kind of tool call without making ' + 'progress. Stop and reassess: try a different approach or tool, or explain ' + 'what is blocking you.') +_HARD_MSG = ( + '[LOOP_GUARD] Detected a repeated / high-frequency tool-call loop with no ' + 'progress, so execution was stopped. 
class LoopGuardCallback(Callback):
    """Detect runaway tool-call loops and warn the model or stop the run.

    Two layers are evaluated for every tool call in the latest assistant
    message:

    * repeated signature -- the same ``(bare tool name, stable key)`` pair
      occurs ``warn`` / ``hard`` times among the last ``window`` calls;
    * frequency -- the same bare tool name has been called ``freq_warn`` /
      ``freq_hard`` times since this callback instance was created.

    Detection runs in ``on_tool_call``; the resulting user message is only
    appended in ``after_tool_call`` so that it never lands between an
    assistant's tool_calls and their tool results.

    Config block (deer-flow defaults)::

        loop_guard:
          enabled: true
          window: 20
          warn: 3            # repeated-signature warn / hard
          hard: 5
          freq_warn: 30      # per-tool frequency warn / hard
          freq_hard: 50
          overrides:         # per-tool (freq_warn, freq_hard) overrides
            file_system---read_file: [120, 200]
    """

    def __init__(self, config):
        super().__init__(config)
        cfg = getattr(config, 'loop_guard', None)
        self.enabled = bool(getattr(cfg, 'enabled', True))
        # deque() rejects a negative maxlen; 0 keeps an empty window, which
        # simply turns the repeated-signature layer off.
        self.window = max(0, int(getattr(cfg, 'window', 20)))
        self.warn = int(getattr(cfg, 'warn', 3))
        self.hard = int(getattr(cfg, 'hard', 5))
        self.freq_warn = int(getattr(cfg, 'freq_warn', 30))
        self.freq_hard = int(getattr(cfg, 'freq_hard', 50))
        self.overrides = self._parse_overrides(
            getattr(cfg, 'overrides', None))
        self._recent: deque = deque(maxlen=self.window)  # sliding window
        self._freq: Counter = Counter()  # calls per bare tool name
        self._warned = set()  # keys that already produced a warning
        self._pending = None  # 'warn' | 'hard' | None

    # ── helpers ────────────────────────────────────────────────────────────

    @staticmethod
    def _parse_overrides(raw) -> dict:
        """Convert the ``overrides`` config node to ``{tool: (warn, hard)}``.

        Malformed entries are skipped with a warning so that one bad entry
        does not discard the valid ones.
        """
        if raw is None:
            return {}
        if isinstance(raw, dict):
            items = raw
        else:
            try:
                items = OmegaConf.to_container(raw, resolve=True) or {}
            except Exception as exc:
                logger.warning('LoopGuard: ignoring unreadable overrides: %s',
                               exc)
                return {}
        if not isinstance(items, dict):
            logger.warning('LoopGuard: overrides must be a mapping, got %s',
                           type(items).__name__)
            return {}
        parsed = {}
        for tool, limits in items.items():
            try:
                parsed[str(tool)] = (int(limits[0]), int(limits[1]))
            except (TypeError, ValueError, IndexError, KeyError):
                logger.warning('LoopGuard: ignoring invalid override for %s',
                               tool)
        return parsed

    @staticmethod
    def _bare(name: str) -> str:
        """Return the tool name without its ``server---`` prefix."""
        return name.split('---')[-1] if name else ''

    def _stable_key(self, name: str, args) -> str:
        """Reduce a call's arguments to a key that is equal for "same" calls.

        * read tools: the path plus the 200-line bucket of the start offset,
          so paging through one file does not look like a repeat;
        * write / edit tools: a hash of all arguments;
        * anything else: a hash of the salient arguments (path, url, query,
          command, ...), or of all arguments when none of them is present.
        """
        try:
            parsed = json.loads(args) if isinstance(args, str) else (args
                                                                     or {})
            if not isinstance(parsed, dict):
                parsed = {'_': str(parsed)}
        except Exception:
            parsed = {'_raw': str(args)}

        bare = self._bare(name)
        if bare in ('read_file', 'read'):
            path = str(
                parsed.get('path') or parsed.get('file')
                or parsed.get('file_path') or '')
            try:
                bucket = int(
                    parsed.get('start') or parsed.get('start_line')
                    or 0) // 200
            except Exception:
                bucket = 0
            return f'{path}#{bucket}'
        if bare in ('write_file', 'edit_file', 'str_replace', 'write',
                    'edit'):
            return self._hash(parsed)
        salient = {
            k: parsed[k]
            for k in ('path', 'file', 'file_path', 'url', 'query', 'command',
                      'cmd', 'pattern', 'glob') if k in parsed
        }
        return self._hash(salient or parsed)

    @staticmethod
    def _hash(obj) -> str:
        """Return a short, stable digest of a JSON-serialisable object.

        The digest is only compared in memory, so the algorithm is free to
        choose; SHA-256 is used because MD5 can be unavailable on
        FIPS-restricted Python builds.
        """
        payload = json.dumps(obj, sort_keys=True, default=str).encode('utf-8')
        return hashlib.sha256(payload).hexdigest()[:12]

    @staticmethod
    def _tc_field(tc, key):
        """Read ``key`` from a tool call given as a dict or as an object."""
        return tc.get(key, '') if isinstance(tc, dict) else getattr(
            tc, key, '')

    # ── lifecycle ──────────────────────────────────────────────────────────

    async def on_tool_call(self, runtime: Runtime, messages: List[Message]):
        """Record the latest assistant tool calls and flag warn / hard hits."""
        if not self.enabled or not messages:
            return
        last = messages[-1]
        if last.role != 'assistant' or not last.tool_calls:
            return
        for call in last.tool_calls:
            name = self._tc_field(call, 'tool_name')
            bare = self._bare(name)
            sig = (bare,
                   self._stable_key(name, self._tc_field(call, 'arguments')))
            self._recent.append(sig)
            self._freq[bare] += 1
            repeats = self._recent.count(sig)
            calls = self._freq[bare]
            # A full-name override wins over a bare-name one, which wins over
            # the global defaults.
            freq_warn, freq_hard = self.overrides.get(
                name,
                self.overrides.get(bare, (self.freq_warn, self.freq_hard)))
            if repeats >= self.hard or calls >= freq_hard:
                self._pending = 'hard'
                logger.warning('LoopGuard: hard limit on %s (count=%d freq=%d)',
                               sig, repeats, calls)
                return
            if repeats >= self.warn or calls >= freq_warn:
                # Warn once per signature, or once per tool for the
                # frequency layer.
                key = sig if repeats >= self.warn else ('FREQ', bare)
                if key not in self._warned:
                    self._warned.add(key)
                    self._pending = 'warn'
                    logger.info('LoopGuard: warn on %s', sig)

    async def after_tool_call(self, runtime: Runtime, messages: List[Message]):
        """Emit the pending warning, or stop the run on a hard hit."""
        if self._pending is None:
            return
        pending, self._pending = self._pending, None
        if pending == 'hard':
            runtime.should_stop = True
            messages.append(Message(role='user', content=_HARD_MSG))
        elif pending == 'warn':
            messages.append(Message(role='user', content=_WARN_MSG))
class PlanCheckCallback(Callback):
    """Audit the first plan the agent writes against the user's request.

    After the first assistant turn that calls a tool whose name ends with
    ``todo_write``, the plan file is read and sent to an LLM judge together
    with the first user message. When the judge reports a gap, one user
    message carrying the reason is appended. Only the first plan creation is
    judged.

    Config block::

        plan_check:
          enabled: true
          model: qwen3.7-plus     # judge model (defaults to the agent's llm)
          plan_file: plan.json
    """

    def __init__(self, config):
        super().__init__(config)
        cfg = getattr(config, 'plan_check', None)
        self.enabled = bool(getattr(cfg, 'enabled', True))
        self.plan_file = str(getattr(cfg, 'plan_file', 'plan.json'))
        self.output_dir = getattr(config, 'output_dir', None) or '.'
        self._llm = getattr(config, 'llm', None)
        self._model = getattr(cfg, 'model', None)
        self._checked = False  # flips once the first plan has been seen
        self._user_request: Optional[str] = None

    async def on_task_begin(self, runtime: Runtime, messages: List[Message]):
        """Remember the first non-empty user message as the request."""
        for m in messages:
            if m.role == 'user' and isinstance(m.content,
                                               str) and m.content.strip():
                self._user_request = m.content
                break

    def _read_plan_text(self) -> str:
        """Render the plan file as ``- [status] content`` lines.

        Returns an empty string when the file is missing, is not valid JSON,
        or does not contain a list of todo items. Items that are not mappings
        are rendered as plain content with an unknown status.
        """
        path = self.plan_file if os.path.isabs(
            self.plan_file) else os.path.join(self.output_dir, self.plan_file)
        if not os.path.isfile(path):
            return ''
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except Exception:
            return ''
        todos = data.get('todos') if isinstance(data, dict) else data
        if not todos or not isinstance(todos, (list, tuple)):
            return ''
        lines = []
        for item in todos:
            if not item:
                item = {}
            elif not isinstance(item, dict):
                # Tolerate a plain-string todo instead of failing on .get().
                item = {'content': str(item)}
            lines.append(
                f"- [{item.get('status', '?')}] {item.get('content', '')}")
        return '\n'.join(lines)

    async def after_tool_call(self, runtime: Runtime, messages: List[Message]):
        """Judge the plan right after the first ``todo_write`` call."""
        if not self.enabled or self._checked:
            return
        # Find the most recent assistant message that issued tool calls.
        assistant = None
        for m in reversed(messages):
            if m.role == 'assistant' and m.tool_calls:
                assistant = m
                break
        if assistant is None:
            return
        wrote_plan = any(
            str((tc.get('tool_name', '') if isinstance(tc, dict) else getattr(
                tc, 'tool_name', ''))).endswith('todo_write')
            for tc in assistant.tool_calls)
        if not wrote_plan:
            return
        self._checked = True  # only judge the first plan creation

        plan_text = self._read_plan_text()
        if not plan_text.strip():
            return
        content = (f'User request:\n{self._user_request or "(unknown)"}\n\n'
                   f'Plan:\n{plan_text}')
        model = str(
            self._model or getattr(self._llm, 'model', 'qwen3.5-plus'))
        api_key = getattr(self._llm, 'openai_api_key', None)
        base_url = getattr(self._llm, 'openai_base_url', None)
        try:
            import asyncio
            checker = LLMQualityChecker(model, api_key, base_url,
                                        _PLAN_RUBRIC)
            # check() is a synchronous model call; run it on a worker thread
            # so the event loop is not blocked while the judge answers.
            loop = asyncio.get_running_loop()
            reason = await loop.run_in_executor(None, checker.check, content)
        except Exception as exc:  # pragma: no cover
            logger.warning('PlanCheck: judge failed: %s', exc)
            return
        if reason:
            messages.append(
                Message(
                    role='user',
                    content=(
                        f'[PLAN_CHECK] Your plan looks incomplete: {reason} '
                        'Please revise the plan (todo_write) to close these '
                        'gaps before proceeding.')))
            logger.info('PlanCheck: injected feedback: %s', reason)
class LLMQualityChecker:
    """Call a lightweight model to audit a piece of content.

    The client is built lazily and is independent of the agent's own LLM, so a
    cheaper/faster judge model can be used.

    ``check`` returns a short failure reason, or ``None`` when the content
    passes. Judge errors and unparsable replies count as a pass, so a broken
    judge never blocks the agent.
    """

    # Content longer than this is truncated before it is sent to the judge.
    _MAX_CHARS = 80000

    def __init__(self,
                 model: str,
                 api_key: Optional[str] = None,
                 base_url: Optional[str] = None,
                 system_prompt: Optional[str] = None):
        self._model = model
        self._api_key = api_key
        self._base_url = base_url
        self._system = system_prompt or _DEFAULT_SYSTEM
        self._client = None  # created on first use by _ensure_client()

    def _ensure_client(self):
        """Create the judge client on first use."""
        if self._client is not None:
            return
        from ms_agent.llm.openai_llm import OpenAI as OpenAILLM
        self._client = OpenAILLM(
            OmegaConf.create({
                'llm': {
                    'model': self._model,
                    'openai_api_key': self._api_key,
                    'openai_base_url': self._base_url,
                },
                'generation_config': {},
            }))

    @staticmethod
    def _parse_verdict(raw: str) -> Optional[str]:
        """Turn the judge's raw reply into a failure reason.

        Accepts a bare JSON object as well as one wrapped in extra text such
        as a markdown code fence.

        Returns:
            ``None`` when the verdict passes, otherwise a non-empty reason
            (``'quality_check_failed'`` when the judge gave none).

        Raises:
            json.JSONDecodeError: when no JSON object can be read from ``raw``.
        """
        try:
            verdict = json.loads(raw)
        except json.JSONDecodeError:
            start, end = raw.find('{'), raw.rfind('}')
            if start == -1 or end <= start:
                raise
            verdict = json.loads(raw[start:end + 1])
        if not isinstance(verdict, dict):
            raise json.JSONDecodeError('verdict is not a JSON object', raw, 0)
        if verdict.get('pass', True):
            return None
        # Callers test the result for truthiness, so a failing verdict must
        # never come back as an empty reason.
        reason = str(verdict.get('reason') or '').strip()
        return reason or 'quality_check_failed'

    def check(self, content: str) -> Optional[str]:
        """Audit ``content``; return a failure reason or ``None`` on pass.

        Empty or whitespace-only content passes without calling the model.
        """
        if not content or not content.strip():
            return None
        try:
            self._ensure_client()
            text = content[:self._MAX_CHARS]
            resp = self._client.generate(messages=[
                Message(role='system', content=self._system),
                Message(
                    role='user',
                    content='---BEGIN---\n' + text + '\n---END---'),
            ])
            raw = (resp.content or '').strip()
            logger.info('LLMQualityChecker (%s): %s', self._model, raw[:200])
            return self._parse_verdict(raw)
        except json.JSONDecodeError:
            logger.warning('LLMQualityChecker: non-JSON response, treating as pass')
            return None
        except Exception as exc:  # pragma: no cover - network/runtime
            logger.warning('LLMQualityChecker: model call failed: %s', exc)
            return None
# NOTE(review): the placeholder tokens were lost from this text; the spellings
# <round>, <max_round> and <remaining> are reconstructed -- confirm them
# against existing configs that set ``round_reminder.message``.
_DEFAULT_MESSAGE = (
    '[ROUND_REMINDER] You are on round <round> of at most <max_round> '
    '(<remaining> left). Begin converging now: finish the current '
    'sub-goal, avoid opening new threads, and prepare your final answer.')


class RoundReminderCallback(Callback):
    """Inject a convergence reminder ``remind_before_max_round`` rounds before
    ``max_chat_round``.

    The reminder is appended as a user message when ``runtime.round`` equals
    the trigger round, unless one of the last ten messages already carries the
    ``[ROUND_REMINDER]`` mark.

    Config block::

        round_reminder:
          enabled: true
          remind_before_max_round: 2   # trigger at round == max_chat_round - this
          remind_at_round: null        # explicit override of the trigger round
          message: "..."               # optional; supports <round>,
                                       # <max_round>, <remaining>
    """

    _MARK = '[ROUND_REMINDER]'

    def __init__(self, config):
        super().__init__(config)
        cfg = getattr(config, 'round_reminder', None)
        self.enabled = bool(getattr(cfg, 'enabled', False))
        self.remind_before = int(getattr(cfg, 'remind_before_max_round', 2))
        self.remind_at_round = self._as_round(
            getattr(cfg, 'remind_at_round', None))
        self.message = getattr(cfg, 'message', None)
        self.max_chat_round = int(getattr(config, 'max_chat_round', 0) or 0)

    @staticmethod
    def _as_round(value):
        """Coerce an explicit trigger round to ``int``; ``None`` if unusable.

        The trigger is compared with ``==`` against ``runtime.round``, so a
        value given as a string would otherwise never match.
        """
        if value is None:
            return None
        try:
            return int(value)
        except (TypeError, ValueError):
            logger.warning(
                'RoundReminderCallback: ignoring invalid remind_at_round %r',
                value)
            return None

    async def on_generate_response(self, runtime: Runtime,
                                   messages: List[Message]):
        """Append the reminder when the current round is the trigger round."""
        if not self.enabled:
            return
        trigger = self.remind_at_round
        if trigger is None:
            if not self.max_chat_round:
                return
            trigger = self.max_chat_round - self.remind_before
        if runtime.round != trigger:
            return
        # de-dup: skip if a reminder is already among the recent messages
        for m in messages[-10:]:
            if m.role == 'user' and isinstance(
                    m.content, str) and self._MARK in m.content:
                return
        remaining = max(0, self.max_chat_round
                        - runtime.round) if self.max_chat_round else 0
        text = str(self.message or _DEFAULT_MESSAGE)
        substitutions = (
            ('<round>', runtime.round),
            ('<max_round>', self.max_chat_round),
            ('<remaining>', remaining),
        )
        for token, value in substitutions:
            text = text.replace(token, str(value))
        if self._MARK not in text:
            text = self._MARK + ' ' + text
        messages.append(Message(role='user', content=text))
        logger.info('RoundReminderCallback: injected reminder at round %s',
                    runtime.round)
class StateInjectCallback(Callback):
    """Substitute environment placeholders in the system prompt at task start.

    Each enabled field ``name`` fills the token ``<name>`` in the first
    message, provided that message is a system message with string content.

    NOTE(review): the token spellings (``<date>``, ``<cwd>``, ``<os>``) are
    reconstructed from the field names -- confirm them against the
    placeholders used in ``prompts/base/*.md``.

    Config block (all optional)::

        state_inject:
          enabled: true
          fields: [date, cwd, os]   # which placeholders to fill
    """

    def __init__(self, config):
        super().__init__(config)
        cfg = getattr(config, 'state_inject', None)
        self.enabled = bool(getattr(cfg, 'enabled', True))
        fields = getattr(cfg, 'fields', None)
        self.fields = set(fields) if fields else {'date', 'cwd', 'os'}

    def _values(self) -> dict:
        """Return ``{placeholder token: live value}`` for the enabled fields."""
        providers = {
            'date': lambda: datetime.datetime.now().strftime('%Y-%m-%d'),
            'cwd': os.getcwd,
            'os': platform.platform,
        }
        # One distinct token per field; a shared or empty key would collapse
        # the entries into one and corrupt the substitution.
        return {
            f'<{name}>': str(provider())
            for name, provider in providers.items() if name in self.fields
        }

    async def on_task_begin(self, runtime: Runtime, messages: List[Message]):
        """Fill the placeholders of the system message once, at task start."""
        if not self.enabled or not messages:
            return
        msg = messages[0]
        if msg.role != 'system' or not isinstance(msg.content, str):
            return
        content = msg.content
        for placeholder, value in self._values().items():
            if placeholder in content:
                content = content.replace(placeholder, value)
        msg.content = content
' + 'Please address it and continue; do not stop yet.') + + +class StopGateCallback(Callback): + """Gate the agent's stop decision behind a chain of checks. + + Config block:: + + stop_gate: + enabled: true + max_retries: 2 + output_dir: null # base for relative artifact paths + checks: + - type: artifact_exists + path: final_report.md + - type: min_size_ratio + path: final_report.md + baseline: reports/draft.md + min_ratio: 0.5 + - type: llm_quality + path: null # null -> audit the last assistant message + model: qwen3.7-plus + message: "..." # optional reflection; supports {reason} + """ + + def __init__(self, config): + super().__init__(config) + cfg = getattr(config, 'stop_gate', None) + self.enabled = bool(getattr(cfg, 'enabled', False)) + self.max_retries = int(getattr(cfg, 'max_retries', 2)) + base = getattr(cfg, 'output_dir', None) + self.base_dir = base or getattr(config, 'output_dir', None) or '.' + self.checks = list(getattr(cfg, 'checks', None) or []) + self._llm = getattr(config, 'llm', None) + self._retries_used = 0 + + # ---- helpers ----------------------------------------------------------- + + def _resolve(self, path) -> Optional[str]: + if not path: + return None + return path if os.path.isabs(path) else os.path.join( + self.base_dir, path) + + @staticmethod + def _last_assistant_text(messages: List[Message]) -> str: + for m in reversed(messages): + if m.role == 'assistant' and isinstance( + m.content, str) and m.content.strip(): + return m.content + return '' + + def _run_check(self, check, messages: List[Message]) -> Optional[str]: + ctype = getattr(check, 'type', None) + path = self._resolve(getattr(check, 'path', None)) + + if ctype == 'artifact_exists': + if not path or not os.path.isfile(path): + return (getattr(check, 'message', None) + or f'expected artifact not found: ' + f'{getattr(check, "path", path)}') + return None + + if ctype == 'min_size_ratio': + baseline = self._resolve(getattr(check, 'baseline', None)) + min_ratio = 
float(getattr(check, 'min_ratio', 0.5)) + if not path or not os.path.isfile(path): + return getattr(check, 'message', + None) or 'expected artifact not found' + cur = os.path.getsize(path) + base = os.path.getsize(baseline) if ( + baseline and os.path.isfile(baseline)) else 0 + if base and (cur / base) < min_ratio: + return getattr( + check, 'message', + None) or 'the final artifact looks over-compressed' + return None + + if ctype == 'llm_quality': + if path and os.path.isfile(path): + with open(path, 'r', encoding='utf-8') as f: + content = f.read() + else: + content = self._last_assistant_text(messages) + if not content.strip(): + return None + model = str( + getattr(check, 'model', None) + or getattr(self._llm, 'model', 'qwen3.5-plus')) + api_key = getattr(check, 'openai_api_key', None) or getattr( + self._llm, 'openai_api_key', None) + base_url = getattr(check, 'openai_base_url', None) or getattr( + self._llm, 'openai_base_url', None) + checker = LLMQualityChecker(model, api_key, base_url, + getattr(check, 'system_prompt', None)) + return checker.check(content) + + logger.warning('StopGateCallback: unknown check type %r', ctype) + return None + + # ---- lifecycle --------------------------------------------------------- + + async def after_tool_call(self, runtime: Runtime, + messages: List[Message]): + if not self.enabled or not self.checks: + return + if not runtime.should_stop: + return + if self._retries_used >= self.max_retries: + return + for check in self.checks: + try: + reason = self._run_check(check, messages) + except Exception as exc: # pragma: no cover + logger.warning('StopGateCallback: check raised %s', exc) + reason = None + if reason: + tmpl = getattr(check, 'message', None) or _DEFAULT_REFLECT + text = tmpl.replace('{reason}', + reason) if '{reason}' in tmpl else tmpl + messages.append(Message(role='user', content=text)) + runtime.should_stop = False + self._retries_used += 1 + logger.info( + 'StopGateCallback: gate failed (%s); forcing 
another ' + 'round (%d/%d)', reason, self._retries_used, + self.max_retries) + return diff --git a/ms_agent/agent/templates/harness/subagent_limit.py b/ms_agent/agent/templates/harness/subagent_limit.py new file mode 100644 index 000000000..bc84f0fd1 --- /dev/null +++ b/ms_agent/agent/templates/harness/subagent_limit.py @@ -0,0 +1,82 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +"""SubagentLimitCallback -- hard cap on parallel sub-agent delegations per turn. + +If a single assistant response issues more than ``max_parallel`` delegation +(``agent_tools---*``) calls, the excess are dropped before execution (kept calls +still get their tool results, so nothing dangles). Generalized from deer-flow's +SubagentLimitMiddleware. + +Truncation happens in ``on_tool_call`` (before the tools run); the explanatory +*user* message is emitted in ``after_tool_call`` (injecting it before the tool +results would be malformed). +""" +from __future__ import annotations + +from typing import List + +from ms_agent.agent.runtime import Runtime +from ms_agent.callbacks.base import Callback +from ms_agent.llm.utils import Message +from ms_agent.utils import get_logger + +logger = get_logger() + +_DELEGATION_PREFIX = 'agent_tools---' + + +class SubagentLimitCallback(Callback): + """Config block:: + + subagent_limit: + enabled: true + max_parallel: 4 + """ + + def __init__(self, config): + super().__init__(config) + cfg = getattr(config, 'subagent_limit', None) + self.enabled = bool(getattr(cfg, 'enabled', True)) + self.max_parallel = int(getattr(cfg, 'max_parallel', 4)) + self._pending_dropped = 0 + + @staticmethod + def _name(tc) -> str: + return str(tc.get('tool_name', '') if isinstance(tc, dict) else getattr( + tc, 'tool_name', '')) + + def _is_delegation(self, tc) -> bool: + return self._name(tc).startswith(_DELEGATION_PREFIX) + + async def on_tool_call(self, runtime: Runtime, messages: List[Message]): + if not self.enabled or not messages: + return + m = messages[-1] + if 
m.role != 'assistant' or not m.tool_calls: + return + kept, dropped, seen = [], 0, 0 + for tc in m.tool_calls: + if self._is_delegation(tc): + seen += 1 + if seen > self.max_parallel: + dropped += 1 + continue + kept.append(tc) + if dropped: + m.tool_calls = kept + self._pending_dropped = dropped + logger.warning( + 'SubagentLimit: dropped %d excess delegation call(s) (max %d)', + dropped, self.max_parallel) + + async def after_tool_call(self, runtime: Runtime, messages: List[Message]): + if not self._pending_dropped: + return + dropped, self._pending_dropped = self._pending_dropped, 0 + messages.append( + Message( + role='user', + content=( + f'[SUBAGENT_LIMIT] At most {self.max_parallel} sub-agents ' + f'may run in parallel per turn; {dropped} extra ' + 'delegation(s) were dropped. Issue the remaining ones on a ' + 'later turn.'))) diff --git a/ms_agent/agent/templates/harness/todo_gate.py b/ms_agent/agent/templates/harness/todo_gate.py new file mode 100644 index 000000000..f450a08fd --- /dev/null +++ b/ms_agent/agent/templates/harness/todo_gate.py @@ -0,0 +1,88 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +"""TodoGateCallback -- don't let the agent stop with unfinished plan items. + +When the agent decides to stop (``runtime.should_stop`` is True) but the +todo_list plan (``plan.json``) still has incomplete items, inject a reminder and +keep going, bounded by ``max_reminders``. Generalized from deer-flow's +TodoMiddleware premature-exit guard. If there is no plan file, it never blocks. 
+""" +from __future__ import annotations + +import json +import os +from typing import List + +from ms_agent.agent.runtime import Runtime +from ms_agent.callbacks.base import Callback +from ms_agent.llm.utils import Message +from ms_agent.utils import get_logger + +logger = get_logger() + +_DONE = {'completed', 'cancelled'} + + +class TodoGateCallback(Callback): + """Config block:: + + todo_gate: + enabled: true + max_reminders: 2 + plan_file: plan.json # relative to output_dir (or absolute) + """ + + def __init__(self, config): + super().__init__(config) + cfg = getattr(config, 'todo_gate', None) + self.enabled = bool(getattr(cfg, 'enabled', True)) + self.max_reminders = int(getattr(cfg, 'max_reminders', 2)) + self.plan_file = str(getattr(cfg, 'plan_file', 'plan.json')) + self.output_dir = getattr(config, 'output_dir', None) or '.' + self._used = 0 + + def _plan_path(self) -> str: + return self.plan_file if os.path.isabs(self.plan_file) else os.path.join( + self.output_dir, self.plan_file) + + def _incomplete(self) -> list: + path = self._plan_path() + if not os.path.isfile(path): + return [] + try: + with open(path, 'r', encoding='utf-8') as f: + data = json.load(f) + except Exception: + return [] + if isinstance(data, dict): + todos = data.get('todos') or [] + elif isinstance(data, list): + todos = data + else: + todos = [] + return [ + t for t in todos + if str((t or {}).get('status', '')).strip().lower() not in _DONE + ] + + async def after_tool_call(self, runtime: Runtime, messages: List[Message]): + if not self.enabled or not runtime.should_stop: + return + if self._used >= self.max_reminders: + return + incomplete = self._incomplete() + if not incomplete: + return + sample = ', '.join( + str((t or {}).get('content', ''))[:40] for t in incomplete[:5]) + messages.append( + Message( + role='user', + content=( + f'[TODO_GATE] {len(incomplete)} planned item(s) are not yet ' + f'completed (e.g. {sample}). 
Continue and finish them, or ' + 'explicitly explain why they cannot be completed, before ' + 'stopping.'))) + runtime.should_stop = False + self._used += 1 + logger.info('TodoGate: blocked stop, %d incomplete (%d/%d)', + len(incomplete), self._used, self.max_reminders) diff --git a/ms_agent/agent/templates/plan/agent.yaml b/ms_agent/agent/templates/plan/agent.yaml new file mode 100644 index 000000000..d1d76a76d --- /dev/null +++ b/ms_agent/agent/templates/plan/agent.yaml @@ -0,0 +1,51 @@ +# Built-in template: plan (model_tier: reasoning) +# Read-only planning / analysis: produces a concrete plan, makes no changes. +llm: + service: openai + model: qwen3.7-plus + openai_api_key: + openai_base_url: + +generation_config: + stream: true + +prompt: + base: general # shared base prompt (prompts/base/general.md) + system: | # specialization only + For this task you operate in READ-ONLY mode. Hard constraints (these + override any other instruction): + - Do NOT modify files or run anything that changes system state. + - You may only read, search, and reason. + Method: + 1) Inspect the codebase / materials with read & search tools (and the web) + so the plan is grounded in reality, not guesses. + 2) Produce a concrete, ordered plan: each step states a clear action, the + files / components involved, and an acceptance criterion (how to verify it). + 3) Capture the plan with the todo tools, then present it clearly and stop. + Do not implement anything yourself -- output the plan only. + +callbacks: + - input_callback + - state_inject + - loop_guard # break invalid tool-call loops + - plan_check # completeness self-check when the plan is created + +# Audit the freshly-created plan against the user's request. 
+plan_check: + enabled: true + +tools: + file_system: + mcp: false + include: [read_file, grep, glob] # read-only: no write_file / edit_file + web_search: + mcp: false + engine: exa + fetch_content: true + max_results: 5 + todo_list: + mcp: false + auto_render_md: true + include: [todo_write, todo_read] # plan is recorded here, not in source files + +max_chat_round: 9999 diff --git a/ms_agent/agent/templates/prompts/base/general.md b/ms_agent/agent/templates/prompts/base/general.md new file mode 100644 index 000000000..c854a80f7 --- /dev/null +++ b/ms_agent/agent/templates/prompts/base/general.md @@ -0,0 +1,19 @@ +You are MS-Agent, a capable general-purpose AI agent. You help across research, analysis, writing, planning, coding, and everyday questions, and you are also a good conversational partner. + +Default to being helpful. Decline only when helping would create a concrete, specific risk of serious harm; requests that are merely edgy, hypothetical, or uncomfortable do not meet that bar. + +Calibrate effort to the request. Answer simple questions briefly and directly; give thorough, well-structured responses to complex, open-ended, or expert questions. + +Use your tools freely to ground your work. Reach for tools to gather context, verify facts, inspect files, search for current information, and take actions in the environment; prefer grounding answers in real data and the live environment over relying on memory alone, unless you are confident that your memory is accurate, complete, and sufficient to answer directly. Run independent tool calls in parallel and dependent ones in order, and never guess missing parameters. + +Engage genuinely. Respond to what the user actually said, stay curious, and ask clarifying questions when needed, but only after attempting the request. Skip flattery and filler openers; lead with the useful part. + +Be warm and direct. Assume competence, treat the user with respect, and push back honestly and constructively when you disagree.
Accurate guidance beats easy agreement. + +Default to natural prose. Use headings, lists, or tables when asked or when the content is genuinely multifaceted; keep casual replies to a few sentences. Use code blocks for code, and reference code locations as `path:line` when relevant. + +Be honest. Don't fabricate facts, URLs, citations, or data; when unsure or unable to verify, say so. Own mistakes plainly without over-apologizing. + +Respond in the same language the user uses. + +Environment: current date ; working directory ; platform . Your knowledge may be out of date, so verify time-sensitive facts with tools. diff --git a/ms_agent/agent/templates/prompts/base/worker.md b/ms_agent/agent/templates/prompts/base/worker.md new file mode 100644 index 000000000..7c65224b6 --- /dev/null +++ b/ms_agent/agent/templates/prompts/base/worker.md @@ -0,0 +1,9 @@ +You are a focused sub-agent in MS-Agent, invoked to complete one well-scoped task and return a clear result. You are not in a multi-turn conversation: do the task, then report. + +Work from evidence. Use your tools actively to gather the context you need, and don't fabricate facts, file paths, URLs, citations, or data; if you cannot determine something, say so instead of guessing. + +Use your tools deliberately. Prefer the dedicated tools for reading, searching, and editing; run independent calls in parallel and dependent ones in order; never guess missing parameters. Stay within the scope of the assigned task. + +Make your final result concise and structured: state what you found or did, the key evidence or paths, and anything the caller needs to act on. Lead with the answer. + +Environment: current date ; working directory ; platform . diff --git a/ms_agent/agent/templates/registry.py b/ms_agent/agent/templates/registry.py new file mode 100644 index 000000000..a7c74f4b9 --- /dev/null +++ b/ms_agent/agent/templates/registry.py @@ -0,0 +1,133 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+"""Built-in template agent registry. + +A *template* is an ordinary ms-agent config directory (``agent.yaml`` + optional +``prompts/``) that ships with the package and is resolvable by a bare name such +as ``general``, ``plan``, ``explore``, ``build`` or ``research``. + +This module exposes two things: + +1. ``resolve_template_source`` / ``resolve_template_dir`` -- name -> directory + resolution, inserted into the existing config-loading seams (``Config.from_task``, + ``AgentLoader.build``, the CLI). Local paths always win; unknown names fall + through to the existing ModelScope resolution. +2. ``load_manifest`` / ``list_templates`` / ``get_when_to_use`` -- the template + manifest (``registry.yaml``), used for CLI listing, WebUI selectors and + sub-agent delegation description synthesis. + +Resolution priority for a template name: project override > user override > +built-in. +""" +from __future__ import annotations + +import os +from pathlib import Path +from typing import Dict, List, Optional + +# Directory holding the built-in templates (this file's directory). 
+BUILTIN_DIR = Path(__file__).resolve().parent +_MANIFEST_FILE = BUILTIN_DIR / 'registry.yaml' +_CONFIG_FILES = ('agent.yaml', 'agent.yml') + +# User-level override location: ~/.ms_agent/agents// +_USER_AGENTS_DIR = Path(os.path.expanduser('~/.ms_agent/agents')) +# Project-level override subdir (relative to a project root): +# /.ms-agent/agents// +_PROJECT_AGENTS_SUBDIR = os.path.join('.ms-agent', 'agents') + + +def _has_config(dir_path: Path) -> bool: + return any((dir_path / name).is_file() for name in _CONFIG_FILES) + + +def is_template_name(name: str) -> bool: + """A bare template name has no path separators (so it can never be confused + with a local path or a ``org/repo`` ModelScope id).""" + if not name or not isinstance(name, str): + return False + if os.sep in name or '/' in name: + return False + return True + + +def resolve_template_dir(name: str, + project_path: Optional[str] = None) -> Optional[str]: + """Resolve a template *name* to a config directory. + + Priority: project override > user override > built-in. Returns ``None`` when + the name is not a known template. + """ + if not is_template_name(name): + return None + # 1) project-level override + if project_path: + cand = Path(project_path) / _PROJECT_AGENTS_SUBDIR / name + if _has_config(cand): + return str(cand) + # 2) user-level override + cand = _USER_AGENTS_DIR / name + if _has_config(cand): + return str(cand) + # 3) built-in + cand = BUILTIN_DIR / name + if _has_config(cand): + return str(cand) + return None + + +def resolve_template_source(config_dir_or_id: str, + project_path: Optional[str] = None) -> str: + """Passthrough resolver for the config-loading seams. + + - If ``config_dir_or_id`` is an existing path, return it unchanged (local + wins). + - Else if it is a known template name, return the resolved template dir. + - Else return it unchanged (the caller's ModelScope fallback handles it). 
+ + Importing the template package also triggers built-in harness callback + registration (see ``ms_agent/agent/templates/__init__.py``), so that + templates referencing ``callbacks: [stop_gate, ...]`` resolve without + ``trust_remote_code``. + """ + if not config_dir_or_id or not isinstance(config_dir_or_id, str): + return config_dir_or_id + if os.path.exists(config_dir_or_id): + return config_dir_or_id + resolved = resolve_template_dir(config_dir_or_id, project_path=project_path) + return resolved if resolved is not None else config_dir_or_id + + +def load_manifest() -> Dict[str, dict]: + """Load ``registry.yaml`` -> ``{name: {description, mode, when_to_use, + model_tier}}``. Returns ``{}`` if the manifest is missing/unreadable.""" + if not _MANIFEST_FILE.is_file(): + return {} + try: + # Lazy import keeps the hot resolution path free of omegaconf. + from omegaconf import OmegaConf + data = OmegaConf.to_container( + OmegaConf.load(str(_MANIFEST_FILE)), resolve=True) + except Exception: + return {} + templates = (data or {}).get('templates', {}) or {} + return {str(k): (v or {}) for k, v in templates.items()} + + +def list_templates(mode: Optional[str] = None) -> List[dict]: + """List templates from the manifest. When ``mode`` is given, include only + templates whose mode matches it or is ``all`` (e.g. 
``mode='primary'`` + returns entry-point templates for a UI selector).""" + out: List[dict] = [] + for name, meta in load_manifest().items(): + if mode is not None and meta.get('mode') not in (mode, 'all'): + continue + out.append({'name': name, **meta}) + return out + + +def get_when_to_use(name: str) -> str: + """Delegation description for a template (used to synthesize the sub-agent + tool description in ``expand_subagents``).""" + meta = load_manifest().get(name) or {} + return (meta.get('when_to_use') or meta.get('description') + or f'Delegate a subtask to the {name} sub-agent.') diff --git a/ms_agent/agent/templates/registry.yaml b/ms_agent/agent/templates/registry.yaml new file mode 100644 index 000000000..b0ba461a0 --- /dev/null +++ b/ms_agent/agent/templates/registry.yaml @@ -0,0 +1,47 @@ +# Built-in template agent manifest. +# +# Single source of truth for: CLI `--list-templates`, WebUI template selector, +# and sub-agent delegation description synthesis (`expand_subagents`). +# +# Fields per template: +# mode : primary | subagent | all (which roles a template is offered as) +# model_tier : strong | reasoning | fast (intended model tier; informational) +# when_to_use : description shown to a parent agent to decide when to delegate +version: 1 +templates: + general: + description: General-purpose agent with full analyze -> plan -> execute capability; can delegate to sub-agents. + mode: all + model_tier: strong + when_to_use: >- + Default entry point. Handles general tasks that need multiple tools and + multi-step reasoning, and can delegate focused subtasks to explore / build + / research sub-agents. + plan: + description: Read-only planning & analysis agent; produces a concrete plan without making changes. + mode: primary + model_tier: reasoning + when_to_use: >- + Use to think a task through and produce a structured, step-by-step plan + before any changes are made; safe (read-only) for restricted mode. 
+ explore: + description: Fast read-only exploration agent; locates files and answers questions about a codebase or materials. + mode: subagent + model_tier: fast + when_to_use: >- + Delegate when you need to search a large codebase or set of materials and + are not confident a single query will find the right match. Read-only. + build: + description: Coding agent; implements, edits and debugs code, verifying with tests. + mode: subagent + model_tier: strong + when_to_use: >- + Delegate a well-specified coding subtask: implement or modify source files + and verify them by running tests / checking diagnostics. + research: + description: Research agent; searches, synthesizes and produces a cited report. + mode: subagent + model_tier: reasoning + when_to_use: >- + Delegate an in-depth research subtask on a specific topic; returns dense, + citation-backed findings. diff --git a/ms_agent/agent/templates/research/agent.yaml b/ms_agent/agent/templates/research/agent.yaml new file mode 100644 index 000000000..960d94383 --- /dev/null +++ b/ms_agent/agent/templates/research/agent.yaml @@ -0,0 +1,65 @@ +# Built-in template: research (model_tier: reasoning) +# Research sub-agent: search -> synthesize -> cited report. +llm: + service: openai + model: qwen3.7-plus + openai_api_key: + openai_base_url: + +generation_config: + stream: true + +prompt: + base: worker # shared sub-agent base prompt (prompts/base/worker.md) + system: | # specialization only + You are a research agent: search, synthesize, and produce a citation-backed + answer. + - Search first; do NOT fabricate URLs, facts, figures, or APIs. If you + cannot find something, say so explicitly. + - Every key claim must be supported by a cited source. Use numbered markers + [1], [2] inline, and a single "References" section at the end that maps + one-to-one with the markers. + - Write dense, well-structured findings; avoid filler. + +# No interactive input_callback: research is a run-once / sub-agent template. 
+callbacks: + - state_inject # fill // in the base prompt + - loop_guard # break invalid tool-call loops + - round_reminder # nudge convergence before the round budget runs out + - stop_gate # don't stop on a low-quality / placeholder answer + +# Harness: converge near the round limit, and audit the final answer before +# the agent is allowed to stop. +round_reminder: + enabled: true + remind_before_max_round: 2 # remind at round == max_chat_round - 2 + +stop_gate: + enabled: true + max_retries: 2 + checks: + # Audit the last assistant message: no placeholders, no fabricated + # citations, no "see external file". Uses the agent's own llm by default. + - type: llm_quality + message: >- + Before finishing, an automated quality check flagged your answer: + {reason}. Please fix this -- replace any placeholder/abbreviated content + with the real text, ensure every claim has a valid cited source, and + keep a single References section -- then continue. + +tools: + web_search: + mcp: false + engine: exa + fetch_content: true + max_results: 8 + file_system: + mcp: false + include: [read_file, write_file, edit_file, grep, glob] + todo_list: + mcp: false + auto_render_md: true + include: [todo_write, todo_read] + +max_chat_round: 30 + diff --git a/ms_agent/agent/templates/subagent_expand.py b/ms_agent/agent/templates/subagent_expand.py new file mode 100644 index 000000000..fc082553b --- /dev/null +++ b/ms_agent/agent/templates/subagent_expand.py @@ -0,0 +1,103 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +"""Expand the ``subagents:`` shorthand into ``tools.agent_tools.definitions``. 
+ +A template (or any agent config) may declare:: + + subagents: [explore, build, research] + +which is expanded, at config-load time, into the equivalent ``agent_tools`` +definitions, each pointing at the corresponding template via ``template://``:: + + tools: + agent_tools: + definitions: + - tool_name: explore + config_path: template://explore + description: + parameters: {request: string} + output_mode: final_message + +Non-destructive and idempotent: an explicit definition with the same +``tool_name`` always wins, so users can hand-tune any entry. +""" +from __future__ import annotations + +from omegaconf import DictConfig, OmegaConf + +from .registry import get_when_to_use + + +def _standard_parameters(name: str) -> dict: + return { + 'type': 'object', + 'properties': { + 'request': { + 'type': + 'string', + 'description': + (f'A self-contained task description for the {name} sub-agent. ' + 'Include all necessary context; the sub-agent does not see this ' + 'conversation.'), + }, + }, + 'required': ['request'], + 'additionalProperties': False, + } + + +def expand_subagents(config: DictConfig) -> DictConfig: + """Expand ``config.subagents`` into ``config.tools.agent_tools.definitions``. + + Returns ``config`` unchanged when there is no ``subagents`` key. + """ + subagents = getattr(config, 'subagents', None) + if not subagents: + return config + try: + names = [ + str(n) for n in OmegaConf.to_container(subagents, resolve=True) + ] + except Exception: + return config + names = [n for n in names if n] + if not names: + return config + + if not hasattr(config, 'tools') or config.tools is None: + config.tools = DictConfig({}) + tools = config.tools + + # Build the agent_tools block as a plain container, then assign it back in + # one step. OmegaConf copies on assignment, so mutating a node *after* + # assigning it would not persist -- build-then-assign-once avoids that. 
+ existing_at = getattr(tools, 'agent_tools', None) + if existing_at is None: + at_dict = {'mcp': False} + else: + at_dict = OmegaConf.to_container(existing_at, resolve=True) + at_dict = at_dict if isinstance(at_dict, dict) else {} + existing_defs = at_dict.get('definitions') or [] + + existing_names = set() + for d in existing_defs: + tn = (d.get('tool_name') or d.get('name')) if isinstance(d, + dict) else None + if tn is not None: + existing_names.add(str(tn)) + + generated = [] + for name in names: + if name in existing_names: + continue # explicit definition wins + generated.append({ + 'tool_name': name, + 'description': get_when_to_use(name), + 'config_path': f'template://{name}', + 'parameters': _standard_parameters(name), + 'output_mode': 'final_message', + 'max_output_chars': 200000, + }) + + at_dict['definitions'] = list(existing_defs) + generated + tools.agent_tools = DictConfig(at_dict) + return config diff --git a/ms_agent/cli/run.py b/ms_agent/cli/run.py index 1bb1d67ad..8b5a7a341 100644 --- a/ms_agent/cli/run.py +++ b/ms_agent/cli/run.py @@ -195,8 +195,16 @@ def _execute_with_config(self): default_config_path) as config_file: self.args.config = str(config_file) elif not os.path.exists(self.args.config): - from modelscope import snapshot_download - self.args.config = snapshot_download(self.args.config) + # Resolve built-in template names (e.g. "general", "plan") before + # falling back to ModelScope. 
+ from ms_agent.agent.templates.registry import \ + resolve_template_source + resolved = resolve_template_source(self.args.config) + if os.path.exists(resolved): + self.args.config = resolved + else: + from modelscope import snapshot_download + self.args.config = snapshot_download(resolved) self.args.trust_remote_code = strtobool( self.args.trust_remote_code) # noqa self.args.load_cache = strtobool(self.args.load_cache) diff --git a/ms_agent/config/config.py b/ms_agent/config/config.py index c1f1ab85c..1f3680808 100644 --- a/ms_agent/config/config.py +++ b/ms_agent/config/config.py @@ -68,6 +68,11 @@ def from_task(cls, Returns: The config object. """ + # Built-in template names (e.g. "general", "plan", "explore") resolve to + # packaged config dirs. Local paths win; unknown names fall through to + # the ModelScope resolution below. + from ms_agent.agent.templates.registry import resolve_template_source + config_dir_or_id = resolve_template_source(config_dir_or_id) if not os.path.exists(config_dir_or_id): config_dir_or_id = snapshot_download(config_dir_or_id) @@ -103,6 +108,25 @@ def from_task(cls, except Exception: # Never block config loading due to prompt resolving. pass + # Layered prompt: prepend the selected base (prompt.base) to the + # template's specialization (prompt.system). + try: + if isinstance(config, DictConfig): + from ms_agent.agent.templates.compose_prompt import \ + compose_system_prompt + config = compose_system_prompt(config) + except Exception: + # Never block config loading due to prompt composition. + pass + # Expand the `subagents: [...]` shorthand into agent_tools definitions. + try: + if isinstance(config, DictConfig): + from ms_agent.agent.templates.subagent_expand import \ + expand_subagents + config = expand_subagents(config) + except Exception: + # Never block config loading due to sub-agent expansion. 
+ pass return config @staticmethod diff --git a/ms_agent/tools/agent_tool.py b/ms_agent/tools/agent_tool.py index 926bfdb01..a5ef45c7e 100644 --- a/ms_agent/tools/agent_tool.py +++ b/ms_agent/tools/agent_tool.py @@ -56,6 +56,12 @@ class _AgentToolSpec: max_subtask_output_chars: int = 8192 run_in_background: bool = False sync_timeout_s: Optional[float] = None + subagent_depth: int = 0 + + +# Maximum sub-agent delegation depth. A sub-agent at depth >= this value will +# not register any delegation tools, preventing unbounded recursion. +_MAX_SUBAGENT_DEPTH = int(os.getenv('MS_MAX_SUBAGENT_DEPTH', '2')) _MESSAGE_FIELDS = set(Message.__dataclass_fields__.keys()) @@ -84,7 +90,10 @@ def _build_sub_agent(spec: _AgentToolSpec, default_trust_remote_code: bool): # on the merged agent config (e.g. in the sub-agent YAML). config_override = OmegaConf.merge( base_override, - OmegaConf.create({'ms_agent_subagent': True}), + OmegaConf.create({ + 'ms_agent_subagent': True, + '_subagent_depth': spec.subagent_depth, + }), ) trust_remote_code = spec.trust_remote_code @@ -215,6 +224,8 @@ def __init__(self, config: DictConfig, **kwargs): self._active_sync_tasks: Dict[str, Any] = {} # effective_call_id -> stream file path (set during _run_agent, consumed by call_tool) self._stream_paths: Dict[str, str] = {} + # Delegation depth of THIS agent (0 = top-level). Children get depth+1. + self._depth = int(getattr(config, '_subagent_depth', 0) or 0) self._load_specs() @property @@ -251,6 +262,12 @@ def enabled(self) -> bool: } def _load_specs(self): + # Depth cap: a sub-agent at/over the max depth cannot delegate further. 
+ if self._depth >= _MAX_SUBAGENT_DEPTH: + logger.info( + 'AgentTool: sub-agent at depth %d >= max %d; delegation disabled.', + self._depth, _MAX_SUBAGENT_DEPTH) + return tools_cfg = getattr(self.config, 'tools', DictConfig({})) # Backward compat: if config.tools.split_task exists, register a built-in dynamic spec @@ -279,6 +296,7 @@ def _load_specs(self): run_in_process=run_in_process, dynamic=True, max_subtask_output_chars=8192, + subagent_depth=self._depth + 1, ) self._specs['split_to_sub_task'] = builtin_spec @@ -402,11 +420,8 @@ def _build_spec(self, cfg: Union[DictConfig, Dict[str, Any]], elif disallowed_tools is not None: disallowed_tools = None - if config_path and not os.path.isabs(config_path): - base_dir = getattr(self.config, 'local_dir', None) - if base_dir: - config_path = os.path.normpath( - os.path.join(base_dir, config_path)) + if config_path: + config_path = self._resolve_config_path(config_path) return _AgentToolSpec( tool_name=tool_name, @@ -430,8 +445,33 @@ def _build_spec(self, cfg: Union[DictConfig, Dict[str, Any]], max_subtask_output_chars=max_subtask_chars, run_in_background=bool(getattr(cfg, 'run_in_background', False)), sync_timeout_s=float(getattr(cfg, 'sync_timeout_s', 0)) or None, + subagent_depth=self._depth + 1, ) + def _resolve_config_path(self, config_path: str) -> str: + """Resolve a sub-agent ``config_path``. + + Supports built-in template references -- ``template://`` or a bare + template name (e.g. ``explore``) -- in addition to absolute paths and + paths relative to the parent agent's ``local_dir`` (the original + behaviour). + """ + from ms_agent.agent.templates.registry import resolve_template_dir + if config_path.startswith('template://'): + name = config_path[len('template://'):].strip() + return resolve_template_dir(name) or config_path + # Bare template name (only if it actually matches a known template). 
+ if not os.path.isabs(config_path) and not os.path.exists(config_path): + hit = resolve_template_dir(config_path) + if hit: + return hit + # Original behaviour: resolve relative paths against the parent's dir. + if not os.path.isabs(config_path): + base_dir = getattr(self.config, 'local_dir', None) + if base_dir: + return os.path.normpath(os.path.join(base_dir, config_path)) + return config_path + def _build_server_index(self): server_map: Dict[str, List[Tool]] = {} for spec in self._specs.values(): @@ -784,6 +824,7 @@ async def _run_one(i: int, task: dict) -> str: dynamic=False, disallowed_tools=spec.disallowed_tools, max_subtask_output_chars=spec.max_subtask_output_chars, + subagent_depth=spec.subagent_depth, ) use_subprocess = sub_spec.run_in_thread and sub_spec.run_in_process agent = None if use_subprocess else self._build_agent(sub_spec) diff --git a/requirements/framework.txt b/requirements/framework.txt index 7b7c84fc1..77914a3dc 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -3,6 +3,7 @@ dotenv edge_tts faiss-cpu json5 +loguru markdown matplotlib mcp diff --git a/setup.py b/setup.py index aec42aa2f..3c6bbeb24 100644 --- a/setup.py +++ b/setup.py @@ -262,6 +262,7 @@ def _build_and_copy_webui(self): package_data={ 'ms_agent': [ 'projects/**/*', + 'agent/templates/**/*', 'webui/backend/**/*', 'webui/frontend/dist/**/*', ], diff --git a/tests/agent/test_harness_callbacks.py b/tests/agent/test_harness_callbacks.py new file mode 100644 index 000000000..72464e703 --- /dev/null +++ b/tests/agent/test_harness_callbacks.py @@ -0,0 +1,121 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import asyncio
+
+from omegaconf import OmegaConf
+
+from ms_agent.agent.runtime import Runtime
+from ms_agent.agent.templates.harness.round_reminder import \
+    RoundReminderCallback
+from ms_agent.agent.templates.harness.stop_gate import StopGateCallback
+from ms_agent.llm.utils import Message
+
+
+def _run(coro):  # drive one coroutine to completion via asyncio.run
+    return asyncio.run(coro)
+
+
+def test_harness_self_registered():  # both harness names appear in callbacks_mapping
+    import ms_agent.agent.templates  # noqa: F401 (import triggers registration)
+    from ms_agent.callbacks import callbacks_mapping
+    assert 'round_reminder' in callbacks_mapping
+    assert 'stop_gate' in callbacks_mapping
+
+
+def test_round_reminder_triggers_then_dedups():  # one reminder at the threshold round, never two
+    cfg = OmegaConf.create({
+        'max_chat_round': 30,
+        'round_reminder': {
+            'enabled': True,
+            'remind_before_max_round': 2
+        },
+    })
+    cb = RoundReminderCallback(cfg)
+    msgs = []
+    rt = Runtime(round=28)  # 30 - 2
+    _run(cb.on_generate_response(rt, msgs))
+    assert len(msgs) == 1
+    assert '[ROUND_REMINDER]' in msgs[0].content
+    # de-dup: a second call on the same round must not add another reminder
+    _run(cb.on_generate_response(rt, msgs))
+    assert len(msgs) == 1
+
+
+def test_round_reminder_silent_off_threshold():  # round 10 is not the threshold round -> no message
+    cfg = OmegaConf.create({
+        'max_chat_round': 30,
+        'round_reminder': {
+            'enabled': True,
+            'remind_before_max_round': 2
+        },
+    })
+    cb = RoundReminderCallback(cfg)
+    msgs = []
+    _run(cb.on_generate_response(Runtime(round=10), msgs))
+    assert msgs == []
+
+
+def test_round_reminder_disabled_by_default():  # config has no 'round_reminder' section
+    cb = RoundReminderCallback(OmegaConf.create({'max_chat_round': 30}))
+    msgs = []
+    _run(cb.on_generate_response(Runtime(round=28), msgs))
+    assert msgs == []
+
+
+def test_stop_gate_blocks_stop_when_artifact_missing(tmp_path):  # report.md is never created
+    cfg = OmegaConf.create({
+        'output_dir': str(tmp_path),
+        'stop_gate': {
+            'enabled': True,
+            'max_retries': 2,
+            'checks': [{
+                'type': 'artifact_exists',
+                'path': 'report.md'
+            }],
+        },
+    })
+    cb = StopGateCallback(cfg)
+    rt = Runtime(should_stop=True)
+    msgs = [Message(role='assistant', content='done')]
+    _run(cb.after_tool_call(rt, msgs))
+    assert rt.should_stop is False  # gate blocked the stop
+    assert len(msgs) == 2  # reflection message injected
+
+
+def test_stop_gate_allows_stop_when_artifact_present(tmp_path):  # report.md exists before the check
+    (tmp_path / 'report.md').write_text('hello world')
+    cfg = OmegaConf.create({
+        'output_dir': str(tmp_path),
+        'stop_gate': {
+            'enabled': True,
+            'checks': [{
+                'type': 'artifact_exists',
+                'path': 'report.md'
+            }],
+        },
+    })
+    cb = StopGateCallback(cfg)
+    rt = Runtime(should_stop=True)
+    msgs = [Message(role='assistant', content='done')]
+    _run(cb.after_tool_call(rt, msgs))
+    assert rt.should_stop is True  # gate let it stop
+
+
+def test_stop_gate_respects_max_retries(tmp_path):  # same callback instance is reused for both calls
+    cfg = OmegaConf.create({
+        'output_dir': str(tmp_path),
+        'stop_gate': {
+            'enabled': True,
+            'max_retries': 1,
+            'checks': [{
+                'type': 'artifact_exists',
+                'path': 'nope.md'
+            }],
+        },
+    })
+    cb = StopGateCallback(cfg)
+    rt1 = Runtime(should_stop=True)
+    _run(cb.after_tool_call(rt1, [Message(role='assistant', content='x')]))
+    assert rt1.should_stop is False  # blocked once
+    rt2 = Runtime(should_stop=True)
+    _run(cb.after_tool_call(rt2, [Message(role='assistant', content='x')]))
+    assert rt2.should_stop is True  # retries exhausted -> allowed
diff --git a/tests/agent/test_loop_guard.py b/tests/agent/test_loop_guard.py
new file mode 100644
index 000000000..0f0feb37d
--- /dev/null
+++ b/tests/agent/test_loop_guard.py
@@ -0,0 +1,100 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import asyncio
+
+from omegaconf import OmegaConf
+
+from ms_agent.agent.runtime import Runtime
+from ms_agent.agent.templates.harness.loop_guard import LoopGuardCallback
+from ms_agent.llm.utils import Message
+
+
+def _run(c):  # drive one coroutine to completion via asyncio.run
+    return asyncio.run(c)
+
+
+def _step(cb, rt, tc):
+    """One round: assistant emits `tc`; run detect (on_tool_call) then
+    inject (after_tool_call). Returns (msgs, message count before inject)."""
+    msgs = [Message(role='assistant', tool_calls=[tc])]
+    _run(cb.on_tool_call(rt, msgs))
+    injected_before = len(msgs)
+    _run(cb.after_tool_call(rt, msgs))
+    return msgs, injected_before
+
+
+def _tc(name, args):  # build a tool-call dict; `args` is passed through as given (JSON text here)
+    return {'tool_name': name, 'arguments': args}
+
+
+def test_repeated_signature_warns_then_hard_stops():  # identical call repeated five times
+    cb = LoopGuardCallback(
+        OmegaConf.create({'loop_guard': {'warn': 3, 'hard': 5}}))
+    rt = Runtime()
+    tc = _tc('file_system---read_file', '{"path": "a.py"}')
+    # rounds 1,2: nothing
+    for _ in range(2):
+        msgs, _ = _step(cb, rt, tc)
+        assert len(msgs) == 1
+    # round 3: warn injected (in after_tool_call, not before)
+    msgs, before = _step(cb, rt, tc)
+    assert before == 1  # nothing added during on_tool_call
+    assert any('[LOOP_GUARD]' in m.content for m in msgs if m.role == 'user')
+    assert rt.should_stop is False
+    # round 5: hard stop
+    _step(cb, rt, tc)
+    msgs, _ = _step(cb, rt, tc)
+    assert rt.should_stop is True
+    assert any('stopped' in m.content for m in msgs if m.role == 'user')
+
+
+def test_distinct_calls_do_not_trip():  # six grep calls, each with a different pattern
+    cb = LoopGuardCallback(
+        OmegaConf.create({'loop_guard': {'warn': 3, 'hard': 5}}))
+    rt = Runtime()
+    for i in range(6):
+        msgs, _ = _step(cb, rt,
+                        _tc('file_system---grep', '{"pattern": "x%d"}' % i))
+        assert len(msgs) == 1  # all different -> no loop
+    assert rt.should_stop is False
+
+
+def test_read_file_line_bucketing_counts_as_repeat():  # only `start` differs between the calls
+    cb = LoopGuardCallback(
+        OmegaConf.create({'loop_guard': {'warn': 3, 'hard': 99}}))
+    rt = Runtime()
+    # same file, small line drift within the same 200-line bucket -> same sig
+    for start in (0, 10, 20):
+        msgs, _ = _step(
+            cb, rt,
+            _tc('file_system---read_file',
+                '{"path": "a.py", "start": %d}' % start))
+    assert any('[LOOP_GUARD]' in m.content for m in msgs if m.role == 'user')
+
+
+def test_frequency_hard_stop():  # signature thresholds set to 99 so only freq_hard can fire
+    cb = LoopGuardCallback(
+        OmegaConf.create({
+            'loop_guard': {
+                'warn': 99,
+                'hard': 99,
+                'freq_warn': 99,
+                'freq_hard': 4
+            }
+        }))
+    rt = Runtime()
+    for i in range(4):
+        msgs, _ = _step(cb, rt,
+                        _tc('web_search---exa_search', '{"q": "x%d"}' % i))
+    assert rt.should_stop is True
+
+
+def test_disabled():  # 'enabled': False -> ten identical calls neither stop nor warn
+    cb = LoopGuardCallback(
+        OmegaConf.create({'loop_guard': {'enabled': False}}))
+    rt = Runtime()
+    tc = _tc('file_system---read_file', '{"path": "a.py"}')
+    for _ in range(10):
+        msgs, _ = _step(cb, rt, tc)
+    assert rt.should_stop is False
+    assert all(len(m.content) == 0 or '[LOOP_GUARD]' not in m.content
+               for m in msgs if m.role == 'user')
diff --git a/tests/agent/test_plan_check.py b/tests/agent/test_plan_check.py
new file mode 100644
index 000000000..57d6515d7
--- /dev/null
+++ b/tests/agent/test_plan_check.py
@@ -0,0 +1,106 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import asyncio
+import json
+
+from omegaconf import OmegaConf
+
+from ms_agent.agent.runtime import Runtime
+from ms_agent.agent.templates.harness import plan_check as plan_check_mod
+from ms_agent.agent.templates.harness.plan_check import PlanCheckCallback
+from ms_agent.llm.utils import Message
+
+
+def _run(c):  # drive one coroutine to completion via asyncio.run
+    return asyncio.run(c)
+
+
+class _FakeChecker:
+    """Stand-in for LLMQualityChecker: returns a preset verdict, no network."""
+    verdict = None  # class-level: each test presets the value check() returns
+
+    def __init__(self, *a, **k):  # accepts and ignores any constructor arguments
+        pass
+
+    def check(self, content):  # `content` is ignored; the preset verdict is returned
+        return _FakeChecker.verdict
+
+
+def _cfg(tmp_path):  # minimal config: output_dir plus placeholder llm settings
+    return OmegaConf.create({
+        'output_dir': str(tmp_path),
+        'llm': {'model': 'fake', 'openai_api_key': 'k', 'openai_base_url': 'u'},
+    })
+
+
+def _plan(tmp_path):  # write a one-todo plan.json into tmp_path
+    (tmp_path / 'plan.json').write_text(
+        json.dumps({'todos': [{'content': 'step 1', 'status': 'pending'}]}),
+        encoding='utf-8')
+
+
+def _todo_write_msgs():  # user request, assistant todo_write tool call, tool reply
+    return [
+        Message(role='user', content='Add subtraction to the calculator'),
+        Message(role='assistant',
+                tool_calls=[{'tool_name': 'todo_list---todo_write',
+                             'arguments': '{}'}]),
+        Message(role='tool', content='ok'),
+    ]
+
+
+def test_injects_feedback_when_incomplete(tmp_path, monkeypatch):  # non-None verdict -> feedback message
+    monkeypatch.setattr(plan_check_mod, 'LLMQualityChecker', _FakeChecker)
+    _FakeChecker.verdict = 'missing the division requirement'
+    _plan(tmp_path)
+    cb = PlanCheckCallback(_cfg(tmp_path))
+    msgs = _todo_write_msgs()
+    _run(cb.on_task_begin(Runtime(), msgs))
+    _run(cb.after_tool_call(Runtime(), msgs))
+    assert any('[PLAN_CHECK]' in m.content for m in msgs if m.role == 'user'
+               and 'missing the division' in m.content)
+
+
+def test_no_feedback_when_complete(tmp_path, monkeypatch):  # None verdict -> nothing injected
+    monkeypatch.setattr(plan_check_mod, 'LLMQualityChecker', _FakeChecker)
+    _FakeChecker.verdict = None  # plan judged complete
+    _plan(tmp_path)
+    cb = PlanCheckCallback(_cfg(tmp_path))
+    msgs = _todo_write_msgs()
+    _run(cb.on_task_begin(Runtime(), msgs))
+    _run(cb.after_tool_call(Runtime(), msgs))
+    assert not any('[PLAN_CHECK]' in m.content for m in msgs
+                   if m.role == 'user')
+
+
+def test_ignores_non_plan_tool_calls(tmp_path, monkeypatch):  # the tool call is grep, not todo_write
+    monkeypatch.setattr(plan_check_mod, 'LLMQualityChecker', _FakeChecker)
+    _FakeChecker.verdict = 'should not be called'
+    _plan(tmp_path)
+    cb = PlanCheckCallback(_cfg(tmp_path))
+    msgs = [
+        Message(role='user', content='find X'),
+        Message(role='assistant',
+                tool_calls=[{'tool_name': 'file_system---grep',
+                             'arguments': '{}'}]),
+        Message(role='tool', content='ok'),
+    ]
+    _run(cb.on_task_begin(Runtime(), msgs))
+    _run(cb.after_tool_call(Runtime(), msgs))
+    assert not any('[PLAN_CHECK]' in m.content for m in msgs
+                   if m.role == 'user')
+
+
+def test_only_first_plan_creation_is_checked(tmp_path, monkeypatch):  # same callback, two plan writes
+    monkeypatch.setattr(plan_check_mod, 'LLMQualityChecker', _FakeChecker)
+    _FakeChecker.verdict = 'gap'
+    _plan(tmp_path)
+    cb = PlanCheckCallback(_cfg(tmp_path))
+    msgs = _todo_write_msgs()
+    _run(cb.on_task_begin(Runtime(), msgs))
+    _run(cb.after_tool_call(Runtime(), msgs))
+    first = sum(1 for m in msgs if m.role == 'user' and '[PLAN_CHECK]' in m.content)
+    # a second plan write should NOT trigger another check
+    msgs2 = _todo_write_msgs()
+    _run(cb.after_tool_call(Runtime(), msgs2))
+    second = sum(1 for m in msgs2 if m.role == 'user' and '[PLAN_CHECK]' in m.content)
+    assert first == 1 and second == 0
diff --git a/tests/agent/test_prompt_layering.py b/tests/agent/test_prompt_layering.py
new file mode 100644
index 000000000..9662281a5
--- /dev/null
+++ b/tests/agent/test_prompt_layering.py
@@ -0,0 +1,91 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import asyncio
+
+from omegaconf import OmegaConf
+
+from ms_agent.agent.runtime import Runtime
+from ms_agent.agent.templates.compose_prompt import compose_system_prompt
+from ms_agent.agent.templates.harness.state_inject import StateInjectCallback
+from ms_agent.llm.utils import Message
+
+
+def _run(coro):  # drive one coroutine to completion via asyncio.run
+    return asyncio.run(coro)
+
+
+# ---- compose_system_prompt ------------------------------------------------
+
+def test_compose_prepends_general_base():  # base text comes first, the spec stays at the end
+    cfg = OmegaConf.create({'prompt': {'base': 'general', 'system': 'SPEC'}})
+    compose_system_prompt(cfg)
+    assert cfg.prompt.system.startswith('You are MS-Agent')
+    assert cfg.prompt.system.rstrip().endswith('SPEC')
+
+
+def test_compose_prepends_worker_base():  # worker base text and the spec are both present
+    cfg = OmegaConf.create({'prompt': {'base': 'worker', 'system': 'SPEC'}})
+    compose_system_prompt(cfg)
+    assert 'focused sub-agent in MS-Agent' in cfg.prompt.system
+    assert 'SPEC' in cfg.prompt.system
+
+
+def test_compose_none_is_untouched():  # base 'none' leaves prompt.system as written
+    cfg = OmegaConf.create({'prompt': {'base': 'none', 'system': 'SPEC'}})
+    compose_system_prompt(cfg)
+    assert cfg.prompt.system == 'SPEC'
+
+
+def test_compose_missing_base_is_untouched():  # no 'base' key at all
+    cfg = OmegaConf.create({'prompt': {'system': 'SPEC'}})
+    compose_system_prompt(cfg)
+    assert cfg.prompt.system == 'SPEC'
+
+
+def test_compose_unknown_base_is_untouched():  # unrecognised base name is ignored, not an error
+    cfg = OmegaConf.create({'prompt': {'base': 'nope', 'system': 'SPEC'}})
+    compose_system_prompt(cfg)
+    assert cfg.prompt.system == 'SPEC'
+
+
+def test_compose_empty_spec_just_base():  # empty spec still yields the base text
+    cfg = OmegaConf.create({'prompt': {'base': 'worker', 'system': ''}})
+    compose_system_prompt(cfg)
+    assert 'focused sub-agent in MS-Agent' in cfg.prompt.system
+
+
+# ---- StateInjectCallback --------------------------------------------------
+
+def test_state_inject_fills_placeholders():
+    cfg = OmegaConf.create({})
+    cb = StateInjectCallback(cfg)
+    msgs = [
+        Message(
+            role='system',
+            content='date ; cwd ; os ; spec'),  # NOTE(review): placeholder tokens look stripped from this literal -- confirm
+    ]
+    _run(cb.on_task_begin(Runtime(), msgs))
+    c = msgs[0].content
+    assert '' not in c and '' not in c and '' not in c  # NOTE(review): '' is in every str, so this always fails as written; presumably three placeholder tokens were lost -- restore them
+    assert 'spec' in c
+
+
+def test_state_inject_ignores_non_system_first_message():  # first message has role 'user'
+    cfg = OmegaConf.create({})
+    cb = StateInjectCallback(cfg)
+    msgs = [Message(role='user', content='hello ')]
+    _run(cb.on_task_begin(Runtime(), msgs))
+    assert msgs[0].content == 'hello '  # untouched
+
+
+def test_state_inject_disabled():  # 'enabled': False leaves the system message unchanged
+    cfg = OmegaConf.create({'state_inject': {'enabled': False}})
+    cb = StateInjectCallback(cfg)
+    msgs = [Message(role='system', content='cwd ')]
+    _run(cb.on_task_begin(Runtime(), msgs))
+    assert msgs[0].content == 'cwd '  # untouched
+
+
+def test_state_inject_self_registered():  # 'state_inject' appears in callbacks_mapping after the import
+    import ms_agent.agent.templates  # noqa: F401
+    from ms_agent.callbacks import callbacks_mapping
+    assert 'state_inject' in callbacks_mapping
diff --git a/tests/agent/test_subagent_depth.py b/tests/agent/test_subagent_depth.py
new file mode 100644
index 000000000..3502f3153
--- /dev/null
+++ b/tests/agent/test_subagent_depth.py
@@ -0,0 +1,38 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import sys
+
+import pytest
+from omegaconf import OmegaConf
+
+from ms_agent.config import Config
+from ms_agent.tools.agent_tool import AgentTool
+
+
+@pytest.fixture(autouse=True)
+def _clean_argv(monkeypatch):  # replace sys.argv with a bare program name for every test
+    monkeypatch.setattr(sys, 'argv', ['ms-agent'])
+
+
+def test_top_level_registers_delegations_at_depth_1():  # no '_subagent_depth' key in the config
+    cfg = Config.from_task('general')  # has subagents -> agent_tools
+    at = AgentTool(cfg)
+    assert at.enabled
+    depths = {s.subagent_depth for s in at._specs.values()}
+    assert depths == {1}  # children are one level deeper
+
+
+def test_depth_at_max_disables_delegation():  # NOTE(review): 2 is the default cap; presumably fails if MS_MAX_SUBAGENT_DEPTH is set -- confirm
+    cfg = Config.from_task('general')
+    cfg = OmegaConf.merge(cfg, OmegaConf.create({'_subagent_depth': 2}))
+    at = AgentTool(cfg)
+    assert not at.enabled  # sub-agent at max depth cannot delegate
+    assert at._specs == {}
+
+
+def test_depth_one_still_delegates_children_at_two():  # depth 1 is below the default cap of 2
+    cfg = Config.from_task('general')
+    cfg = OmegaConf.merge(cfg, OmegaConf.create({'_subagent_depth': 1}))
+    at = AgentTool(cfg)
+    assert at.enabled
+    depths = {s.subagent_depth for s in at._specs.values()}
+    assert depths == {2}  # which is the cap -> grandchildren blocked
diff --git a/tests/agent/test_subagent_expand.py b/tests/agent/test_subagent_expand.py
new file mode 100644
index 000000000..190b1ff75
--- /dev/null
+++ b/tests/agent/test_subagent_expand.py
@@ -0,0 +1,62 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from omegaconf import OmegaConf
+
+from ms_agent.agent.templates.subagent_expand import expand_subagents
+
+
+def test_expand_creates_definitions():  # one definition per listed sub-agent, in list order
+    cfg = OmegaConf.create({'subagents': ['explore', 'research'], 'tools': {}})
+    expand_subagents(cfg)
+    defs = cfg.tools.agent_tools.definitions
+    names = [d.tool_name for d in defs]
+    assert names == ['explore', 'research']
+    assert all(d.config_path.startswith('template://') for d in defs)
+    assert all(d.output_mode == 'final_message' for d in defs)
+    # Standard {request: string} parameter schema is attached.
+    assert defs[0].parameters['required'] == ['request']
+
+
+def test_explicit_definition_wins():  # a hand-written definition with the same tool_name is kept
+    cfg = OmegaConf.create({
+        'subagents': ['explore'],
+        'tools': {
+            'agent_tools': {
+                'definitions': [{
+                    'tool_name': 'explore',
+                    'config_path': '/custom/path',
+                    'description': 'mine',
+                }]
+            }
+        },
+    })
+    expand_subagents(cfg)
+    defs = cfg.tools.agent_tools.definitions
+    assert len(defs) == 1
+    assert defs[0].config_path == '/custom/path'  # not overwritten
+
+
+def test_no_subagents_is_untouched():  # no 'subagents' key -> no agent_tools section is created
+    cfg = OmegaConf.create({'tools': {'file_system': {}}})
+    expand_subagents(cfg)
+    assert 'agent_tools' not in cfg.tools
+
+
+def test_partial_overlap_merges():  # 'explore' is explicit, 'build' must be generated
+    cfg = OmegaConf.create({
+        'subagents': ['explore', 'build'],
+        'tools': {
+            'agent_tools': {
+                'definitions': [{
+                    'tool_name': 'explore',
+                    'config_path': '/custom/explore',
+                }]
+            }
+        },
+    })
+    expand_subagents(cfg)
+    by_name = {
+        d.tool_name: d.config_path
+        for d in cfg.tools.agent_tools.definitions
+    }
+    assert by_name['explore'] == '/custom/explore'  # kept
+    assert by_name['build'] == 'template://build'  # generated
diff --git a/tests/agent/test_subagent_limit.py b/tests/agent/test_subagent_limit.py
new file mode 100644
index 000000000..be749b83c
--- /dev/null
+++ b/tests/agent/test_subagent_limit.py
@@ -0,0 +1,60 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import asyncio
+
+from omegaconf import OmegaConf
+
+from ms_agent.agent.runtime import Runtime
+from ms_agent.agent.templates.harness.subagent_limit import \
+    SubagentLimitCallback
+from ms_agent.llm.utils import Message
+
+
+def _run(c):  # drive one coroutine to completion via asyncio.run
+    return asyncio.run(c)
+
+
+def _deleg(i):  # delegation tool call whose JSON arguments carry the index `i`
+    return {'tool_name': 'agent_tools---research', 'arguments': '{"i": %d}' % i}
+
+
+def test_truncates_excess_delegations():  # six delegations plus one grep, limit of four
+    cb = SubagentLimitCallback(
+        OmegaConf.create({'subagent_limit': {'max_parallel': 4}}))
+    rt = Runtime()
+    calls = [_deleg(i) for i in range(6)] + [{
+        'tool_name': 'file_system---grep',
+        'arguments': '{}'
+    }]
+    msgs = [Message(role='assistant', tool_calls=calls)]
+    _run(cb.on_tool_call(rt, msgs))
+    deleg = [tc for tc in msgs[0].tool_calls
+             if tc['tool_name'].startswith('agent_tools---')]
+    other = [tc for tc in msgs[0].tool_calls
+             if not tc['tool_name'].startswith('agent_tools---')]
+    assert len(deleg) == 4  # capped
+    assert len(other) == 1  # non-delegation kept
+    _run(cb.after_tool_call(rt, msgs))
+    assert any('[SUBAGENT_LIMIT]' in m.content for m in msgs
+               if m.role == 'user')
+
+
+def test_under_limit_untouched():  # two delegations, limit of four
+    cb = SubagentLimitCallback(
+        OmegaConf.create({'subagent_limit': {'max_parallel': 4}}))
+    rt = Runtime()
+    msgs = [Message(role='assistant', tool_calls=[_deleg(0), _deleg(1)])]
+    _run(cb.on_tool_call(rt, msgs))
+    assert len(msgs[0].tool_calls) == 2
+    _run(cb.after_tool_call(rt, msgs))
+    assert len(msgs) == 1  # no note injected
+
+
+def test_non_delegation_unaffected():  # five grep calls with a delegation limit of one
+    cb = SubagentLimitCallback(
+        OmegaConf.create({'subagent_limit': {'max_parallel': 1}}))
+    rt = Runtime()
+    calls = [{'tool_name': 'file_system---grep', 'arguments': '{}'}
+             for _ in range(5)]
+    msgs = [Message(role='assistant', tool_calls=calls)]
+    _run(cb.on_tool_call(rt, msgs))
+    assert len(msgs[0].tool_calls) == 5  # non-delegation never capped
diff --git a/tests/agent/test_template_registry.py b/tests/agent/test_template_registry.py
new file mode 100644
index 000000000..698e675dd
--- /dev/null
+++ b/tests/agent/test_template_registry.py
@@ -0,0 +1,42 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+
+from ms_agent.agent.templates import registry
+
+BUILTINS = ['general', 'plan', 'explore', 'build', 'research']
+
+
+def test_builtin_templates_resolve():  # every built-in maps to a directory holding agent.yaml
+    for name in BUILTINS:
+        d = registry.resolve_template_dir(name)
+        assert d and os.path.isdir(d), name
+        assert os.path.isfile(os.path.join(d, 'agent.yaml')), name
+
+
+def test_unknown_name_returns_none():  # unknown names yield None rather than raising
+    assert registry.resolve_template_dir('definitely-not-a-template') is None
+
+
+def test_resolve_source_passthrough_for_paths_and_repo_ids(tmp_path):
+    # Existing local paths win unchanged (tmp_path exists on every platform; '/tmp' is POSIX-only).
+    assert registry.resolve_template_source(str(tmp_path)) == str(tmp_path)
+    # ModelScope ids contain '/', so are never treated as template names.
+    assert registry.resolve_template_source('org/repo') == 'org/repo'
+
+
+def test_resolve_source_hits_builtin():  # a bare built-in name resolves to .../templates/explore
+    out = registry.resolve_template_source('explore')
+    assert out.endswith(os.path.join('templates', 'explore'))
+
+
+def test_list_primary_and_subagent():  # subset checks only: extra templates are allowed
+    primary = {t['name'] for t in registry.list_templates('primary')}
+    sub = {t['name'] for t in registry.list_templates('subagent')}
+    # `general` is mode=all, so it appears in both views.
+    assert {'general', 'plan'} <= primary
+    assert {'general', 'explore', 'build', 'research'} <= sub
+
+
+def test_when_to_use_nonempty():  # every built-in has non-blank when-to-use text
+    for name in BUILTINS:
+        assert registry.get_when_to_use(name).strip()
diff --git a/tests/agent/test_templates_smoke.py b/tests/agent/test_templates_smoke.py
new file mode 100644
index 000000000..0e7b0cdb7
--- /dev/null
+++ b/tests/agent/test_templates_smoke.py
@@ -0,0 +1,45 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import sys
+
+import pytest
+
+from ms_agent.config import Config
+
+ALL = ['general', 'plan', 'explore', 'build', 'research']
+
+
+@pytest.fixture(autouse=True)
+def _clean_argv(monkeypatch):
+    # Config.from_task() parses sys.argv for `--key value` overrides; under
+    # pytest the runner's argv would otherwise trip its assertion.
+    monkeypatch.setattr(sys, 'argv', ['ms-agent'])
+
+
+@pytest.mark.parametrize('name', ALL)
+def test_template_loads(name):  # each template yields a config with a model, tools and a yaml name
+    cfg = Config.from_task(name)
+    assert cfg.llm.model
+    assert hasattr(cfg, 'tools')
+    assert cfg.name in ('agent.yaml', 'agent.yml')
+
+
+@pytest.mark.parametrize('name', ['plan', 'explore'])
+def test_readonly_templates_have_no_write_tools(name):  # file_system.include has read_file only
+    cfg = Config.from_task(name)
+    inc = list(cfg.tools.file_system.include)
+    assert 'read_file' in inc
+    assert 'write_file' not in inc
+    assert 'edit_file' not in inc
+
+
+def test_general_expands_subagents():  # exactly these three delegation tools are defined
+    cfg = Config.from_task('general')
+    names = {d.tool_name for d in cfg.tools.agent_tools.definitions}
+    assert names == {'explore', 'build', 'research'}
+
+
+def test_research_mounts_harness():  # both harness callbacks are listed in cfg.callbacks
+    cfg = Config.from_task('research')
+    cbs = list(cfg.callbacks)
+    assert 'round_reminder' in cbs
+    assert 'stop_gate' in cbs
diff --git a/tests/agent/test_todo_gate.py b/tests/agent/test_todo_gate.py
new file mode 100644
index 000000000..265c235d1
--- /dev/null
+++ b/tests/agent/test_todo_gate.py
@@ -0,0 +1,63 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import asyncio
+import json
+
+from omegaconf import OmegaConf
+
+from ms_agent.agent.runtime import Runtime
+from ms_agent.agent.templates.harness.todo_gate import TodoGateCallback
+from ms_agent.llm.utils import Message
+
+
+def _run(c):  # drive one coroutine to completion via asyncio.run
+    return asyncio.run(c)
+
+
+def _write_plan(tmp_path, todos):  # write {'todos': todos} to tmp_path/plan.json as UTF-8
+    (tmp_path / 'plan.json').write_text(
+        json.dumps({'todos': todos}), encoding='utf-8')
+
+
+def test_blocks_stop_when_incomplete(tmp_path):  # one todo is still 'in_progress'
+    _write_plan(tmp_path, [
+        {'id': '1', 'content': 'a', 'status': 'completed'},
+        {'id': '2', 'content': 'b', 'status': 'in_progress'},
+    ])
+    cb = TodoGateCallback(OmegaConf.create({'output_dir': str(tmp_path)}))
+    rt = Runtime(should_stop=True)
+    msgs = [Message(role='assistant', content='done')]
+    _run(cb.after_tool_call(rt, msgs))
+    assert rt.should_stop is False
+    assert any('[TODO_GATE]' in m.content for m in msgs if m.role == 'user')
+
+
+def test_allows_stop_when_all_done(tmp_path):  # 'completed' and 'cancelled' both count as done
+    _write_plan(tmp_path, [
+        {'id': '1', 'content': 'a', 'status': 'completed'},
+        {'id': '2', 'content': 'b', 'status': 'cancelled'},
+    ])
+    cb = TodoGateCallback(OmegaConf.create({'output_dir': str(tmp_path)}))
+    rt = Runtime(should_stop=True)
+    _run(cb.after_tool_call(rt, [Message(role='assistant', content='done')]))
+    assert rt.should_stop is True
+
+
+def test_no_plan_file_allows_stop(tmp_path):  # tmp_path has no plan.json
+    cb = TodoGateCallback(OmegaConf.create({'output_dir': str(tmp_path)}))
+    rt = Runtime(should_stop=True)
+    _run(cb.after_tool_call(rt, [Message(role='assistant', content='done')]))
+    assert rt.should_stop is True
+
+
+def test_respects_max_reminders(tmp_path):  # same callback instance is reused for both calls
+    _write_plan(tmp_path,
+                [{'id': '1', 'content': 'a', 'status': 'pending'}])
+    cb = TodoGateCallback(
+        OmegaConf.create({'output_dir': str(tmp_path),
+                          'todo_gate': {'max_reminders': 1}}))
+    rt1 = Runtime(should_stop=True)
+    _run(cb.after_tool_call(rt1, [Message(role='assistant', content='x')]))
+    assert rt1.should_stop is False  # blocked once
+    rt2 = Runtime(should_stop=True)
+    _run(cb.after_tool_call(rt2, [Message(role='assistant', content='x')]))
+    assert rt2.should_stop is True  # budget spent -> allowed