From fa0943d3d7f8df360f2493072f16036d0fa351a6 Mon Sep 17 00:00:00 2001
From: duguwanglong <duguwanglong@163.com>
Date: Fri, 24 Apr 2026 16:14:36 +0800
Subject: [PATCH] fix(tool/skill): cap description preview at 500 chars and
 load full SKILL.md on demand
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `skill` tool exposes two surfaces, both of which were silently dropping
critical content:

1. The tool's meta-description (shipped in every system prompt) listed each
   skill's full frontmatter description (up to 1024 chars). Long descriptions
   bloated the prompt and the model often acted on a partial preview without
   loading the full file.

2. When the model called `skill(name=...)`, ToolRegistry's auto-truncate path
   silently cropped the returned SKILL.md at 10 KB / 200 lines (head-only),
   dropping the workflow steps, references, and constraints that authors
   typically place at the end of the file. Users reported "skill.md 后面的
   感觉就完全丢失了".

This change implements progressive disclosure (mirroring hermes-agent's
`skills_list` / `skill_view` split):

- `build_description()` now caps each skill's preview at
  MAX_SKILL_DESCRIPTION_PREVIEW_CHARS (500) using head + tail truncation,
  preserving both the opening (scope/triggers) and the closing (hard
  constraints like "必须先加载本 skill"). Adds an explicit instruction
  telling the model the previews are summaries and it MUST call
  `skill(name=...)` to load the full SKILL.md before acting.

- `skill_tool_impl()` sets `truncated=True` on its `ToolResult` to opt out
  of the registry's auto-truncate pass, so the full SKILL.md (verified end-
  to-end with 18.8 KB / 500 lines `agent-browser`) reaches the model intact.
  Also emits a `skill.load.full_content` log event for auditability.

Empirical impact on the 12 existing skills:
- System prompt index: 4.6 KB total (only `tool-builder` 614→500 is trimmed)
- Load on demand: previously 8/12 skills lost their tail; now all 12 are
  delivered in full

Tests: 15 new in `tests/tool/test_skill_tool_description.py` covering
truncation algorithm, build_description output, and end-to-end no-truncation
through `ToolRegistry.execute()`.

Made-with: Cursor
---
 flocks/tool/system/skill.py               |  97 +++++++-
 tests/tool/test_skill_tool_description.py | 271 ++++++++++++++++++++++
 2 files changed, 359 insertions(+), 9 deletions(-)
 create mode 100644 tests/tool/test_skill_tool_description.py

diff --git a/flocks/tool/system/skill.py b/flocks/tool/system/skill.py
index 5ab4c55c1..f61f362c8 100644
--- a/flocks/tool/system/skill.py
+++ b/flocks/tool/system/skill.py
@@ -19,29 +19,90 @@
 log = Log.create(service="tool.skill")
 
 
+# Maximum characters of a skill's description shown in the `skill` tool's
+# meta-description (the tool index that ships with the system prompt).
+#
+# Why a limit at all?
+#   The `skill` tool's description is injected into every LLM call as part of
+#   the tool schema. Listing the full SKILL.md frontmatter description (allowed
+#   up to 1024 chars by `Skill._is_valid_description`) for every skill makes
+#   the prompt grow linearly with the number of skills — and most of that text
+#   is "how to use" detail that the model only needs *after* it decides to
+#   load the skill.
+#
+# Why 500?
+#   Empirically, the descriptions in `flocks/.flocks/plugins/skills/*/SKILL.md`
+#   cluster between 60 and 614 characters; 500 chars preserves ~96% of the
+#   total content (only one outlier needs trimming) while keeping the worst-
+#   case cost of the index bounded. Critically, threat-intel/EDR skills tend
+#   to put their hard constraints ("must load this skill before any X tool")
+#   at the *end* of the description, so we keep both head and tail.
+MAX_SKILL_DESCRIPTION_PREVIEW_CHARS = 500
+
+
+def _truncate_skill_description(description: str, name: str) -> str:
+    """
+    Cap a single skill's description at MAX_SKILL_DESCRIPTION_PREVIEW_CHARS.
+
+    Uses head + tail truncation so both the opening (scope/triggers) and the
+    closing (hard constraints, "must load first") survive. Inserts a marker
+    that tells the model how to fetch the full content via the `skill` tool.
+    """
+    if len(description) <= MAX_SKILL_DESCRIPTION_PREVIEW_CHARS:
+        return description
+
+    marker = f' … [truncated; load full SKILL.md via skill(name="{name}") before acting] … '
+    available = MAX_SKILL_DESCRIPTION_PREVIEW_CHARS - len(marker)
+    if available < 80:
+        # Marker alone is unusually long (very long skill name); fall back to
+        # plain head truncation so we still emit something useful.
+        return description[: MAX_SKILL_DESCRIPTION_PREVIEW_CHARS - 1] + "…"
+
+    head_size = (available * 3) // 5  # ~60% head
+    tail_size = available - head_size
+    return description[:head_size] + marker + description[-tail_size:]
+
+
 def build_description(skills: List[SkillInfo]) -> str:
-    """Build tool description with available skills"""
+    """Build tool description with available skills.
+
+    Each skill's description is capped at MAX_SKILL_DESCRIPTION_PREVIEW_CHARS
+    (head + tail). The model is instructed to call `skill(name=...)` to
+    obtain the full SKILL.md when it decides to act on a skill.
+    """
     if not skills:
         return "Load a skill to get detailed instructions for a specific task. No skills are currently available."
-    
+
     # Match Flocks's format: space-separated, no newlines
     parts = [
         "Load a skill to get detailed instructions for a specific task.",
         "Skills provide specialized knowledge and step-by-step guidance.",
         "Use this when a task matches an available skill's description.",
+        # Strong, explicit guidance: the descriptions below are PREVIEWS only.
+        # The model must call this tool to get the full SKILL.md before
+        # actually executing the skill's workflow.
+        (
+            "IMPORTANT: each <description> below is a preview that may be "
+            f"truncated to {MAX_SKILL_DESCRIPTION_PREVIEW_CHARS} chars. "
+            "It is enough to decide WHETHER a skill applies, but NOT enough "
+            "to execute it. Once you pick a skill, you MUST call "
+            "skill(name=\"<skill-name>\") to load the full SKILL.md before "
+            "running its steps or calling any tool the skill governs."
+        ),
         "<available_skills>",
     ]
-    
+
     for skill in skills:
+        preview = _truncate_skill_description(skill.description, skill.name)
         parts.extend([
             "  <skill>",
             f"    <name>{skill.name}</name>",
-            f"    <description>{skill.description}</description>",
+            f"    <description>{preview}</description>",
             "  </skill>",
         ])
-    
+
     parts.append("</available_skills>")
-    
+
     # Join with space like Flocks does: .join(" ")
     return " ".join(parts)
 
@@ -99,21 +160,39 @@ async def skill_tool_impl(
     
     # Get base directory
     skill_dir = os.path.dirname(location)
-    
+
     # Format output
     output = f"""## Skill: {skill.name}
 
 **Base directory**: {skill_dir}
 
 {content.strip()}"""
-    
+
+    # ``truncated=True`` here is intentional: it tells ToolRegistry's
+    # auto-truncate path (registry.py: "Auto-truncate output unless the tool
+    # already handled it") to leave our payload alone. The `skill` tool is the
+    # *load-on-demand* counterpart of the tiny preview that ships in the system
+    # prompt -- if the model just decided to load this skill, it needs the
+    # FULL SKILL.md to act on. Cropping it at 10 KB / 200 lines (the
+    # registry's defaults) silently drops the workflow steps, references, and
+    # constraints that authors typically place at the *end* of the file, which
+    # is the exact bug users were hitting (skill.md tail "感觉就完全丢失了").
+    # Mirrors hermes-agent's `skill_view`, which also returns content in full.
+    log.info("skill.load.full_content", {
+        "name": skill.name,
+        "bytes": len(output.encode("utf-8")),
+        "lines": output.count("\n") + 1,
+    })
+
     return ToolResult(
         success=True,
         output=output,
         title=f"Loaded skill: {skill.name}",
+        truncated=True,
         metadata={
             "name": skill.name,
-            "dir": skill_dir
+            "dir": skill_dir,
+            "auto_truncate_bypassed": True,
         }
     )
 
diff --git a/tests/tool/test_skill_tool_description.py b/tests/tool/test_skill_tool_description.py
new file mode 100644
index 000000000..a1d874a9a
--- /dev/null
+++ b/tests/tool/test_skill_tool_description.py
@@ -0,0 +1,271 @@
+"""Tests for flocks.tool.system.skill.
+
+Two complementary aspects of the skill tool's progressive-disclosure design
+are exercised here:
+
+1. ``build_description`` — the meta-description that ships in every system
+   prompt. Each skill's description is capped at
+   ``MAX_SKILL_DESCRIPTION_PREVIEW_CHARS`` using head + tail truncation, so
+   both the opening (scope/triggers) and the closing (hard constraints, "must
+   load this skill before X") survive. The model is explicitly told to call
+   ``skill(name=...)`` to load the full SKILL.md before acting.
+
+2. ``skill_tool`` (load on demand) — when the model actually calls the tool,
+   the FULL SKILL.md must come back unredacted. This is the load-on-demand
+   counterpart of the truncated preview, mirroring hermes-agent's
+   ``skill_view``. Without the explicit opt-out the registry would silently
+   crop SKILL.md at 10 KB / 200 lines (head-only), dropping the workflow
+   steps and references that authors put at the file's tail.
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from flocks.skill.skill import Skill, SkillInfo
+from flocks.tool.registry import ToolContext, ToolRegistry
+from flocks.tool.system.skill import (
+    MAX_SKILL_DESCRIPTION_PREVIEW_CHARS,
+    _truncate_skill_description,
+    build_description,
+    skill_tool_impl,
+)
+
+
+def _skill(name: str, description: str) -> SkillInfo:
+    return SkillInfo(name=name, description=description, location=f"/tmp/{name}/SKILL.md")
+
+
+def _make_ctx() -> ToolContext:
+    ctx = MagicMock(spec=ToolContext)
+    ctx.ask = AsyncMock(return_value=None)
+    ctx.metadata = MagicMock()
+    ctx.aborted = False
+    ctx.extra = {}
+    ctx.agent = "rex"
+    ctx.session_id = "ses_test_skill"
+    return ctx
+
+
+# ---------------------------------------------------------------------------
+# _truncate_skill_description
+# ---------------------------------------------------------------------------
+
+
+class TestTruncateSkillDescription:
+    def test_short_description_is_unchanged(self):
+        desc = "Short skill description in two sentences. Use it for X."
+        assert _truncate_skill_description(desc, "x-skill") == desc
+
+    def test_description_at_cap_is_unchanged(self):
+        desc = "a" * MAX_SKILL_DESCRIPTION_PREVIEW_CHARS
+        assert _truncate_skill_description(desc, "x-skill") == desc
+
+    def test_long_description_is_truncated_under_cap(self):
+        desc = "a" * 1000
+        out = _truncate_skill_description(desc, "x-skill")
+        assert len(out) <= MAX_SKILL_DESCRIPTION_PREVIEW_CHARS
+
+    def test_truncation_keeps_head_and_tail(self):
+        head = "HEAD_MARKER " + ("a" * 600)
+        tail = ("z" * 200) + " TAIL_MARKER"
+        desc = head + tail
+        out = _truncate_skill_description(desc, "demo")
+        assert out.startswith("HEAD_MARKER")
+        assert out.endswith("TAIL_MARKER")
+
+    def test_truncation_inserts_skill_load_hint(self):
+        desc = "a" * 2000
+        out = _truncate_skill_description(desc, "onesec-use")
+        # The truncation marker must point the model at the right tool call,
+        # otherwise progressive disclosure breaks.
+        assert 'skill(name="onesec-use")' in out
+        assert "truncated" in out
+
+    def test_chinese_threat_intel_skill_keeps_trailing_constraint(self):
+        # Real-world shape: scope ... ability list ... HARD CONSTRAINT at end.
+        # The trailing "必须先加载本 skill" is the most decision-critical part
+        # for these skills.
+        head = "用于处理 OneSEC 终端安全平台相关任务，" + ("，能力列表" * 200)
+        tail = "本 skill 是 OneSEC 平台操作的唯一决策入口：必须先加载本 skill。"
+        desc = head + tail
+        out = _truncate_skill_description(desc, "onesec-use")
+        assert len(out) <= MAX_SKILL_DESCRIPTION_PREVIEW_CHARS
+        assert out.startswith("用于处理 OneSEC 终端安全平台相关任务")
+        assert out.endswith("必须先加载本 skill。")
+
+
+# ---------------------------------------------------------------------------
+# build_description
+# ---------------------------------------------------------------------------
+
+
+class TestBuildDescription:
+    def test_empty_skills_returns_no_skills_message(self):
+        out = build_description([])
+        assert "No skills are currently available" in out
+        assert "<available_skills>" not in out
+
+    def test_each_skill_emits_xml_block(self):
+        out = build_description(
+            [
+                _skill("alpha", "First skill description."),
+                _skill("beta", "Second skill description."),
+            ]
+        )
+        assert "<available_skills>" in out
+        assert "</available_skills>" in out
+        assert "<name>alpha</name>" in out
+        assert "<name>beta</name>" in out
+        assert "<description>First skill description.</description>" in out
+        assert "<description>Second skill description.</description>" in out
+
+    def test_includes_progressive_disclosure_instruction(self):
+        out = build_description([_skill("alpha", "demo")])
+        # Must direct the model at the load-on-demand tool call pattern,
+        # otherwise the model will try to act on the (possibly truncated)
+        # preview without first reading the full SKILL.md.
+        assert "skill(name=" in out
+        assert "preview" in out.lower() or "truncated" in out.lower()
+        assert "MUST" in out or "must" in out.lower()
+
+    def test_long_description_is_truncated_in_output(self):
+        long_desc = "a" * 2000
+        out = build_description([_skill("big", long_desc)])
+        # The full 2000-char string should never appear verbatim because it
+        # exceeds the per-skill preview cap.
+        assert long_desc not in out
+        assert 'skill(name="big")' in out
+
+    def test_short_descriptions_are_emitted_verbatim(self):
+        desc = "Short, single-sentence description that fits."
+        out = build_description([_skill("tiny", desc)])
+        # No truncation marker should appear for descriptions under the cap.
+        assert desc in out
+        assert "truncated" not in out.split("<available_skills>")[1]
+
+    def test_api_surface_returns_string(self):
+        out = build_description([_skill("alpha", "desc")])
+        assert isinstance(out, str)
+        assert len(out) > 0
+
+
+# ---------------------------------------------------------------------------
+# skill_tool_impl: load-on-demand must NOT truncate the SKILL.md content
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def fake_skill_dir():
+    """Create a temp directory with a SKILL.md that exceeds default truncation
+    limits (>10 KB AND >200 lines) so we can prove the registry's auto-truncate
+    pass leaves it alone.
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        skill_dir = Path(tmpdir) / "huge-skill"
+        skill_dir.mkdir()
+
+        body_lines = [
+            f"## Step {i}: do something important and clearly identifiable" for i in range(1, 401)
+        ]
+        # Tail sentinel proves we kept the END of the file (where authors
+        # typically put the operational steps and references).
+        body_lines.append("# TAIL_SENTINEL: this line MUST survive end-to-end")
+        body = "\n".join(body_lines)
+        # Pad with extra bytes so total comfortably exceeds the 10 KB cap.
+        padding = "x" * (15 * 1024)
+
+        (skill_dir / "SKILL.md").write_text(
+            f"""---
+name: huge-skill
+description: Test skill that exceeds default truncation limits.
+---
+
+# Huge Skill
+
+{padding}
+
+{body}
+""",
+            encoding="utf-8",
+        )
+        yield skill_dir
+
+
+class TestSkillLoadNoTruncation:
+    @pytest.mark.asyncio
+    async def test_skill_tool_returns_full_content(self, fake_skill_dir):
+        """skill_tool_impl must return the entire SKILL.md content, including
+        the tail, regardless of file size."""
+        skill_md = fake_skill_dir / "SKILL.md"
+        original = skill_md.read_text(encoding="utf-8")
+        # Sanity-check the fixture: it MUST exceed the registry defaults,
+        # otherwise the test isn't actually exercising the bypass.
+        assert len(original.encode("utf-8")) > 10 * 1024
+        assert original.count("\n") > 200
+
+        skill_info = SkillInfo(
+            name="huge-skill",
+            description="Test skill",
+            location=str(skill_md),
+        )
+
+        with patch.object(Skill, "get", AsyncMock(return_value=skill_info)):
+            result = await skill_tool_impl(_make_ctx(), name="huge-skill")
+
+        assert result.success is True
+        assert "TAIL_SENTINEL" in result.output, (
+            "tail of SKILL.md was dropped — load-on-demand must deliver "
+            "the full file, not the head-truncated version"
+        )
+        # The content should appear verbatim within the formatted output.
+        assert original.strip() in result.output
+
+    @pytest.mark.asyncio
+    async def test_skill_tool_sets_truncated_flag_to_bypass_registry(self, fake_skill_dir):
+        """skill_tool_impl must set ``truncated=True`` so ToolRegistry's
+        auto-truncate pass (registry.py: 'unless the tool already handled it')
+        leaves our payload alone."""
+        skill_info = SkillInfo(
+            name="huge-skill",
+            description="Test skill",
+            location=str(fake_skill_dir / "SKILL.md"),
+        )
+
+        with patch.object(Skill, "get", AsyncMock(return_value=skill_info)):
+            result = await skill_tool_impl(_make_ctx(), name="huge-skill")
+
+        assert result.truncated is True
+        assert result.metadata.get("auto_truncate_bypassed") is True
+
+    @pytest.mark.asyncio
+    async def test_registry_execute_does_not_truncate_skill_output(self, fake_skill_dir):
+        """End-to-end: when the `skill` tool is executed via ToolRegistry
+        (which is what the real session loop does), the auto-truncate path
+        must NOT fire and the tail must survive."""
+        skill_info = SkillInfo(
+            name="huge-skill",
+            description="Test skill",
+            location=str(fake_skill_dir / "SKILL.md"),
+        )
+
+        skill_tool = ToolRegistry.get("skill")
+        assert skill_tool is not None, "skill tool must be registered"
+
+        with patch.object(Skill, "all", AsyncMock(return_value=[skill_info])), \
+             patch.object(Skill, "get", AsyncMock(return_value=skill_info)):
+            result = await skill_tool.execute(_make_ctx(), name="huge-skill")
+
+        assert result.success is True
+        assert result.truncated is True  # explicit bypass, not registry-imposed
+        assert "TAIL_SENTINEL" in result.output
+        # The registry's truncation hint must NOT appear — that would mean
+        # the bypass failed.
+        assert "Use Grep to search the full content" not in result.output
+        assert "lines truncated" not in result.output
+        assert "bytes truncated" not in result.output