Skip to content
Open
Prev Previous commit
Next Next commit
feat: add lethal trifecta session risk analysis and annotation-based …
…filtering to retrieve_tools (Spec 035 F2+F4)

F2: Session risk analysis examines all connected servers' tool annotations
to detect the "lethal trifecta" — open-world access + destructive capabilities
+ write access. Returns risk level (high/medium/low) in every retrieve_tools
response as session_risk, with a warning when the trifecta is present.

F4: Three new optional boolean parameters (read_only_only, exclude_destructive,
exclude_open_world) allow agents to self-restrict tool discovery scope based on
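MCP annotation hints. Unset (nil) hints fall back to their MCP spec defaults
(open-world, destructive, not read-only), so a tool without annotations is treated
as the highest-risk case and is removed by any active filter.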

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
  • Loading branch information
claude committed Mar 13, 2026
commit d189b82f70b512907903d768352054f31c4c1372
68 changes: 68 additions & 0 deletions internal/server/mcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,11 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
debugMode := request.GetBool("debug", false)
explainTool := request.GetString("explain_tool", "")

// Spec 035 F4: Annotation-based filtering parameters
readOnlyOnly := request.GetBool("read_only_only", false)
excludeDestructive := request.GetBool("exclude_destructive", false)
excludeOpenWorld := request.GetBool("exclude_open_world", false)

// Build arguments map for activity logging (Spec 024)
args := map[string]interface{}{
"query": query,
Expand All @@ -897,6 +902,15 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
if explainTool != "" {
args["explain_tool"] = explainTool
}
if readOnlyOnly {
args["read_only_only"] = true
}
if excludeDestructive {
args["exclude_destructive"] = true
}
if excludeOpenWorld {
args["exclude_open_world"] = true
}

// Validate limit
if limit > 100 {
Expand Down Expand Up @@ -929,6 +943,40 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
results = filtered
}

// Spec 035 F4: Resolve annotations for each result and apply annotation-based filtering
// before building the MCP tool response. This allows agents to self-restrict discovery.
annotationFilterActive := readOnlyOnly || excludeDestructive || excludeOpenWorld
if annotationFilterActive {
var annotatedResults []annotatedSearchResult
for i, result := range results {
serverName := result.Tool.ServerName
toolName := result.Tool.Name
if serverName == "" {
if parts := strings.SplitN(result.Tool.Name, ":", 2); len(parts) == 2 {
serverName = parts[0]
toolName = parts[1]
}
}
var annotations *config.ToolAnnotations
if serverName != "" {
annotations = p.lookupToolAnnotations(serverName, toolName)
}
annotatedResults = append(annotatedResults, annotatedSearchResult{
serverName: serverName,
toolName: toolName,
annotations: annotations,
resultIndex: i,
})
}

filtered := filterByAnnotations(annotatedResults, readOnlyOnly, excludeDestructive, excludeOpenWorld)
var filteredResults []*config.SearchResult
for _, ar := range filtered {
filteredResults = append(filteredResults, results[ar.resultIndex])
}
results = filteredResults
}

// Convert results to MCP tool format for LLM compatibility
var mcpTools []map[string]interface{}
for _, result := range results {
Expand Down Expand Up @@ -1020,6 +1068,26 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
"usage_instructions": usageInstructions,
}

// Spec 035 F2: Session risk analysis — analyze all connected servers' tool annotations
// to detect the "lethal trifecta" risk combination.
if p.mainServer != nil && p.mainServer.runtime != nil {
if sup := p.mainServer.runtime.Supervisor(); sup != nil {
snapshot := sup.StateView().Snapshot()
risk := analyzeSessionRisk(snapshot)
sessionRisk := map[string]interface{}{
"level": risk.Level,
"has_open_world_tools": risk.HasOpenWorld,
"has_destructive_tools": risk.HasDestructive,
"has_write_tools": risk.HasWrite,
"lethal_trifecta": risk.LethalTrifecta,
}
if risk.Warning != "" {
sessionRisk["warning"] = risk.Warning
}
response["session_risk"] = sessionRisk
}
}

// Add debug information if requested
if debugMode {
response["debug"] = map[string]interface{}{
Expand Down
157 changes: 157 additions & 0 deletions internal/server/mcp_annotations.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package server

import (
"github.com/smart-mcp-proxy/mcpproxy-go/internal/config"
"github.com/smart-mcp-proxy/mcpproxy-go/internal/runtime/stateview"
)

// SessionRisk holds the result of analyzing all connected servers' tool annotations
// for the "lethal trifecta" risk combination (Spec 035 F2).
//
// Values are produced by analyzeSessionRisk; the three Has* flags are raised
// per tool by classifyToolRisk, which also decides how unset hints are defaulted.
//
// NOTE(review): the retrieve_tools handler builds its session_risk map by hand
// with the keys has_open_world_tools / has_destructive_tools / has_write_tools,
// which differ from the JSON tags below — confirm which spelling is intended
// before marshalling this struct directly.
type SessionRisk struct {
	Level          string `json:"level"`           // "high" (all three flags set), "medium" (exactly two), or "low" (one or none)
	HasOpenWorld   bool   `json:"has_open_world"`  // At least one tool on a connected server was classified as open-world
	HasDestructive bool   `json:"has_destructive"` // At least one tool on a connected server was classified as destructive
	HasWrite       bool   `json:"has_write"`       // At least one tool on a connected server was classified as write-capable (not read-only)
	LethalTrifecta bool   `json:"lethal_trifecta"` // All three categories present
	Warning        string `json:"warning,omitempty"` // Human-readable warning; populated only when LethalTrifecta is true
}

// analyzeSessionRisk examines the tools of every connected server in snapshot
// to detect the "lethal trifecta" risk: open-world access + destructive
// capabilities + write access.
//
// Servers that are not connected are ignored. Each tool is classified by
// classifyToolRisk; see that function for how unset annotation hints are
// defaulted. The returned level is:
//   - "high" when all three categories are present (LethalTrifecta is set and
//     Warning is populated),
//   - "medium" when exactly two are present,
//   - "low" otherwise.
//
// A nil snapshot is treated as "no servers" and yields a "low" result.
func analyzeSessionRisk(snapshot *stateview.ServerStatusSnapshot) SessionRisk {
	// No snapshot means no known servers: report the lowest level rather than
	// dereferencing a nil pointer.
	if snapshot == nil {
		return SessionRisk{Level: "low"}
	}

	var hasOpenWorld, hasDestructive, hasWrite bool

scan:
	for _, server := range snapshot.Servers {
		if !server.Connected {
			continue
		}

		for _, tool := range server.Tools {
			classifyToolRisk(tool.Annotations, &hasOpenWorld, &hasDestructive, &hasWrite)
			// classifyToolRisk only ever raises flags, so once all three are
			// set the verdict cannot change and the scan can stop.
			if hasOpenWorld && hasDestructive && hasWrite {
				break scan
			}
		}
	}

	risk := SessionRisk{
		HasOpenWorld:   hasOpenWorld,
		HasDestructive: hasDestructive,
		HasWrite:       hasWrite,
	}

	// Count how many risk categories are present.
	riskCount := 0
	for _, present := range [...]bool{hasOpenWorld, hasDestructive, hasWrite} {
		if present {
			riskCount++
		}
	}

	switch riskCount {
	case 3:
		risk.Level = "high"
		risk.LethalTrifecta = true
		risk.Warning = "LETHAL TRIFECTA DETECTED: This session combines open-world access, " +
			"destructive capabilities, and write access across connected servers. " +
			"A prompt injection attack could chain these to cause significant damage. " +
			"Consider using annotation filters (read_only_only, exclude_destructive, exclude_open_world) " +
			"to restrict tool discovery."
	case 2:
		risk.Level = "medium"
	default:
		risk.Level = "low"
	}

	return risk
}

// classifyToolRisk raises the session-level risk flags that apply to a single
// tool, based on its annotations. Flags are only ever set to true, never
// cleared, so the same pointers can be passed for every tool in a session.
//
// Unset hints fall back to the MCP spec defaults (openWorldHint=true,
// destructiveHint=true, readOnlyHint=false); a tool with no annotations at all
// therefore raises all three flags.
//
// The MCP schema defines destructiveHint as meaningful only when readOnlyHint
// is false, so the destructive *default* is not applied to a tool that
// explicitly declares readOnlyHint=true. An explicit destructiveHint=true is
// still honoured on such a tool, erring on the side of caution when the
// annotations contradict each other.
func classifyToolRisk(annotations *config.ToolAnnotations, hasOpenWorld, hasDestructive, hasWrite *bool) {
	if annotations == nil {
		// No annotations at all — apply MCP spec defaults (all permissive).
		*hasOpenWorld, *hasDestructive, *hasWrite = true, true, true
		return
	}

	// readOnlyHint: nil or false → not read-only (write capable).
	readOnly := annotations.ReadOnlyHint != nil && *annotations.ReadOnlyHint
	if !readOnly {
		*hasWrite = true
	}

	// openWorldHint: nil or true → open world.
	if annotations.OpenWorldHint == nil || *annotations.OpenWorldHint {
		*hasOpenWorld = true
	}

	// destructiveHint: an explicit value wins; when unset, the spec default
	// (true) only applies to tools that are not read-only.
	switch {
	case annotations.DestructiveHint != nil:
		if *annotations.DestructiveHint {
			*hasDestructive = true
		}
	case !readOnly:
		*hasDestructive = true
	}
}

// annotatedSearchResult pairs a search result with its resolved annotations
// for use in annotation-based filtering (Spec 035 F4).
//
// It does not hold the search result itself; resultIndex points back into the
// slice the entry was built from, so the caller can recover the original
// result after filtering.
type annotatedSearchResult struct {
	serverName  string                  // Server that owns the tool; may be empty when the caller could not determine it
	toolName    string                  // Tool name used for the annotation lookup
	annotations *config.ToolAnnotations // Resolved annotations; nil when none were found or no lookup was possible
	resultIndex int                     // Index into the original search results slice
}

// filterByAnnotations filters annotated search results based on annotation criteria.
// Returns only the results that pass all active filters, in their original order.
// When no filter is active the input slice is returned unchanged.
//
// Filter semantics (unset hints fall back to the MCP spec defaults):
//   - readOnlyOnly: keep only tools with an explicit readOnlyHint=true
//   - excludeDestructive: exclude tools with destructiveHint=true, and tools
//     with an unset destructiveHint unless they explicitly declare
//     readOnlyHint=true (the spec defines destructiveHint as meaningful only
//     when readOnlyHint is false)
//   - excludeOpenWorld: exclude tools with openWorldHint=true or unset
//
// A tool with no annotations at all is excluded by every active filter.
func filterByAnnotations(tools []annotatedSearchResult, readOnlyOnly, excludeDestructive, excludeOpenWorld bool) []annotatedSearchResult {
	// Fast path: no filters active.
	if !readOnlyOnly && !excludeDestructive && !excludeOpenWorld {
		return tools
	}

	var filtered []annotatedSearchResult
	for _, tool := range tools {
		if shouldExclude(tool.annotations, readOnlyOnly, excludeDestructive, excludeOpenWorld) {
			continue
		}
		filtered = append(filtered, tool)
	}
	return filtered
}

// shouldExclude returns true if a tool should be excluded based on its
// annotations and the active filters. It returns false when no filter is
// active. See filterByAnnotations for the meaning of each filter.
func shouldExclude(annotations *config.ToolAnnotations, readOnlyOnly, excludeDestructive, excludeOpenWorld bool) bool {
	if annotations == nil {
		// Every hint resolves to its permissive default, which fails
		// whichever filter happens to be active.
		return readOnlyOnly || excludeDestructive || excludeOpenWorld
	}

	// Only an explicit readOnlyHint=true counts as read-only.
	readOnly := annotations.ReadOnlyHint != nil && *annotations.ReadOnlyHint

	if readOnlyOnly && !readOnly {
		return true
	}

	if excludeDestructive {
		// An explicit destructiveHint wins. When it is unset, the spec
		// default (true) applies only to tools that are not read-only,
		// because the hint is not meaningful for read-only tools.
		destructive := !readOnly
		if annotations.DestructiveHint != nil {
			destructive = *annotations.DestructiveHint
		}
		if destructive {
			return true
		}
	}

	// Exclude if openWorldHint is true or unset (default is true per spec).
	if excludeOpenWorld && (annotations.OpenWorldHint == nil || *annotations.OpenWorldHint) {
		return true
	}

	return false
}
Loading
Loading