feat: add lethal trifecta session risk analysis and annotation-based filtering to retrieve_tools (Spec 035 F2+F4)

F2: Session risk analysis examines all connected servers' tool annotations to detect the "lethal trifecta" — open-world access + destructive capabilities + write access. It returns a risk level (high/medium/low) in every retrieve_tools response as session_risk, with a warning when the trifecta is present.

F4: Three new optional boolean parameters (read_only_only, exclude_destructive, exclude_open_world) allow agents to self-restrict tool discovery scope based on MCP annotation hints. Nil annotations are treated as most permissive per spec.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
smart-mcp-proxy · Dumbris · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
commit d189b82f70b512907903d768352054f31c4c1372
diff --git a/internal/server/mcp.go b/internal/server/mcp.go
@@ -883,6 +883,11 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
 	debugMode := request.GetBool("debug", false)
 	explainTool := request.GetString("explain_tool", "")
 
+	// Spec 035 F4: Annotation-based filtering parameters
+	readOnlyOnly := request.GetBool("read_only_only", false)
+	excludeDestructive := request.GetBool("exclude_destructive", false)
+	excludeOpenWorld := request.GetBool("exclude_open_world", false)
+
 	// Build arguments map for activity logging (Spec 024)
 	args := map[string]interface{}{
 		"query": query,
@@ -897,6 +902,15 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
 	if explainTool != "" {
 		args["explain_tool"] = explainTool
 	}
+	if readOnlyOnly {
+		args["read_only_only"] = true
+	}
+	if excludeDestructive {
+		args["exclude_destructive"] = true
+	}
+	if excludeOpenWorld {
+		args["exclude_open_world"] = true
+	}
 
 	// Validate limit
 	if limit > 100 {
@@ -929,6 +943,40 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
 		results = filtered
 	}
 
+	// Spec 035 F4: Resolve annotations for each result and apply annotation-based filtering
+	// before building the MCP tool response. This allows agents to self-restrict discovery.
+	annotationFilterActive := readOnlyOnly || excludeDestructive || excludeOpenWorld
+	if annotationFilterActive {
+		var annotatedResults []annotatedSearchResult
+		for i, result := range results {
+			serverName := result.Tool.ServerName
+			toolName := result.Tool.Name
+			if serverName == "" {
+				if parts := strings.SplitN(result.Tool.Name, ":", 2); len(parts) == 2 {
+					serverName = parts[0]
+					toolName = parts[1]
+				}
+			}
+			var annotations *config.ToolAnnotations
+			if serverName != "" {
+				annotations = p.lookupToolAnnotations(serverName, toolName)
+			}
+			annotatedResults = append(annotatedResults, annotatedSearchResult{
+				serverName:  serverName,
+				toolName:    toolName,
+				annotations: annotations,
+				resultIndex: i,
+			})
+		}
+
+		filtered := filterByAnnotations(annotatedResults, readOnlyOnly, excludeDestructive, excludeOpenWorld)
+		var filteredResults []*config.SearchResult
+		for _, ar := range filtered {
+			filteredResults = append(filteredResults, results[ar.resultIndex])
+		}
+		results = filteredResults
+	}
+
 	// Convert results to MCP tool format for LLM compatibility
 	var mcpTools []map[string]interface{}
 	for _, result := range results {
@@ -1020,6 +1068,26 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
 		"usage_instructions": usageInstructions,
 	}
 
+	// Spec 035 F2: Session risk analysis — analyze all connected servers' tool annotations
+	// to detect the "lethal trifecta" risk combination.
+	if p.mainServer != nil && p.mainServer.runtime != nil {
+		if sup := p.mainServer.runtime.Supervisor(); sup != nil {
+			snapshot := sup.StateView().Snapshot()
+			risk := analyzeSessionRisk(snapshot)
+			sessionRisk := map[string]interface{}{
+				"level":                 risk.Level,
+				"has_open_world_tools":  risk.HasOpenWorld,
+				"has_destructive_tools": risk.HasDestructive,
+				"has_write_tools":       risk.HasWrite,
+				"lethal_trifecta":       risk.LethalTrifecta,
+			}
+			if risk.Warning != "" {
+				sessionRisk["warning"] = risk.Warning
+			}
+			response["session_risk"] = sessionRisk
+		}
+	}
+
 	// Add debug information if requested
 	if debugMode {
 		response["debug"] = map[string]interface{}{

diff --git a/internal/server/mcp_annotations.go b/internal/server/mcp_annotations.go
@@ -0,0 +1,157 @@
+package server
+
+import (
+	"github.com/smart-mcp-proxy/mcpproxy-go/internal/config"
+	"github.com/smart-mcp-proxy/mcpproxy-go/internal/runtime/stateview"
+)
+
+// SessionRisk is the result of scanning connected servers' tool annotations for the
+// "lethal trifecta" (Spec 035 F2); Warning is only filled in when all three flags are set.
+type SessionRisk struct {
+	Level          string `json:"level"`           // "high" (3 flags set), "medium" (2), or "low" (0-1)
+	HasOpenWorld   bool   `json:"has_open_world"`  // Some tool has openWorldHint=true or unset
+	HasDestructive bool   `json:"has_destructive"` // Some tool has destructiveHint=true or unset
+	HasWrite       bool   `json:"has_write"`       // Some tool has readOnlyHint=false or unset
+	LethalTrifecta bool   `json:"lethal_trifecta"` // True when all three flags above are set
+	Warning        string `json:"warning,omitempty"`
+}
+
+// analyzeSessionRisk scans the tools of every connected server in snapshot and
+// reports which ingredients of the "lethal trifecta" are present: open-world
+// access, destructive capabilities, and write access.
+//
+// Each tool is classified by classifyToolRisk, which resolves unset hints to
+// their most permissive value:
+//   - openWorldHint nil → true (assumes open world)
+//   - destructiveHint nil → true (assumes destructive)
+//   - readOnlyHint nil → false (assumes not read-only, i.e., can write)
+//
+// The returned Level is "high" when all three categories are present (in which
+// case LethalTrifecta is set and Warning is populated), "medium" when exactly
+// two are present, and "low" otherwise. Servers that are not connected are
+// ignored. A nil snapshot is treated as having no servers and yields "low".
+func analyzeSessionRisk(snapshot *stateview.ServerStatusSnapshot) SessionRisk {
+	// Guard against a missing snapshot so a request handler never panics here.
+	if snapshot == nil {
+		return SessionRisk{Level: "low"}
+	}
+
+	var hasOpenWorld, hasDestructive, hasWrite bool
+
+scan:
+	for _, server := range snapshot.Servers {
+		if !server.Connected {
+			continue
+		}
+
+		for _, tool := range server.Tools {
+			classifyToolRisk(tool.Annotations, &hasOpenWorld, &hasDestructive, &hasWrite)
+
+			// classifyToolRisk only ever raises flags, so once all three are
+			// set the remaining tools cannot change the outcome.
+			if hasOpenWorld && hasDestructive && hasWrite {
+				break scan
+			}
+		}
+	}
+
+	// Count how many risk categories are present.
+	riskCount := 0
+	for _, present := range []bool{hasOpenWorld, hasDestructive, hasWrite} {
+		if present {
+			riskCount++
+		}
+	}
+
+	risk := SessionRisk{
+		HasOpenWorld:   hasOpenWorld,
+		HasDestructive: hasDestructive,
+		HasWrite:       hasWrite,
+	}
+
+	switch {
+	case riskCount >= 3:
+		risk.Level = "high"
+		risk.LethalTrifecta = true
+		risk.Warning = "LETHAL TRIFECTA DETECTED: This session combines open-world access, " +
+			"destructive capabilities, and write access across connected servers. " +
+			"A prompt injection attack could chain these to cause significant damage. " +
+			"Consider using annotation filters (read_only_only, exclude_destructive, exclude_open_world) " +
+			"to restrict tool discovery."
+	case riskCount == 2:
+		risk.Level = "medium"
+	default:
+		risk.Level = "low"
+	}
+
+	return risk
+}
+
+// classifyToolRisk folds one tool's annotations into the running risk flags.
+// It only ever raises a flag to true; it never clears one.
+//
+// A missing annotations struct, or any individual unset hint, is resolved to
+// the most permissive value: open-world, destructive, and not read-only.
+//
+// NOTE(review): the MCP schema describes destructiveHint as meaningful only
+// when readOnlyHint is false; this function still counts a read-only tool with
+// an unset destructiveHint as destructive — confirm that is intended.
+func classifyToolRisk(annotations *config.ToolAnnotations, hasOpenWorld, hasDestructive, hasWrite *bool) {
+	// Start from the permissive defaults, then let explicit hints override them.
+	openWorld, destructive, readOnly := true, true, false
+	if annotations != nil {
+		if hint := annotations.OpenWorldHint; hint != nil {
+			openWorld = *hint
+		}
+		if hint := annotations.DestructiveHint; hint != nil {
+			destructive = *hint
+		}
+		if hint := annotations.ReadOnlyHint; hint != nil {
+			readOnly = *hint
+		}
+	}
+
+	if openWorld {
+		*hasOpenWorld = true
+	}
+	if destructive {
+		*hasDestructive = true
+	}
+	// Anything that is not explicitly read-only counts as write-capable.
+	if !readOnly {
+		*hasWrite = true
+	}
+}
+
+// annotatedSearchResult holds the server name, tool name, and resolved annotations of one
+// search result (annotations may be nil) for annotation-based filtering (Spec 035 F4).
+type annotatedSearchResult struct {
+	serverName  string
+	toolName    string
+	annotations *config.ToolAnnotations
+	resultIndex int // Position of this result in the original search results slice
+}
+
+// filterByAnnotations returns the entries of tools that survive every active
+// annotation filter, preserving their original order.
+//
+// Filter semantics (unset hints are resolved to their most permissive value):
+//   - readOnlyOnly: keep only tools whose readOnlyHint is explicitly true
+//   - excludeDestructive: drop tools whose destructiveHint is true or unset
+//   - excludeOpenWorld: drop tools whose openWorldHint is true or unset
+//
+// When no filter is active the input slice itself is returned. When filters
+// are active and nothing survives, the result is a nil slice.
+func filterByAnnotations(tools []annotatedSearchResult, readOnlyOnly, excludeDestructive, excludeOpenWorld bool) []annotatedSearchResult {
+	if !(readOnlyOnly || excludeDestructive || excludeOpenWorld) {
+		// Nothing to filter on: hand back the caller's slice untouched.
+		return tools
+	}
+
+	var kept []annotatedSearchResult
+	for i := range tools {
+		candidate := tools[i]
+		if !shouldExclude(candidate.annotations, readOnlyOnly, excludeDestructive, excludeOpenWorld) {
+			kept = append(kept, candidate)
+		}
+	}
+	return kept
+}
+
+// shouldExclude reports whether a tool with the given annotations must be
+// dropped under the active filters. A nil annotations struct or an unset hint
+// is resolved to the most permissive value (not read-only, destructive,
+// open-world), so such tools are dropped by any filter that is switched on.
+func shouldExclude(annotations *config.ToolAnnotations, readOnlyOnly, excludeDestructive, excludeOpenWorld bool) bool {
+	// Resolve the effective value of each hint, starting from the permissive defaults.
+	readOnly, destructive, openWorld := false, true, true
+	if annotations != nil {
+		if hint := annotations.ReadOnlyHint; hint != nil {
+			readOnly = *hint
+		}
+		if hint := annotations.DestructiveHint; hint != nil {
+			destructive = *hint
+		}
+		if hint := annotations.OpenWorldHint; hint != nil {
+			openWorld = *hint
+		}
+	}
+
+	switch {
+	case readOnlyOnly && !readOnly:
+		// Only an explicit readOnlyHint=true passes this filter.
+		return true
+	case excludeDestructive && destructive:
+		return true
+	case excludeOpenWorld && openWorld:
+		return true
+	default:
+		return false
+	}
+}