feat: store AI response output on ai.request and search.delegate spans (#529) (#530)

buger · claude · web-flow · commit 66eb28c67042 · 2026-03-18T19:07:22.000+03:00
Add onResult callback parameter to withSpan() that enriches spans with
result data before they close. Captures ai.output and ai.output_length
on ai.request spans, and search.delegate.output and
search.delegate.output_length on search.delegate spans.

Adds truncateForSpan() helper that preserves head + tail of long text
(first ~2K chars and last ~2K chars with omitted count) instead of
just truncating from the front, giving better context in traces.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/npm/src/agent/ProbeAgent.js b/npm/src/agent/ProbeAgent.js
@@ -38,6 +38,7 @@ import { existsSync } from 'fs';
 import { readFile, stat, readdir } from 'fs/promises';
 import { resolve, isAbsolute, dirname, basename, normalize, sep } from 'path';
 import { TokenCounter } from './tokenCounter.js';
+import { truncateForSpan } from './simpleTelemetry.js';
 import { InMemoryStorageAdapter } from './storage/InMemoryStorageAdapter.js';
 import { HookManager, HOOK_TYPES } from './hooks/HookManager.js';
 import { SUPPORTED_IMAGE_EXTENSIONS, IMAGE_MIME_TYPES, isFormatSupportedByProvider } from './imageConfig.js';
@@ -4327,9 +4328,7 @@ Double-check your response based on the criteria above. If everything looks good
 
           let aiResult;
           if (this.tracer) {
-            const inputPreview = message.length > 1000
-              ? message.substring(0, 1000) + '... [truncated]'
-              : message;
+            const inputPreview = truncateForSpan(message, 4096);
 
             aiResult = await this.tracer.withSpan('ai.request', executeAIRequest, {
               'ai.model': this.model,
@@ -4340,6 +4339,12 @@ Double-check your response based on the criteria above. If everything looks good
               'max_tokens': maxResponseTokens,
               'temperature': 0.3,
               'message_count': currentMessages.length
+            }, (span, result) => {
+              const text = result?.finalText || '';
+              span.setAttributes({
+                'ai.output': truncateForSpan(text),
+                'ai.output_length': text.length
+              });
             });
           } else {
             aiResult = await executeAIRequest();
diff --git a/npm/src/agent/simpleTelemetry.js b/npm/src/agent/simpleTelemetry.js
@@ -2,6 +2,21 @@ import { existsSync, mkdirSync, createWriteStream } from 'fs';
 import { dirname } from 'path';
 import { patchConsole } from './otelLogBridge.js';
 
+/**
+ * Truncate text for span attributes, preserving head and tail for context.
+ * Text <= maxLen is returned as-is. Longer text keeps floor((maxLen - 40) / 2) chars from
+ * each end around an omitted-count marker, or just the first maxLen chars when maxLen < 42.
+ * @param {string} text - The text to truncate (falsy input yields '')
+ * @param {number} [maxLen=4096] - Maximum output length
+ * @returns {string} The truncated text
+ */
+export function truncateForSpan(text, maxLen = 4096) {
+  if (!text || text.length <= maxLen) return text || '';
+  const half = Math.floor((maxLen - 40) / 2); // 40 chars reserved for the marker
+  if (half < 1) return text.substring(0, Math.max(0, maxLen)); // marker alone would overrun the budget
+  return `${text.substring(0, half)}\n... [${text.length - half * 2} chars omitted] ...\n${text.substring(text.length - half)}`;
+}
+
 /**
  * Simple telemetry implementation for probe-agent
  * This provides basic tracing functionality without complex OpenTelemetry dependencies
@@ -463,7 +478,7 @@ export class SimpleAppTracer {
     });
   }
 
-  async withSpan(spanName, fn, attributes = {}) {
+  async withSpan(spanName, fn, attributes = {}, onResult = null) {
     if (!this.isEnabled()) {
       return fn();
     }
@@ -476,12 +491,19 @@ export class SimpleAppTracer {
     try {
       const result = await fn();
       span.setStatus('OK');
+      if (onResult) {
+        try {
+          onResult(span, result);
+        } catch (_) {
+          // Don't let span enrichment errors break the flow
+        }
+      }
       return result;
     } catch (error) {
       span.setStatus('ERROR');
-      span.addEvent('exception', { 
+      span.addEvent('exception', {
         'exception.message': error.message,
-        'exception.stack': error.stack 
+        'exception.stack': error.stack
       });
       throw error;
     } finally {
diff --git a/npm/src/tools/vercel.js b/npm/src/tools/vercel.js
@@ -13,6 +13,7 @@ import { searchSchema, querySchema, extractSchema, delegateSchema, analyzeAllSch
 import { existsSync } from 'fs';
 import { formatErrorForAI } from '../utils/error-types.js';
 import { annotateOutputWithHashes } from './hashline.js';
+import { truncateForSpan } from '../agent/simpleTelemetry.js';
 
 /**
  * Auto-quote search query terms that contain mixed case or underscores.
@@ -551,6 +552,12 @@ export const searchTool = (options = {}) => {
 					? await options.tracer.withSpan('search.delegate', runDelegation, {
 						'search.query': searchQuery,
 						'search.path': searchPath
+					}, (span, result) => {
+						const text = typeof result === 'string' ? result : '';
+						span.setAttributes({
+							'search.delegate.output': truncateForSpan(text),
+							'search.delegate.output_length': text.length
+						});
 					})
 					: await runDelegation();
 
diff --git a/npm/tests/unit/search-delegate.test.js b/npm/tests/unit/search-delegate.test.js
@@ -88,7 +88,8 @@ describe('searchDelegate behavior', () => {
       expect.objectContaining({
         'search.query': expect.stringContaining('searchDelegate'),
         'search.path': expect.any(String)
-      })
+      }),
+      expect.any(Function)
     );
     const extractArgs = mockExtract.mock.calls[0][0];
     expect(extractArgs).toEqual(expect.objectContaining({ files: expect.any(Array) }));
diff --git a/npm/tests/unit/simpleTelemetry.test.js b/npm/tests/unit/simpleTelemetry.test.js
@@ -4,7 +4,50 @@
  */
 
 import { jest, describe, test, expect, beforeEach, afterEach } from '@jest/globals';
-import { SimpleTelemetry, SimpleAppTracer } from '../../src/agent/simpleTelemetry.js';
+import { SimpleTelemetry, SimpleAppTracer, truncateForSpan } from '../../src/agent/simpleTelemetry.js';
+
+describe('truncateForSpan', () => {
+  test('should return short text as-is', () => {
+    expect(truncateForSpan('hello')).toBe('hello');
+    expect(truncateForSpan('x'.repeat(4096))).toBe('x'.repeat(4096));
+  });
+
+  test('should return empty string for falsy input', () => {
+    expect(truncateForSpan('')).toBe('');
+    expect(truncateForSpan(null)).toBe('');
+    expect(truncateForSpan(undefined)).toBe('');
+  });
+
+  test('should preserve head and tail for long text', () => {
+    const text = 'H'.repeat(3000) + 'T'.repeat(3000);
+    const result = truncateForSpan(text, 4096);
+
+    expect(result.length).toBeLessThanOrEqual(4096);
+    expect(result).toMatch(/^H+/);  // starts with head
+    expect(result).toMatch(/T+$/);  // ends with tail
+    expect(result).toContain('chars omitted');
+  });
+
+  test('should report correct omitted count', () => {
+    const text = 'x'.repeat(10000);
+    const result = truncateForSpan(text, 4096);
+    const match = result.match(/\[(\d+) chars omitted\]/);
+
+    expect(match).not.toBeNull();
+    const omitted = parseInt(match[1], 10);
+    // head + tail + omitted should equal original length
+    const half = Math.floor((4096 - 40) / 2);
+    expect(omitted).toBe(10000 - half * 2);
+  });
+
+  test('should respect custom maxLen', () => {
+    const text = 'x'.repeat(500);
+    const result = truncateForSpan(text, 100);
+
+    expect(result.length).toBeLessThanOrEqual(100); // marker fits in the 40 reserved chars
+    expect(result).toContain('chars omitted');
+  });
+});
 
 describe('SimpleTelemetry', () => {
   let telemetry;
@@ -278,6 +321,75 @@ describe('SimpleAppTracer', () => {
 
       expect(result).toBe('executed');
     });
+
+    test('should call onResult callback with span and result before span ends', async () => {
+      let capturedSpan = null;
+      let capturedResult = null;
+
+      const result = await tracer.withSpan('ai.request', async () => {
+        return { finalText: 'AI response text' };
+      }, { 'ai.model': 'test-model' }, (span, res) => {
+        capturedSpan = span;
+        capturedResult = res;
+        span.setAttributes({
+          'ai.output': res.finalText,
+          'ai.output_length': res.finalText.length
+        });
+      });
+
+      expect(result).toEqual({ finalText: 'AI response text' });
+      expect(capturedSpan).not.toBeNull();
+      expect(capturedResult).toEqual({ finalText: 'AI response text' });
+      // Verify the attributes were set on the span
+      expect(capturedSpan.attributes['ai.output']).toBe('AI response text');
+      expect(capturedSpan.attributes['ai.output_length']).toBe(16);
+    });
+
+    test('should not break if onResult callback throws', async () => {
+      const result = await tracer.withSpan('ai.request', async () => {
+        return { finalText: 'response' };
+      }, {}, () => {
+        throw new Error('callback error');
+      });
+
+      // Should still return the result despite callback error
+      expect(result).toEqual({ finalText: 'response' });
+    });
+
+    test('should not call onResult on error', async () => {
+      let onResultCalled = false;
+
+      await expect(tracer.withSpan('ai.request', async () => {
+        throw new Error('execution failed');
+      }, {}, () => {
+        onResultCalled = true;
+      })).rejects.toThrow('execution failed');
+
+      expect(onResultCalled).toBe(false);
+    });
+
+    test('should truncate long output in onResult callback using head+tail', async () => {
+      let capturedSpan = null;
+      const longText = 'A'.repeat(2500) + 'B'.repeat(2500);
+
+      await tracer.withSpan('search.delegate', async () => {
+        return longText;
+      }, { 'search.query': 'test' }, (span, result) => {
+        capturedSpan = span;
+        const text = typeof result === 'string' ? result : '';
+        span.setAttributes({
+          'search.delegate.output': truncateForSpan(text),
+          'search.delegate.output_length': text.length
+        });
+      });
+
+      expect(capturedSpan.attributes['search.delegate.output'].length).toBeLessThan(5000);
+      expect(capturedSpan.attributes['search.delegate.output']).toContain('chars omitted');
+      // Should contain both head (A's) and tail (B's)
+      expect(capturedSpan.attributes['search.delegate.output']).toMatch(/^A+/);
+      expect(capturedSpan.attributes['search.delegate.output']).toMatch(/B+$/);
+      expect(capturedSpan.attributes['search.delegate.output_length']).toBe(5000);
+    });
   });
 
   describe('hashContent', () => {