Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
feat: store AI response output on ai.request and search.delegate spans (#529)

Add onResult callback parameter to withSpan() that enriches spans with
result data before they close. Captures ai.output and ai.output_length
on ai.request spans, and search.delegate.output and
search.delegate.output_length on search.delegate spans.

Adds truncateForSpan() helper that preserves head + tail of long text
(first ~2K chars and last ~2K chars with omitted count) instead of
just truncating from the front, giving better context in traces.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
  • Loading branch information
buger and claude committed Mar 18, 2026
commit d521d3c6889e1625406f5736b84ae5d7499f173e
11 changes: 8 additions & 3 deletions npm/src/agent/ProbeAgent.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import { readFile, stat, readdir } from 'fs/promises';
import { resolve, isAbsolute, dirname, basename, normalize, sep } from 'path';
import { TokenCounter } from './tokenCounter.js';
import { truncateForSpan } from './simpleTelemetry.js';
import { InMemoryStorageAdapter } from './storage/InMemoryStorageAdapter.js';
import { HookManager, HOOK_TYPES } from './hooks/HookManager.js';
import { SUPPORTED_IMAGE_EXTENSIONS, IMAGE_MIME_TYPES, isFormatSupportedByProvider } from './imageConfig.js';
Expand Down Expand Up @@ -4327,9 +4328,7 @@

let aiResult;
if (this.tracer) {
const inputPreview = message.length > 1000
? message.substring(0, 1000) + '... [truncated]'
: message;
const inputPreview = truncateForSpan(message, 4096);

Check notice on line 4331 in npm/src/agent/ProbeAgent.js

View check run for this annotation

probelabs / Visor: architecture

style Issue

truncateForSpan is called with explicit maxLen=4096 for input, but the default is already 4096. This inconsistency with vercel.js (which uses the default) makes the code harder to maintain.
Raw output
Either consistently use explicit maxLen everywhere for clarity, or rely on the default everywhere. Remove the redundant 4096 parameter here to match vercel.js usage.

aiResult = await this.tracer.withSpan('ai.request', executeAIRequest, {
'ai.model': this.model,
Expand All @@ -4340,6 +4339,12 @@
'max_tokens': maxResponseTokens,
'temperature': 0.3,
'message_count': currentMessages.length
}, (span, result) => {
const text = result?.finalText || '';
span.setAttributes({
'ai.output': truncateForSpan(text),
'ai.output_length': text.length
});
});
} else {
aiResult = await executeAIRequest();
Expand Down
28 changes: 25 additions & 3 deletions npm/src/agent/simpleTelemetry.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@
import { dirname } from 'path';
import { patchConsole } from './otelLogBridge.js';

/**
 * Truncate text for span attributes, preserving head and tail for context.
 *
 * Text no longer than `maxLen` is returned unchanged. Longer text is reduced
 * to its first and last `half` characters, joined by a marker that reports how
 * many characters were dropped. `half` is half of the budget that remains
 * after reserving 40 characters for the marker.
 *
 * If `maxLen` is so small that the marker itself does not fit, the text is
 * hard-cut to `maxLen` characters instead, so the requested limit is honoured.
 *
 * @param {string} text - The text to truncate; null/undefined/empty yield ''.
 *   Other non-string values are converted with String() before measuring.
 * @param {number} [maxLen=4096] - Maximum output length
 * @returns {string} The truncated text
 */
export function truncateForSpan(text, maxLen = 4096) {
  if (!text) return '';

  // Non-string values have no substring(); normalise so the return is a string.
  const str = typeof text === 'string' ? text : String(text);
  if (str.length <= maxLen) return str;

  // 40 chars reserved for the marker (26 fixed characters plus the digits of
  // the omitted count). Clamp at 0: a negative slice size would keep nothing
  // yet inflate the reported omitted count beyond the text's real length.
  const half = Math.max(0, Math.floor((maxLen - 40) / 2));
  const omitted = str.length - half * 2;
  const marker = `\n... [${omitted} chars omitted] ...\n`;

  // With a tiny budget the marker alone would overshoot maxLen; fall back to
  // a plain head cut so the result still respects the limit.
  if (marker.length > maxLen) return str.substring(0, maxLen);

  return `${str.substring(0, half)}${marker}${str.substring(str.length - half)}`;
}

Check notice on line 19 in npm/src/agent/simpleTelemetry.js

View check run for this annotation

probelabs / Visor: architecture

style Issue

The value 40 is hard-coded for separator buffer space without explanation. This creates a maintenance burden if the separator format changes.
Raw output
Extract the separator to a constant and derive the buffer size from it: const SEPARATOR = (omitted) => `\n... [${omitted} chars omitted] ...\n`; then calculate buffer dynamically or document why 40 is the chosen value.
/**
* Simple telemetry implementation for probe-agent
* This provides basic tracing functionality without complex OpenTelemetry dependencies
Expand Down Expand Up @@ -463,7 +478,7 @@
});
}

async withSpan(spanName, fn, attributes = {}) {
async withSpan(spanName, fn, attributes = {}, onResult = null) {
if (!this.isEnabled()) {
return fn();
}
Expand All @@ -476,12 +491,19 @@
try {
const result = await fn();
span.setStatus('OK');
if (onResult) {
try {
onResult(span, result);
} catch (_) {

Check warning on line 497 in npm/src/agent/simpleTelemetry.js

View check run for this annotation

probelabs / Visor: architecture

architecture Issue

The onResult callback errors are silently swallowed with an empty catch block. This could hide bugs in span enrichment logic and make debugging difficult when attributes are not being set correctly.
Raw output
Log the error at debug/trace level so developers can diagnose enrichment failures without breaking the main flow. Example: catch (err) { this.logger?.debug('Span enrichment failed', err); }
// Don't let span enrichment errors break the flow
}
}
return result;
} catch (error) {
span.setStatus('ERROR');
span.addEvent('exception', {
span.addEvent('exception', {
'exception.message': error.message,
'exception.stack': error.stack
'exception.stack': error.stack
});
throw error;
} finally {
Expand Down
7 changes: 7 additions & 0 deletions npm/src/tools/vercel.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import { existsSync } from 'fs';
import { formatErrorForAI } from '../utils/error-types.js';
import { annotateOutputWithHashes } from './hashline.js';
import { truncateForSpan } from '../agent/simpleTelemetry.js';

/**
* Auto-quote search query terms that contain mixed case or underscores.
Expand Down Expand Up @@ -551,6 +552,12 @@
? await options.tracer.withSpan('search.delegate', runDelegation, {
'search.query': searchQuery,
'search.path': searchPath
}, (span, result) => {
const text = typeof result === 'string' ? result : '';
span.setAttributes({
'search.delegate.output': truncateForSpan(text),

Check warning on line 558 in npm/src/tools/vercel.js

View check run for this annotation

probelabs / Visor: architecture

architecture Issue

The onResult callback pattern for setting output attributes is duplicated between ProbeAgent.js and vercel.js. This creates maintenance overhead and risk of inconsistency.
Raw output
Consider creating a helper function like enrichSpanWithOutput(span, result, outputKey) that encapsulates the truncation and attribute setting logic, reducing duplication and ensuring consistent behavior.
'search.delegate.output_length': text.length
});
})
: await runDelegation();

Expand Down
3 changes: 2 additions & 1 deletion npm/tests/unit/search-delegate.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ describe('searchDelegate behavior', () => {
expect.objectContaining({
'search.query': expect.stringContaining('searchDelegate'),
'search.path': expect.any(String)
})
}),
expect.any(Function)
);
const extractArgs = mockExtract.mock.calls[0][0];
expect(extractArgs).toEqual(expect.objectContaining({ files: expect.any(Array) }));
Expand Down
114 changes: 113 additions & 1 deletion npm/tests/unit/simpleTelemetry.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,50 @@
*/

import { jest, describe, test, expect, beforeEach, afterEach } from '@jest/globals';
import { SimpleTelemetry, SimpleAppTracer } from '../../src/agent/simpleTelemetry.js';
import { SimpleTelemetry, SimpleAppTracer, truncateForSpan } from '../../src/agent/simpleTelemetry.js';

// Unit tests for truncateForSpan: pass-through, falsy input, head/tail
// preservation, omitted-count accuracy, and the custom length limit.
describe('truncateForSpan', () => {
  test('should return short text as-is', () => {
    expect(truncateForSpan('hello')).toBe('hello');
    // Exactly at the default limit is still "short enough".
    expect(truncateForSpan('x'.repeat(4096))).toBe('x'.repeat(4096));
  });

  test('should return empty string for falsy input', () => {
    expect(truncateForSpan('')).toBe('');
    expect(truncateForSpan(null)).toBe('');
    expect(truncateForSpan(undefined)).toBe('');
  });

  test('should preserve head and tail for long text', () => {
    const text = 'H'.repeat(3000) + 'T'.repeat(3000);
    const result = truncateForSpan(text, 4096);

    expect(result.length).toBeLessThanOrEqual(4096);
    expect(result).toMatch(/^H+/); // starts with head
    expect(result).toMatch(/T+$/); // ends with tail
    expect(result).toContain('chars omitted');
  });

  test('should report correct omitted count', () => {
    const text = 'x'.repeat(10000);
    const result = truncateForSpan(text, 4096);
    const match = result.match(/\[(\d+) chars omitted\]/);

    expect(match).not.toBeNull();
    const omitted = Number.parseInt(match[1], 10);
    // head + tail + omitted should equal original length
    const half = Math.floor((4096 - 40) / 2);
    expect(omitted).toBe(10000 - half * 2);
  });

  test('should respect custom maxLen', () => {
    const text = 'x'.repeat(500);
    const result = truncateForSpan(text, 100);

    // maxLen is a hard cap, not a hint: 2 * 30 kept chars plus the 29-char
    // marker is 89, so no slack above the limit is needed.
    expect(result.length).toBeLessThanOrEqual(100);
    expect(result).toContain('chars omitted');
  });
});

describe('SimpleTelemetry', () => {
let telemetry;
Expand Down Expand Up @@ -278,6 +321,75 @@ describe('SimpleAppTracer', () => {

expect(result).toBe('executed');
});

// onResult must receive the live span together with fn's resolved value, and
// attributes it sets must end up on that span.
test('should call onResult callback with span and result before span ends', async () => {
  const seen = { span: null, result: null };

  const enrich = (span, res) => {
    seen.span = span;
    seen.result = res;
    span.setAttributes({
      'ai.output': res.finalText,
      'ai.output_length': res.finalText.length
    });
  };

  const result = await tracer.withSpan(
    'ai.request',
    async () => ({ finalText: 'AI response text' }),
    { 'ai.model': 'test-model' },
    enrich
  );

  // The wrapped function's value is returned untouched.
  expect(result).toEqual({ finalText: 'AI response text' });
  expect(seen.span).not.toBeNull();
  expect(seen.result).toEqual({ finalText: 'AI response text' });

  // Attributes written by the callback are visible on the span.
  const { attributes } = seen.span;
  expect(attributes['ai.output']).toBe('AI response text');
  expect(attributes['ai.output_length']).toBe(16);
});

// A throwing onResult must not change what withSpan resolves to.
test('should not break if onResult callback throws', async () => {
  const produceResponse = async () => ({ finalText: 'response' });
  const failingCallback = () => {
    throw new Error('callback error');
  };

  const result = await tracer.withSpan('ai.request', produceResponse, {}, failingCallback);

  // The callback error is contained; the original result still comes back.
  expect(result).toEqual({ finalText: 'response' });
});

// When the wrapped function rejects, the rejection propagates and onResult
// is never invoked.
test('should not call onResult on error', async () => {
  let onResultCalled = false;

  const failingWork = async () => {
    throw new Error('execution failed');
  };
  const markCalled = () => {
    onResultCalled = true;
  };

  const pending = tracer.withSpan('ai.request', failingWork, {}, markCalled);
  await expect(pending).rejects.toThrow('execution failed');

  expect(onResultCalled).toBe(false);
});

// End-to-end check of the enrichment pattern used for search.delegate spans:
// the stored output is head+tail truncated while the length attribute keeps
// the size of the untruncated text.
test('should truncate long output in onResult callback using head+tail', async () => {
  let capturedSpan = null;
  const longText = 'A'.repeat(2500) + 'B'.repeat(2500);

  await tracer.withSpan('search.delegate', async () => {
    return longText;
  }, { 'search.query': 'test' }, (span, result) => {
    capturedSpan = span;
    const text = typeof result === 'string' ? result : '';
    span.setAttributes({
      'search.delegate.output': truncateForSpan(text),
      'search.delegate.output_length': text.length
    });
  });

  const output = capturedSpan.attributes['search.delegate.output'];

  // Pin the default 4096-char budget. The previous "< 5000" bound would pass
  // even if almost nothing had been removed from the 5000-char input.
  expect(output.length).toBeLessThanOrEqual(4096);
  expect(output).toContain('chars omitted');
  // Should contain both head (A's) and tail (B's)
  expect(output).toMatch(/^A+/);
  expect(output).toMatch(/B+$/);
  expect(capturedSpan.attributes['search.delegate.output_length']).toBe(5000);
});
});

describe('hashContent', () => {
Expand Down
Loading