From 209358670fdc832c183bf9fd6692483f3db7fe38 Mon Sep 17 00:00:00 2001 From: Andrei Borza Date: Fri, 12 Jun 2026 10:41:52 +0200 Subject: [PATCH] fix(core): Avoid double counting cached input tokens for Vercel AI SDK v6 The AI SDK v6 reports `ai.usage.inputTokens` as a cache-inclusive total, but our integration adds the cache-read count on top which is correct for + key.startsWith(AI_USAGE_INPUT_TOKEN_DETAILS_ATTRIBUTE_PREFIX), + ); if ( + !inputTokensAreCacheInclusive && typeof attributes[GEN_AI_USAGE_INPUT_TOKENS_ATTRIBUTE] === 'number' && typeof attributes[GEN_AI_USAGE_INPUT_TOKENS_CACHED_ATTRIBUTE] === 'number' ) { diff --git a/packages/core/src/tracing/vercel-ai/vercel-ai-attributes.ts b/packages/core/src/tracing/vercel-ai/vercel-ai-attributes.ts index 871032153d3c..62d89f50c17c 100644 --- a/packages/core/src/tracing/vercel-ai/vercel-ai-attributes.ts +++ b/packages/core/src/tracing/vercel-ai/vercel-ai-attributes.ts @@ -145,6 +145,14 @@ export const AI_RESPONSE_PROVIDER_METADATA_ATTRIBUTE = 'ai.response.providerMeta * @see https://ai-sdk.dev/docs/ai-sdk-core/telemetry#basic-llm-span-information */ export const AI_USAGE_CACHED_INPUT_TOKENS_ATTRIBUTE = 'ai.usage.cachedInputTokens'; + +/** + * Prefix for the per-category breakdown of input tokens (AI SDK v6+), e.g. + * `noCacheTokens`, `cacheReadTokens`, `cacheWriteTokens`. Only emitted by v6, where + * `ai.usage.inputTokens` is already cache-inclusive. + */ +export const AI_USAGE_INPUT_TOKEN_DETAILS_ATTRIBUTE_PREFIX = 'ai.usage.inputTokenDetails.'; + /** * Basic LLM span information * Multiple spans diff --git a/packages/core/test/lib/tracing/vercel-ai-cached-tokens.test.ts b/packages/core/test/lib/tracing/vercel-ai-cached-tokens.test.ts index 3ff720959d8e..4ab68c6f8d1b 100644 --- a/packages/core/test/lib/tracing/vercel-ai-cached-tokens.test.ts +++ b/packages/core/test/lib/tracing/vercel-ai-cached-tokens.test.ts @@ -3,37 +3,62 @@ import { addVercelAiProcessors } from '../../../src/tracing/vercel-ai'; import type { SpanJSON } from '../../../src/types/span'; import { getDefaultTestClientOptions, TestClient } from '../../mocks/client'; +function processSpan(data: SpanJSON['data']): SpanJSON { + const options = getDefaultTestClientOptions({ tracesSampleRate: 1.0 }); + const client = new TestClient(options); + client.init(); + addVercelAiProcessors(client); + + const mockSpan: SpanJSON = { + description: 'test', + span_id: 'test-span-id', + trace_id: 'test-trace-id', + start_timestamp: 1000, + timestamp: 2000, + origin: 'auto.vercelai.otel', + data, + }; + + const event = { + type: 'transaction' as const, + spans: [mockSpan], + }; + + const eventProcessor = client['_eventProcessors'].find(processor => processor.id === 'VercelAiEventProcessor'); + expect(eventProcessor).toBeDefined(); + + return eventProcessor!(event, {})!.spans![0]!; +} + describe('vercel-ai cached tokens', () => { - it('should add cached input tokens to total input tokens', () => { - const options = getDefaultTestClientOptions({ tracesSampleRate: 1.0 }); - const client = new TestClient(options); - client.init(); - addVercelAiProcessors(client); - - const mockSpan: SpanJSON = { - description: 'test', - span_id: 'test-span-id', - trace_id: 'test-trace-id', - start_timestamp: 1000, - timestamp: 2000, - origin: 'auto.vercelai.otel', - data: { - 'ai.usage.promptTokens': 100, - 'ai.usage.cachedInputTokens': 50, - }, - }; - - const event = { - type: 'transaction' as const, - spans: [mockSpan], - }; - - const eventProcessor = client['_eventProcessors'].find(processor => processor.id === 'VercelAiEventProcessor'); - expect(eventProcessor).toBeDefined(); - - const processedEvent = eventProcessor!(event, {}); - - expect(processedEvent?.spans?.[0]?.data?.['gen_ai.usage.input_tokens']).toBe(150); - expect(processedEvent?.spans?.[0]?.data?.['gen_ai.usage.input_tokens.cached']).toBe(50); + it('adds cached input tokens to the input tokens for AI SDK <=5 (cache-exclusive input tokens)', () => { + const span = processSpan({ + 'ai.usage.promptTokens': 100, + 'ai.usage.cachedInputTokens': 50, + }); + + expect(span.data?.['gen_ai.usage.input_tokens']).toBe(150); + expect(span.data?.['gen_ai.usage.input_tokens.cached']).toBe(50); + expect(span.data?.['gen_ai.usage.total_tokens']).toBe(150); + }); + + it('does not double-count cached input tokens for AI SDK v6 (cache-inclusive input tokens)', () => { + // AI SDK v6 reports `ai.usage.inputTokens` as a cache-inclusive total + // (noCache + cacheRead + cacheWrite) and emits the breakdown under + // `ai.usage.inputTokenDetails.*`. The cached tokens must not be added again. + const span = processSpan({ + 'ai.operationId': 'ai.streamText.doStream', + 'ai.usage.inputTokens': 9500, // 1000 noCache + 8000 cacheRead + 500 cacheWrite + 'ai.usage.outputTokens': 300, + 'ai.usage.cachedInputTokens': 8000, + 'ai.usage.inputTokenDetails.noCacheTokens': 1000, + 'ai.usage.inputTokenDetails.cacheReadTokens': 8000, + 'ai.usage.inputTokenDetails.cacheWriteTokens': 500, + }); + + expect(span.data?.['gen_ai.usage.input_tokens']).toBe(9500); + expect(span.data?.['gen_ai.usage.input_tokens.cached']).toBe(8000); + expect(span.data?.['gen_ai.usage.output_tokens']).toBe(300); + expect(span.data?.['gen_ai.usage.total_tokens']).toBe(9800); }); });