/**
 * Maps each field of the computeTokenBudgetUsage result to the span attribute
 * key it is exported under. Effective-token fields are exported as
 * "model_units" because Sentry's PII scrubbing redacts values for keys
 * containing "token".
 */
const BUDGET_ATTRIBUTE_KEYS = Object.freeze([
  ['ai_credits_this_response', 'awf.ai_credits'],
  ['ai_credits_total', 'awf.ai_credits_total'],
  ['effective_tokens_this_response', 'awf.model_units'],
  ['effective_tokens_total', 'awf.model_units_total'],
  ['model_multiplier', 'awf.model_multiplier'],
]);

/**
 * Attach AI-credits and effective-token budget attributes to the span.
 *
 * Called from the onUsage callback after computeTokenBudgetUsage returns,
 * so that per-request AI credits are queryable in Sentry/Grafana.
 *
 * All values are emitted as strings following the awf.* convention
 * (Sentry drops unknown numeric attributes; see docs/otel-sentry.md).
 *
 * A field is skipped when it is null/undefined or when it is a non-finite
 * number (NaN, Infinity, -Infinity): exporting the text "NaN" would corrupt
 * attributes that are meant to be aggregated. If no field qualifies,
 * span.setAttributes is not called at all.
 *
 * The function is a no-op when telemetry is disabled, when the span is
 * falsy, or when budgetResult is falsy. Errors raised while building or
 * setting the attributes are swallowed: telemetry is best-effort and must
 * not break the request path.
 *
 * @param {import('@opentelemetry/api').Span} span
 * @param {object|undefined} budgetResult - Output from computeTokenBudgetUsage
 * @returns {void}
 */
function setBudgetAttributes(span, budgetResult) {
  if (!_enabled || !span || !budgetResult) return;
  try {
    const attrs = {};
    for (const [field, attributeKey] of BUDGET_ATTRIBUTE_KEYS) {
      const value = budgetResult[field];
      // `== null` deliberately matches both null and undefined; 0 is kept.
      if (value == null) continue;
      // Only numbers are screened; any other type is stringified as before.
      if (typeof value === 'number' && !Number.isFinite(value)) continue;
      attrs[attributeKey] = String(value);
    }
    if (Object.keys(attrs).length > 0) {
      span.setAttributes(attrs);
    }
  } catch { /* best-effort */ }
}
 * @@ -251,6 +290,7 @@ function isEnabled() { return _enabled; } module.exports = { startRequestSpan, setTokenAttributes, + setBudgetAttributes, endSpan, endSpanError, shutdown, diff --git a/containers/api-proxy/otel.test.js b/containers/api-proxy/otel.test.js index f44ce267..6fe27ab6 100644 --- a/containers/api-proxy/otel.test.js +++ b/containers/api-proxy/otel.test.js @@ -290,6 +290,68 @@ describe('otel — setTokenAttributes', () => { }); }); +/* Covers setBudgetAttributes: a full budget payload, a credits-only payload, and the guards for an undefined budget result and a null span. */ +describe('otel — setBudgetAttributes', () => { + test('sets AI credits and model unit attributes as strings', async () => { + const { otel, memExporter } = loadOtelWithMemoryExporter(); + + const span = otel.startRequestSpan({ + provider: 'anthropic', method: 'POST', path: '/v1/messages', requestId: 'b1', + }); + + otel.setBudgetAttributes(span, { + ai_credits_this_response: 0.042, + ai_credits_total: 1.5, + effective_tokens_this_response: 3500, + effective_tokens_total: 85000, + model_multiplier: 2.5, + }); + + otel.endSpan(span, 200); + + await otel._provider.forceFlush(); + const s = memExporter.getFinishedSpans()[0]; + + /* Numeric inputs are expected back in string form, e.g. 0.042 -> '0.042'. */ + expect(s.attributes['awf.ai_credits']).toBe('0.042'); + expect(s.attributes['awf.ai_credits_total']).toBe('1.5'); + expect(s.attributes['awf.model_units']).toBe('3500'); + expect(s.attributes['awf.model_units_total']).toBe('85000'); + expect(s.attributes['awf.model_multiplier']).toBe('2.5'); + }); + + test('sets only ai_credits when model units are absent', async () => { + const { otel, memExporter } = loadOtelWithMemoryExporter(); + + const span = otel.startRequestSpan({ + provider: 'openai', method: 'POST', path: '/v1/chat/completions', requestId: 'b2', + }); + + otel.setBudgetAttributes(span, { + ai_credits_this_response: 0.01, + ai_credits_total: 0.05, + }); + + otel.endSpan(span, 200); + + await otel._provider.forceFlush(); + const s = memExporter.getFinishedSpans()[0]; + + expect(s.attributes['awf.ai_credits']).toBe('0.01'); + expect(s.attributes['awf.ai_credits_total']).toBe('0.05'); + 
expect(s.attributes['awf.model_units']).toBeUndefined(); + expect(s.attributes['awf.model_units_total']).toBeUndefined(); + }); + + /* The two guard tests below use loadOtel() and assert only that the call does not throw. */ + test('is a no-op when budgetResult is undefined', () => { + const otel = loadOtel(); + expect(() => otel.setBudgetAttributes({}, undefined)).not.toThrow(); + }); + + test('is a no-op on a null span', () => { + const otel = loadOtel(); + expect(() => otel.setBudgetAttributes(null, { ai_credits_this_response: 1 })).not.toThrow(); + }); +}); + describe('otel — endSpan', () => { test('sets OK status for 2xx response', async () => { const { otel, memExporter } = loadOtelWithMemoryExporter(); diff --git a/containers/api-proxy/proxy-request.js b/containers/api-proxy/proxy-request.js index 6acb8bbd..94ab547b 100644 --- a/containers/api-proxy/proxy-request.js +++ b/containers/api-proxy/proxy-request.js @@ -92,6 +92,7 @@ try { otel = { startRequestSpan: () => noopSpan, setTokenAttributes: noop, + setBudgetAttributes: noop, endSpan: noop, endSpanError: noop, shutdown: () => Promise.resolve(), diff --git a/containers/api-proxy/upstream-response.js b/containers/api-proxy/upstream-response.js index 44d33b95..8c41255c 100644 --- a/containers/api-proxy/upstream-response.js +++ b/containers/api-proxy/upstream-response.js @@ -231,7 +231,9 @@ function createUpstreamResponseHandlers({ requestId, provider, path: sanitizeForLog(req.url), startTime, metrics, billingInfo, initiatorSent, onUsage: (normalizedUsage, model) => { otel.setTokenAttributes(span, { provider, model, normalizedUsage, streaming: isStreaming }); - return computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model); + const budgetResult = computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model); + /* Record the budget figures on the span, then return the same result the callback returned before. */ + otel.setBudgetAttributes(span, budgetResult); + return budgetResult; }, onSpanEnd: (statusCode) => { otel.endSpan(span, statusCode); diff --git a/docs/otel-sentry.md b/docs/otel-sentry.md index f05105ae..0b4d92ef 100644 --- a/docs/otel-sentry.md 
+++ b/docs/otel-sentry.md @@ -53,6 +53,11 @@ In Sentry's trace detail view, expand a span and look under **Tags & Attributes* | `awf.cached_read` | string | Number of prompt tokens served from cache (as string) | | `awf.cached_write` | string | Number of tokens written to cache (as string) | | `awf.reasoning` | string | Number of reasoning/thinking tokens (as string) | +| `awf.ai_credits` | string | AI credits consumed by this request (as string) | +| `awf.ai_credits_total` | string | Running total of AI credits for the session (as string) | +| `awf.model_units` | string | Effective (multiplier-adjusted) token units for this request (as string) | +| `awf.model_units_total` | string | Running total of effective token units for the session (as string) | +| `awf.model_multiplier` | string | Model cost multiplier applied to this request (as string) | ### `http` group