github · lpcox · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/containers/api-proxy/otel.js b/containers/api-proxy/otel.js
@@ -191,6 +191,45 @@ function setTokenAttributes(span, { provider, model, normalizedUsage, streaming
   } catch { /* best-effort */ }
 }
 
+/**
+ * Record AI-credit and effective-token budget figures on a request span.
+ *
+ * Invoked by the onUsage callback with the result of computeTokenBudgetUsage,
+ * so per-request AI credits are queryable in Sentry/Grafana.
+ *
+ * Every value is stringified, per the awf.* convention (Sentry drops unknown
+ * numeric attributes; see docs/otel-sentry.md). Effective-token figures are
+ * published under "model_units" keys because Sentry's PII scrubbing redacts
+ * values whose key contains "token".
+ *
+ * Returns without touching the span when `_enabled` is false or either
+ * argument is falsy; null/undefined fields are skipped; errors are swallowed.
+ *
+ * @param {import('@opentelemetry/api').Span} span
+ * @param {object|undefined} budgetResult - Output from computeTokenBudgetUsage
+ */
+function setBudgetAttributes(span, budgetResult) {
+  if (!(_enabled && span && budgetResult)) return;
+  try {
+    // [span attribute key, budgetResult field], listed in emission order.
+    const mapping = [
+      ['awf.ai_credits', 'ai_credits_this_response'],
+      ['awf.ai_credits_total', 'ai_credits_total'],
+      ['awf.model_units', 'effective_tokens_this_response'],
+      ['awf.model_units_total', 'effective_tokens_total'],
+      ['awf.model_multiplier', 'model_multiplier'],
+    ];
+    const collected = {};
+    for (const [key, field] of mapping) {
+      const value = budgetResult[field];
+      if (value == null) continue;
+      collected[key] = String(value);
+    }
+    if (Object.keys(collected).length === 0) return;
+    span.setAttributes(collected);
+  } catch { /* best-effort */ }
+}
+
 /**
  * End a span successfully with the upstream HTTP status code.
  *
@@ -251,6 +290,7 @@ function isEnabled() { return _enabled; }
 module.exports = {
   startRequestSpan,
   setTokenAttributes,
+  setBudgetAttributes,
   endSpan,
   endSpanError,
   shutdown,

diff --git a/containers/api-proxy/otel.test.js b/containers/api-proxy/otel.test.js
@@ -290,6 +290,68 @@ describe('otel — setTokenAttributes', () => {
   });
 });
 
+describe('otel — setBudgetAttributes', () => {
+  test('sets AI credits and model unit attributes as strings', async () => {
+    const { otel, memExporter } = loadOtelWithMemoryExporter();
+
+    const span = otel.startRequestSpan({
+      provider: 'anthropic', method: 'POST', path: '/v1/messages', requestId: 'b1',
+    });
+
+    otel.setBudgetAttributes(span, {
+      ai_credits_this_response: 0.042,
+      ai_credits_total: 1.5,
+      effective_tokens_this_response: 3500,
+      effective_tokens_total: 85000,
+      model_multiplier: 2.5,
+    });
+    otel.endSpan(span, 200);
+
+    await otel._provider.forceFlush();
+    const s = memExporter.getFinishedSpans()[0];
+
+    expect(s.attributes['awf.ai_credits']).toBe('0.042');
+    expect(s.attributes['awf.ai_credits_total']).toBe('1.5');
+    expect(s.attributes['awf.model_units']).toBe('3500');
+    expect(s.attributes['awf.model_units_total']).toBe('85000');
+    expect(s.attributes['awf.model_multiplier']).toBe('2.5');
+  });
+
+  test('sets only ai_credits when model units are absent', async () => {
+    const { otel, memExporter } = loadOtelWithMemoryExporter();
+
+    const span = otel.startRequestSpan({
+      provider: 'openai', method: 'POST', path: '/v1/chat/completions', requestId: 'b2',
+    });
+
+    otel.setBudgetAttributes(span, {
+      ai_credits_this_response: 0.01,
+      ai_credits_total: 0.05,
+    });
+    otel.endSpan(span, 200);
+
+    await otel._provider.forceFlush();
+    const s = memExporter.getFinishedSpans()[0];
+
+    // Fields missing from budgetResult must not surface as attributes at all.
+    expect(s.attributes['awf.ai_credits']).toBe('0.01');
+    expect(s.attributes['awf.ai_credits_total']).toBe('0.05');
+    expect(s.attributes['awf.model_units']).toBeUndefined();
+    expect(s.attributes['awf.model_units_total']).toBeUndefined();
+    expect(s.attributes['awf.model_multiplier']).toBeUndefined();
+  });
+
+  test('is a no-op when budgetResult is undefined', () => {
+    const otel = loadOtel();
+    expect(() => otel.setBudgetAttributes({}, undefined)).not.toThrow();
+  });
+
+  test('is a no-op on a null span', () => {
+    const otel = loadOtel();
+    expect(() => otel.setBudgetAttributes(null, { ai_credits_this_response: 1 })).not.toThrow();
+  });
+});
+
 describe('otel — endSpan', () => {
   test('sets OK status for 2xx response', async () => {
     const { otel, memExporter } = loadOtelWithMemoryExporter();

diff --git a/containers/api-proxy/proxy-request.js b/containers/api-proxy/proxy-request.js
@@ -92,6 +92,7 @@ try {
     otel = {
       startRequestSpan:  () => noopSpan,
       setTokenAttributes: noop,
+      setBudgetAttributes: noop,
       endSpan:           noop,
       endSpanError:      noop,
       shutdown:          () => Promise.resolve(),

diff --git a/containers/api-proxy/upstream-response.js b/containers/api-proxy/upstream-response.js
@@ -231,7 +231,9 @@ function createUpstreamResponseHandlers({
       requestId, provider, path: sanitizeForLog(req.url), startTime, metrics, billingInfo, initiatorSent,
       onUsage: (normalizedUsage, model) => {
         otel.setTokenAttributes(span, { provider, model, normalizedUsage, streaming: isStreaming });
-        return computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model);
+        const budgetResult = computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model);
+        otel.setBudgetAttributes(span, budgetResult);
+        return budgetResult;
       },
       onSpanEnd: (statusCode) => {
         otel.endSpan(span, statusCode);

diff --git a/docs/otel-sentry.md b/docs/otel-sentry.md
@@ -53,6 +53,11 @@ In Sentry's trace detail view, expand a span and look under **Tags & Attributes*
 | `awf.cached_read` | string | Number of prompt tokens served from cache (as string) |
 | `awf.cached_write` | string | Number of tokens written to cache (as string) |
 | `awf.reasoning` | string | Number of reasoning/thinking tokens (as string) |
+| `awf.ai_credits` | string | AI credits consumed by this request (as string) |
+| `awf.ai_credits_total` | string | Running total of AI credits for the session (as string) |
+| `awf.model_units` | string | Effective (multiplier-adjusted) token units for this request (as string) |
+| `awf.model_units_total` | string | Running total of effective token units for the session (as string) |
+| `awf.model_multiplier` | string | Model cost multiplier applied to this request (as string) |
 
 ### `http` group