Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions containers/api-proxy/otel.js
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,45 @@ function setTokenAttributes(span, { provider, model, normalizedUsage, streaming
} catch { /* best-effort */ }
}

/**
 * Span attribute keys paired with the budgetResult field each one is read from.
 *
 * Effective-token fields are published under "model_units" keys to avoid
 * Sentry's PII scrubbing rule that redacts values for keys containing "token".
 */
const BUDGET_ATTRIBUTE_FIELDS = Object.freeze([
  ['awf.ai_credits', 'ai_credits_this_response'],
  ['awf.ai_credits_total', 'ai_credits_total'],
  ['awf.model_units', 'effective_tokens_this_response'],
  ['awf.model_units_total', 'effective_tokens_total'],
  ['awf.model_multiplier', 'model_multiplier'],
]);

/**
 * Attach AI-credits and effective-token budget attributes to the span.
 *
 * Called from the onUsage callback after computeTokenBudgetUsage returns,
 * so that per-request AI credits are queryable in Sentry/Grafana.
 *
 * All values are emitted as strings following the awf.* convention
 * (Sentry drops unknown numeric attributes; see docs/otel-sentry.md).
 * Fields that are null or undefined on budgetResult are skipped, and
 * span.setAttributes is not called at all when nothing is left to emit.
 *
 * This is best-effort: any error raised while reading budgetResult or writing
 * to the span is swallowed so telemetry can never break request handling.
 *
 * @param {import('@opentelemetry/api').Span} span
 * @param {object|undefined} budgetResult - Output from computeTokenBudgetUsage
 * @returns {void}
 */
function setBudgetAttributes(span, budgetResult) {
  if (!_enabled || !span || !budgetResult) return;
  try {
    const attrs = {};
    for (const [attributeKey, field] of BUDGET_ATTRIBUTE_FIELDS) {
      const value = budgetResult[field];
      // `!= null` keeps legitimate zero values while dropping null/undefined.
      if (value != null) {
        attrs[attributeKey] = String(value);
      }
    }
    if (Object.keys(attrs).length > 0) {
      span.setAttributes(attrs);
    }
  } catch { /* best-effort */ }
}

/**
* End a span successfully with the upstream HTTP status code.
*
Expand Down Expand Up @@ -251,6 +290,7 @@ function isEnabled() { return _enabled; }
module.exports = {
startRequestSpan,
setTokenAttributes,
setBudgetAttributes,
endSpan,
endSpanError,
shutdown,
Expand Down
62 changes: 62 additions & 0 deletions containers/api-proxy/otel.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,68 @@ describe('otel — setTokenAttributes', () => {
});
});

// Covers setBudgetAttributes: string-valued awf.* attributes on the exported
// span, omission of attributes whose source fields are absent, and the guard
// clauses for a missing budgetResult or span.
describe('otel — setBudgetAttributes', () => {
  test('sets AI credits and model unit attributes as strings', async () => {
    const { otel, memExporter } = loadOtelWithMemoryExporter();

    const span = otel.startRequestSpan({
      provider: 'anthropic', method: 'POST', path: '/v1/messages', requestId: 'b1',
    });

    otel.setBudgetAttributes(span, {
      ai_credits_this_response: 0.042,
      ai_credits_total: 1.5,
      effective_tokens_this_response: 3500,
      effective_tokens_total: 85000,
      model_multiplier: 2.5,
    });

    otel.endSpan(span, 200);

    // Flush so the finished span is visible to the in-memory exporter.
    await otel._provider.forceFlush();
    const s = memExporter.getFinishedSpans()[0];

    expect(s.attributes['awf.ai_credits']).toBe('0.042');
    expect(s.attributes['awf.ai_credits_total']).toBe('1.5');
    expect(s.attributes['awf.model_units']).toBe('3500');
    expect(s.attributes['awf.model_units_total']).toBe('85000');
    expect(s.attributes['awf.model_multiplier']).toBe('2.5');
  });

  test('sets only ai_credits when model units are absent', async () => {
    const { otel, memExporter } = loadOtelWithMemoryExporter();

    const span = otel.startRequestSpan({
      provider: 'openai', method: 'POST', path: '/v1/chat/completions', requestId: 'b2',
    });

    otel.setBudgetAttributes(span, {
      ai_credits_this_response: 0.01,
      ai_credits_total: 0.05,
    });

    otel.endSpan(span, 200);

    await otel._provider.forceFlush();
    const s = memExporter.getFinishedSpans()[0];

    expect(s.attributes['awf.ai_credits']).toBe('0.01');
    expect(s.attributes['awf.ai_credits_total']).toBe('0.05');
    // Every field missing from the input must be left off the span entirely.
    expect(s.attributes['awf.model_units']).toBeUndefined();
    expect(s.attributes['awf.model_units_total']).toBeUndefined();
    expect(s.attributes['awf.model_multiplier']).toBeUndefined();
  });

  test('is a no-op when budgetResult is undefined', () => {
    const otel = loadOtel();
    expect(() => otel.setBudgetAttributes({}, undefined)).not.toThrow();
  });

  test('is a no-op on a null span', () => {
    const otel = loadOtel();
    expect(() => otel.setBudgetAttributes(null, { ai_credits_this_response: 1 })).not.toThrow();
  });
});

describe('otel — endSpan', () => {
test('sets OK status for 2xx response', async () => {
const { otel, memExporter } = loadOtelWithMemoryExporter();
Expand Down
1 change: 1 addition & 0 deletions containers/api-proxy/proxy-request.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ try {
otel = {
startRequestSpan: () => noopSpan,
setTokenAttributes: noop,
setBudgetAttributes: noop,
endSpan: noop,
endSpanError: noop,
shutdown: () => Promise.resolve(),
Expand Down
4 changes: 3 additions & 1 deletion containers/api-proxy/upstream-response.js
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,9 @@ function createUpstreamResponseHandlers({
requestId, provider, path: sanitizeForLog(req.url), startTime, metrics, billingInfo, initiatorSent,
onUsage: (normalizedUsage, model) => {
otel.setTokenAttributes(span, { provider, model, normalizedUsage, streaming: isStreaming });
return computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model);
const budgetResult = computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model);
otel.setBudgetAttributes(span, budgetResult);
return budgetResult;
},
onSpanEnd: (statusCode) => {
otel.endSpan(span, statusCode);
Expand Down
5 changes: 5 additions & 0 deletions docs/otel-sentry.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ In Sentry's trace detail view, expand a span and look under **Tags & Attributes*
| `awf.cached_read` | string | Number of prompt tokens served from cache (as string) |
| `awf.cached_write` | string | Number of tokens written to cache (as string) |
| `awf.reasoning` | string | Number of reasoning/thinking tokens (as string) |
| `awf.ai_credits` | string | AI credits consumed by this request |
| `awf.ai_credits_total` | string | Running total AI credits for the session |
| `awf.model_units` | string | Effective (multiplier-adjusted) token units for this request |
| `awf.model_units_total` | string | Running total effective token units for the session |
| `awf.model_multiplier` | string | Model cost multiplier applied to this request |

### `http` group

Expand Down
Loading