feat: add CacheReadInputTokens and CacheWriteInputTokens to TokenUsageRecord #229
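The core change, per the title, is to carry explicit cache accounting on the usage record. A minimal sketch of what the extended record could look like — the surrounding field names and types are assumptions, only the two cache fields come from this PR:

```go
// Hypothetical sketch only: fields other than the two cache counters are assumed,
// not taken from the aibridge source.
type TokenUsageRecord struct {
	InputTokens           int64
	OutputTokens          int64
	CacheReadInputTokens  int64 // prompt tokens served from the provider's cache
	CacheWriteInputTokens int64 // prompt tokens written to the cache (Anthropic's cache_creation_input_tokens)
}
```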
Changes from all commits
```diff
@@ -16,6 +16,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	promtest "github.com/prometheus/client_golang/prometheus/testutil"
 	"github.com/stretchr/testify/require"
+	"github.com/tidwall/sjson"
 )

 func TestMetrics_Interception(t *testing.T) {
```
```diff
@@ -270,36 +271,104 @@ func TestMetrics_PromptCount(t *testing.T) {
 func TestMetrics_TokenUseCount(t *testing.T) {
 	t.Parallel()

-	ctx, cancel := context.WithTimeout(t.Context(), time.Second*30)
-	t.Cleanup(cancel)
-
-	fix := fixtures.Parse(t, fixtures.OaiResponsesBlockingCachedInputTokens)
-	upstream := newMockUpstream(t, ctx, newFixtureResponse(fix))
-
-	m := aibridge.NewMetrics(prometheus.NewRegistry())
-	bridgeServer := newBridgeTestServer(t, ctx, upstream.URL,
-		withMetrics(m),
-	)
-
-	resp := bridgeServer.makeRequest(t, http.MethodPost, pathOpenAIResponses, fix.Request(),
-		http.Header{"User-Agent": []string{"claude-code/1.0.0"}})
-	require.Equal(t, http.StatusOK, resp.StatusCode)
-	_, _ = io.ReadAll(resp.Body)
-
-	clientLabel := string(aibridge.ClientClaudeCode)
-	// Token metrics are recorded asynchronously; wait for them to appear.
-	require.Eventually(t, func() bool {
-		return promtest.ToFloat64(m.TokenUseCount.WithLabelValues(
-			config.ProviderOpenAI, "gpt-4.1", "input", defaultActorID, clientLabel)) > 0
-	}, time.Second*10, time.Millisecond*50)
-
-	require.Equal(t, 129.0, promtest.ToFloat64(m.TokenUseCount.WithLabelValues(config.ProviderOpenAI, "gpt-4.1", "input", defaultActorID, clientLabel))) // 12033 - 11904 (cached)
-	require.Equal(t, 44.0, promtest.ToFloat64(m.TokenUseCount.WithLabelValues(config.ProviderOpenAI, "gpt-4.1", "output", defaultActorID, clientLabel)))
-
-	// ExtraTokenTypes
-	require.Equal(t, 11904.0, promtest.ToFloat64(m.TokenUseCount.WithLabelValues(config.ProviderOpenAI, "gpt-4.1", "input_cached", defaultActorID, clientLabel)))
-	require.Equal(t, 0.0, promtest.ToFloat64(m.TokenUseCount.WithLabelValues(config.ProviderOpenAI, "gpt-4.1", "output_reasoning", defaultActorID, clientLabel)))
-	require.Equal(t, 12077.0, promtest.ToFloat64(m.TokenUseCount.WithLabelValues(config.ProviderOpenAI, "gpt-4.1", "total_tokens", defaultActorID, clientLabel)))
```
|
Review comment on lines -300 to -302:

Contributor: Do we have any grafana dashboards that use these labels?

Contributor (Author): Good question. I've summarized the metric labels for cached tokens similarly to how it's done in the DB records, but maybe I should keep both.

Contributor (Author): Reverted the removal of cached tokens from …
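The added half of the hunk, shown below, polls the counter with require.Eventually because token metrics are recorded asynchronously, then compares every expected label value via promtest.ToFloat64. Here is a minimal standalone sketch of that polling pattern, decoupled from aibridge — the metric name, label, and numbers are illustrative only:

```go
package metrics_test

import (
	"testing"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	promtest "github.com/prometheus/client_golang/prometheus/testutil"
	"github.com/stretchr/testify/require"
)

func TestCounterVisibleEventually(t *testing.T) {
	// Stand-in for aibridge's TokenUseCount counter vec (illustrative label set).
	tokens := prometheus.NewCounterVec(
		prometheus.CounterOpts{Name: "token_usage_total", Help: "illustrative counter"},
		[]string{"token_type"},
	)

	// Simulate an asynchronous recorder; the real test polls because the bridge
	// records token metrics asynchronously.
	go tokens.WithLabelValues("cache_read_input_tokens").Add(11904)

	// Poll until the asynchronous write is visible, then assert the exact value.
	require.Eventually(t, func() bool {
		return promtest.ToFloat64(tokens.WithLabelValues("cache_read_input_tokens")) > 0
	}, time.Second, 10*time.Millisecond)
	require.Equal(t, 11904.0, promtest.ToFloat64(tokens.WithLabelValues("cache_read_input_tokens")))
}
```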
```diff
+	cases := []struct {
+		name           string
+		fixture        []byte
+		reqPath        string
+		streaming      bool
+		expectProvider string
+		expectModel    string
+		expectedLabels map[string]float64
+	}{
+		{
+			name:           "openai_responses",
+			fixture:        fixtures.OaiResponsesBlockingCachedInputTokens,
+			reqPath:        pathOpenAIResponses,
+			expectProvider: config.ProviderOpenAI,
+			expectModel:    "gpt-4.1",
+			expectedLabels: map[string]float64{
+				"input":                    129, // 12033 - 11904 cached
+				"output":                   44,
+				"cache_read_input_tokens":  11904,
+				"cache_write_input_tokens": 0,
+				"input_cached":             11904,
+				"output_reasoning":         0,
+				"total_tokens":             12077,
+			},
+		},
+		{
+			name:           "anthropic_messages_streaming",
+			fixture:        fixtures.AntSingleBuiltinTool,
+			reqPath:        pathAnthropicMessages,
+			streaming:      true,
+			expectProvider: config.ProviderAnthropic,
+			expectModel:    "claude-sonnet-4-20250514",
+			expectedLabels: map[string]float64{
+				"input":                     2,
+				"output":                    66,
+				"cache_read_input_tokens":   13993,
+				"cache_write_input_tokens":  22,
+				"cache_read_input":          13993,
+				"cache_creation_input":      22,
+			},
+		},
+		{
+			name:           "openai_chat_completions",
+			fixture:        fixtures.OaiChatSimple,
+			reqPath:        pathOpenAIChatCompletions,
+			expectProvider: config.ProviderOpenAI,
+			expectModel:    "gpt-4.1",
+			expectedLabels: map[string]float64{
+				"input":                          19,
+				"output":                         200,
+				"cache_read_input_tokens":        0,
+				"cache_write_input_tokens":       0,
+				"prompt_cached":                  0,
+				"completion_reasoning":           0,
+				"completion_accepted_prediction": 0,
+				"completion_rejected_prediction": 0,
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			ctx, cancel := context.WithTimeout(t.Context(), time.Second*30)
+			t.Cleanup(cancel)
+
+			fix := fixtures.Parse(t, tc.fixture)
+			upstream := newMockUpstream(t, ctx, newFixtureResponse(fix))
+
+			m := aibridge.NewMetrics(prometheus.NewRegistry())
+			bridgeServer := newBridgeTestServer(t, ctx, upstream.URL,
+				withMetrics(m),
+			)
+
+			reqBody := fix.Request()
+			if tc.streaming {
+				var err error
+				reqBody, err = sjson.SetBytes(reqBody, "stream", true)
+				require.NoError(t, err)
+			}
+			resp := bridgeServer.makeRequest(t, http.MethodPost, tc.reqPath, reqBody, nil)
+			require.Equal(t, http.StatusOK, resp.StatusCode)
+			_, _ = io.ReadAll(resp.Body)
+
+			// metrics are updated asynchronously
+			require.Eventually(t, func() bool {
+				return promtest.ToFloat64(m.TokenUseCount.WithLabelValues(
+					tc.expectProvider, tc.expectModel, "input", defaultActorID, string(aibridge.ClientUnknown))) > 0
+			}, time.Second*10, time.Millisecond*50)
+
+			for label, expected := range tc.expectedLabels {
+				require.Equal(t, expected, promtest.ToFloat64(m.TokenUseCount.WithLabelValues(
+					tc.expectProvider, tc.expectModel, label, defaultActorID, string(aibridge.ClientUnknown),
+				)), "metric label %q mismatch", label)
+			}
+		})
+	}
 }

 func TestMetrics_NonInjectedToolUseCount(t *testing.T) {
```
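The streaming case flips the `stream` flag directly on the fixture's raw JSON request body via sjson.SetBytes, which is what the new import is for. A standalone sketch of that call, with a made-up body:

```go
package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	// Illustrative body; the test mutates the fixture's recorded request instead.
	body := []byte(`{"model":"claude-sonnet-4-20250514","max_tokens":256}`)

	// SetBytes returns a new JSON document with the "stream" key set to true.
	body, err := sjson.SetBytes(body, "stream", true)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body)) // {"model":"claude-sonnet-4-20250514","max_tokens":256,"stream":true}
}
```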
Review comment: No CacheWriteInputTokens?

Reply: OpenAI doesn't provide this information.