mark3labs · ezynda3 · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/README.md b/README.md
@@ -312,12 +312,15 @@ kit -e examples/extensions/minimal.go
 
 ### Extension Capabilities
 
-**Lifecycle Events**: OnSessionStart, OnSessionShutdown, OnBeforeAgentStart, OnAgentStart, OnAgentEnd, OnToolCall, OnToolCallInputStart, OnToolCallInputDelta, OnToolCallInputEnd, OnToolExecutionStart, OnToolOutput, OnToolExecutionEnd, OnToolResult, OnInput, OnMessageStart, OnMessageUpdate, OnMessageEnd, OnModelChange, OnContextPrepare, OnBeforeFork, OnBeforeSessionSwitch, OnBeforeCompact, OnCustomEvent, OnSubagentStart, OnSubagentChunk, OnSubagentEnd
+**Lifecycle Events**: OnSessionStart, OnSessionShutdown, OnBeforeAgentStart, OnAgentStart, OnAgentEnd, OnLLMUsage, OnToolCall, OnToolCallInputStart, OnToolCallInputDelta, OnToolCallInputEnd, OnToolExecutionStart, OnToolOutput, OnToolExecutionEnd, OnToolResult, OnInput, OnMessageStart, OnMessageUpdate, OnMessageEnd, OnModelChange, OnContextPrepare, OnBeforeFork, OnBeforeSessionSwitch, OnBeforeCompact, OnCustomEvent, OnSubagentStart, OnSubagentChunk, OnSubagentEnd
+
+`OnAgentEnd` carries per-turn aggregates (`ToolCallCount`, `ToolNames`, `LLMCallCount`, `InputTokensDelta`, `OutputTokensDelta`, `CacheReadTokensDelta`, `CacheWriteTokensDelta`, `CostDelta`, `DurationMs`) so observers don't need to maintain parallel bookkeeping. `OnLLMUsage` fires after each LLM provider call with token + cost deltas attributed to that specific call/model — use it for accurate budget enforcement *between* calls instead of waiting for the turn to finish.
 
 **Custom Components**:
 - **Tools**: Add new tools the LLM can invoke
 - **Commands**: Register slash commands (e.g., `/mycommand`)
 - **Options**: Register configurable extension options
+- **Session State**: Last-write-wins key-value store via `ctx.SetState` / `GetState` / `DeleteState` / `ListState`, persisted to a per-session sidecar file outside the conversation tree
 - **Widgets**: Persistent status displays above/below input
 - **Headers/Footers**: Persistent content above/below the conversation
 - **Status Bar**: Custom status bar entries
@@ -373,6 +376,7 @@ See the `examples/extensions/` directory:
 - [`tool-logger.go`](examples/extensions/tool-logger.go) - Log all tool calls
 - [`neon-theme.go`](examples/extensions/neon-theme.go) - Custom theme registration and switching
 - [`tool-renderer-demo.go`](examples/extensions/tool-renderer-demo.go) - Custom tool call rendering
+- [`usage-budget.go`](examples/extensions/usage-budget.go) - Per-call usage callback (`OnLLMUsage`), session state, and enriched `OnAgentEnd` per-turn report
 - [`widget-status.go`](examples/extensions/widget-status.go) - Persistent status widgets
 
 Also see [`.kit/extensions/go-edit-lint.go`](.kit/extensions/go-edit-lint.go) (in this repo) for a project-local extension example that runs gopls and golangci-lint on Go file edits.

diff --git a/cmd/extension_context.go b/cmd/extension_context.go
@@ -190,6 +190,18 @@ func buildInteractiveExtensionContext(deps extensionContextDeps) extensions.Cont
 		GetEntries: func(entryType string) []extensions.ExtensionEntry {
 			return kitInstance.Extensions().GetEntries(entryType)
 		},
+		SetState: func(key string, value string) {
+			kitInstance.Extensions().SetState(key, value)
+		},
+		GetState: func(key string) (string, bool) {
+			return kitInstance.Extensions().GetState(key)
+		},
+		DeleteState: func(key string) {
+			kitInstance.Extensions().DeleteState(key)
+		},
+		ListState: func() []string {
+			return kitInstance.Extensions().ListState()
+		},
 		SetEditorText: func(text string) {
 			appInstance.SetEditorTextFromExtension(text)
 		},

diff --git a/cmd/root.go b/cmd/root.go
@@ -931,6 +931,9 @@ func runNormalMode(ctx context.Context) error {
 			startupExtensionMessages = append(startupExtensionMessages, text)
 		}
 		kitInstance.Extensions().SetContext(extCtx)
+		if err := kitInstance.Extensions().InitStatePersistence(); err != nil {
+			log.Printf("WARN extension state init failed: %v", err)
+		}
 		kitInstance.Extensions().EmitSessionStart()
 
 		// Restore normal print functions for runtime use.

diff --git a/examples/extensions/README.md b/examples/extensions/README.md
@@ -58,6 +58,7 @@ kit install github.com/mark3labs/kit/examples/extensions --local
 | `project-rules.go` | Project-specific rules | Session data, file reading |
 | `protected-paths.go` | Block dangerous operations | `OnToolCall` with blocking |
 | `permission-gate.go` | Confirm destructive actions | `OnToolCall` with confirmation |
+| `usage-budget.go` | Soft cost cap + per-turn report | `OnLLMUsage`, `SetState`/`GetState`, enriched `AgentEndEvent` |
 
 ### Tools & Commands
 

diff --git a/examples/extensions/usage-budget.go b/examples/extensions/usage-budget.go
@@ -0,0 +1,87 @@
+//go:build ignore
+
+package main
+
+import (
+	"fmt"
+	"strconv"
+
+	"kit/ext"
+)
+
+// Init demonstrates the three primitives added in issue #53:
+//
+//  1. api.OnLLMUsage(...) — per-LLM-call usage callback with token + cost
+//     deltas. Use this for budget enforcement that reacts between calls
+//     within a single agent turn, rather than only at turn boundaries.
+//
+//  2. ctx.SetState / ctx.GetState / ctx.DeleteState / ctx.ListState —
+//     last-write-wins, session-scoped key-value store backed by a sidecar
+//     file. Use this for snapshot state (current value of X) instead of
+//     ctx.AppendEntry, which is append-only and bloats branch reads.
+//
+//  3. ext.AgentEndEvent.ToolCallCount / .ToolNames / .LLMCallCount /
+//     .InputTokensDelta / .OutputTokensDelta / .CostDelta / .DurationMs —
+//     per-turn aggregates, so observers need no parallel bookkeeping.
+//
+// Together these support a simple soft-budget cap: warn once the session's
+// cumulative cost exceeds a threshold, and print a per-turn report on AgentEnd.
+//
+// Usage: kit -e examples/extensions/usage-budget.go
+func Init(api ext.API) {
+	const (
+		warnAtKey    = "usage-budget:warn-at-usd"
+		totalCostKey = "usage-budget:total-cost"
+	)
+
+	// 1. Print per-LLM-call usage with provider, model, and cost.
+	api.OnLLMUsage(func(e ext.LLMUsageEvent, ctx ext.Context) {
+		ctx.Print(fmt.Sprintf(
+			"[usage] step=%d %s/%s tokens=↑%d ↓%d cache=↑%d/↓%d cost=$%.4f (%s)",
+			e.StepNumber, e.Provider, e.Model,
+			e.InputTokens, e.OutputTokens,
+			e.CacheWriteTokens, e.CacheReadTokens,
+			e.Cost, e.FinishReason,
+		))
+
+		// 2. Persist running total; an unparsable stored value restarts at zero.
+		current := 0.0
+		if raw, ok := ctx.GetState(totalCostKey); ok {
+			if v, err := strconv.ParseFloat(raw, 64); err == nil {
+				current = v
+			}
+		}
+		current += e.Cost
+		ctx.SetState(totalCostKey, strconv.FormatFloat(current, 'f', 6, 64))
+
+		// Soft warn-at threshold (configurable via state).
+		warnAt := 0.50
+		if raw, ok := ctx.GetState(warnAtKey); ok {
+			if v, err := strconv.ParseFloat(raw, 64); err == nil {
+				warnAt = v
+			}
+		}
+		if current > warnAt {
+			ctx.PrintError(fmt.Sprintf("[usage] session cost $%.4f exceeds soft cap $%.2f", current, warnAt))
+		}
+	})
+
+	// 3. Print a per-turn summary using the enriched AgentEndEvent.
+	api.OnAgentEnd(func(e ext.AgentEndEvent, ctx ext.Context) {
+		ctx.Print(fmt.Sprintf(
+			"[turn] stop=%s tools=%d llm-calls=%d tokens=↑%d ↓%d cost=$%.4f duration=%dms",
+			e.StopReason, e.ToolCallCount, e.LLMCallCount,
+			e.InputTokensDelta, e.OutputTokensDelta, e.CostDelta, e.DurationMs,
+		))
+		if len(e.ToolNames) > 0 {
+			ctx.Print(fmt.Sprintf("[turn] tool order: %v", e.ToolNames))
+		}
+	})
+
+	// Bootstrap default soft cap once per session.
+	api.OnSessionStart(func(e ext.SessionStartEvent, ctx ext.Context) {
+		if _, ok := ctx.GetState(warnAtKey); !ok {
+			ctx.SetState(warnAtKey, "0.50")
+		}
+	})
+}
diff --git a/internal/extensions/api.go b/internal/extensions/api.go
@@ -341,6 +341,13 @@ type Context struct {
 	// The data survives across session restarts and can be retrieved via
 	// GetEntries. Use entryType to namespace your data (e.g. "myext:state").
 	//
+	// AppendEntry is append-only and lives in the conversation tree, which
+	// makes it the right tool for audit logs and event histories. For
+	// last-write-wins snapshot state — "what's the current value of X?" —
+	// prefer SetState / GetState instead. Those primitives store data in a
+	// sidecar file outside the conversation tree, are O(1) to read/write,
+	// and do not bloat branch reads or duplicate on fork.
+	//
 	// Example:
 	//
 	//   data, _ := json.Marshal(myState)
@@ -360,6 +367,45 @@ type Context struct {
 	//   }
 	GetEntries func(entryType string) []ExtensionEntry
 
+	// SetState stores a key-value pair in session-scoped, last-write-wins
+	// extension state. Unlike AppendEntry the value is kept in a sidecar
+	// file outside the conversation tree, so:
+	//   - reads are O(1) (no branch walk)
+	//   - writes don't bloat the session JSONL
+	//   - state is not duplicated on fork (branches share the sidecar)
+	//   - state is invisible to the LLM
+	//
+	// Use SetState for snapshot state ("current value of X"); use
+	// AppendEntry for audit logs and event histories. Namespace keys with
+	// your extension name to avoid collisions (e.g. "myext:budget-cap").
+	//
+	// State persists for the lifetime of the session. For ephemeral or
+	// in-memory sessions the state lives only in memory.
+	//
+	// Example:
+	//
+	//   ctx.SetState("myext:budget-cap", "10.00")
+	SetState func(key string, value string)
+
+	// GetState returns the value previously stored via SetState. The bool
+	// is false when the key was never written or has since been deleted.
+	// Returns ("", false) when state is unavailable.
+	//
+	// Example:
+	//
+	//   if cap, ok := ctx.GetState("myext:budget-cap"); ok {
+	//       fmt.Println("current cap:", cap)
+	//   }
+	GetState func(key string) (string, bool)
+
+	// DeleteState removes a key from session-scoped extension state.
+	// No-op when the key is missing.
+	DeleteState func(key string)
+
+	// ListState returns all keys currently stored in session-scoped
+	// extension state, in unspecified order.
+	ListState func() []string
+
 	// SetEditorText sets the text content of the input editor. This can
 	// be used to pre-fill the editor with suggested text (e.g. extracted
 	// questions, handoff prompts). The cursor is moved to the end.
@@ -1102,6 +1148,7 @@ type API struct {
 	onError                   func(func(ErrorEvent, Context))
 	onRetry                   func(func(RetryEvent, Context))
 	onPrepareStep             func(func(PrepareStepEvent, Context) *PrepareStepResult)
+	onLLMUsage                func(func(LLMUsageEvent, Context))
 }
 
 // OnToolCall registers a handler that fires before a tool executes.
@@ -1359,6 +1406,19 @@ func (a *API) OnPrepareStep(handler func(PrepareStepEvent, Context) *PrepareStep
 	a.onPrepareStep(handler)
 }
 
+// OnLLMUsage registers a handler that fires after each LLM provider call
+// with the token and cost deltas for that single call. Use this for
+// per-call usage attribution, real-time budget enforcement, and cost
+// dashboards that need to react between calls within a single agent turn.
+//
+// Handlers receive an LLMUsageEvent describing the call's input/output
+// tokens, cache tokens, computed cost, model, provider, request id, step
+// number, and finish reason. A single agent turn typically fires multiple
+// LLMUsageEvents (one per tool-loop iteration).
+func (a *API) OnLLMUsage(handler func(LLMUsageEvent, Context)) {
+	a.onLLMUsage(handler)
+}
+
 // RegisterToolRenderer registers a custom renderer for a specific tool's
 // display in the TUI. The renderer controls the header (parameter summary)
 // and/or body (result display) of the tool's output block. If multiple
@@ -2091,10 +2151,47 @@ type AgentStartEvent struct {
 
 func (e AgentStartEvent) Type() EventType { return AgentStart }
 
-// AgentEndEvent fires when the agent finishes responding.
+// AgentEndEvent fires when the agent finishes responding. Besides the final
+// response and stop reason it carries per-turn aggregates, so observer-style
+// extensions need no parallel bookkeeping in OnToolResult / OnStepFinish
+// handlers. Per-call figures are reported separately via LLMUsageEvent.
 type AgentEndEvent struct {
 	Response   string
 	StopReason string // "completed", "cancelled", "error"
+
+	// ToolCallCount is the total number of tool invocations observed during
+	// this turn (sum across all steps).
+	ToolCallCount int
+
+	// ToolNames lists the tool names invoked during this turn, in call order.
+	// Duplicates are preserved (e.g. two bash calls produce ["bash", "bash"]).
+	ToolNames []string
+
+	// LLMCallCount is the number of LLM round-trips (tool-loop iterations)
+	// performed during this turn. Always >= 1 for a successful turn.
+	LLMCallCount int
+
+	// InputTokensDelta is the sum of input tokens consumed during this turn
+	// across every LLM call (including cache-hit input tokens).
+	InputTokensDelta int
+
+	// OutputTokensDelta is the sum of output tokens generated during this turn.
+	OutputTokensDelta int
+
+	// CacheReadTokensDelta is the sum of cache-read tokens during this turn.
+	CacheReadTokensDelta int
+
+	// CacheWriteTokensDelta is the sum of cache-write tokens during this turn.
+	CacheWriteTokensDelta int
+
+	// CostDelta is the total cost in USD attributable to this turn. Computed
+	// from per-step usage and current model pricing. Zero when pricing is
+	// unknown or OAuth credentials are in use.
+	CostDelta float64
+
+	// DurationMs is the elapsed wall-clock time from AgentStart to AgentEnd,
+	// in milliseconds.
+	DurationMs int64
 }
 
 func (e AgentEndEvent) Type() EventType { return AgentEnd }
@@ -2403,6 +2500,43 @@ type PrepareStepResult struct {
 
 func (PrepareStepResult) isResult() {}
 
+// LLMUsageEvent fires after each LLM provider call with the per-call token
+// and cost deltas. Use it for budget tracking, cost dashboards, and any logic
+// that must react between LLM calls within a single agent turn rather than
+// only at turn boundaries; per-turn totals are carried by AgentEndEvent.
+//
+// A single agent turn typically produces multiple LLMUsageEvents (one per
+// tool-loop iteration). The Model and Provider fields reflect the model used
+// for that specific call, which may differ from earlier calls if the
+// extension switched models mid-turn via ctx.SetModel().
+type LLMUsageEvent struct {
+	// InputTokens is the number of input tokens for this call.
+	InputTokens int
+	// OutputTokens is the number of output tokens generated by this call.
+	OutputTokens int
+	// CacheReadTokens is the number of cache-hit input tokens (provider-specific).
+	CacheReadTokens int
+	// CacheWriteTokens is the number of cache-write tokens for this call.
+	CacheWriteTokens int
+	// Cost is the USD cost of this call computed from the model's per-token
+	// pricing. Zero when pricing is unknown or OAuth credentials are in use.
+	Cost float64
+	// Model is the model identifier used for this call (e.g. "claude-sonnet-4-5-20250929").
+	Model string
+	// Provider is the provider identifier (e.g. "anthropic", "openai").
+	Provider string
+	// RequestID is an optional correlation id for the underlying provider
+	// call. May be empty when the provider does not surface one.
+	RequestID string
+	// StepNumber is the zero-based step index within the current agent turn.
+	StepNumber int
+	// FinishReason mirrors the provider's finish reason for this call
+	// (e.g. "stop", "tool_calls", "length"). May be empty.
+	FinishReason string
+}
+
+func (e LLMUsageEvent) Type() EventType { return LLMUsage }
+
 // ThemeColor is an adaptive color pair with light and dark hex values.
 // Either field may be empty to inherit from the default theme.
 type ThemeColor struct {

diff --git a/internal/extensions/events.go b/internal/extensions/events.go
@@ -125,6 +125,11 @@ const (
 	// after steering messages are injected and before messages are sent
 	// to the LLM. Handlers can replace the context window for this step.
 	PrepareStep EventType = "prepare_step"
+
+	// LLMUsage fires after each LLM provider call with the token and cost
+	// deltas for that single call. Extensions use it to attribute usage to
+	// specific calls/models and to drive budget enforcement between calls.
+	LLMUsage EventType = "llm_usage"
 )
 
 // AllEventTypes returns every supported event type.
@@ -139,7 +144,7 @@ func AllEventTypes() []EventType {
 		BeforeFork, BeforeSessionSwitch, BeforeCompact,
 		SubagentStart, SubagentChunk, SubagentEnd,
 		StepStart, StepFinish, ReasoningStart, Warnings, Source, Error, Retry,
-		PrepareStep,
+		PrepareStep, LLMUsage,
 	}
 }
 

diff --git a/internal/extensions/events_test.go b/internal/extensions/events_test.go
@@ -4,8 +4,8 @@ import "testing"
 
 func TestAllEventTypes_Count(t *testing.T) {
 	all := AllEventTypes()
-	if len(all) != 32 {
-		t.Fatalf("expected 32 event types, got %d", len(all))
+	if len(all) != 33 { // 32 pre-existing event types plus the new LLMUsage
+		t.Fatalf("expected 33 event types, got %d", len(all))
 	}
 }