From 0641c92acc231ced206daed0bb3f1b082ab4a249 Mon Sep 17 00:00:00 2001
From: Ed Zynda <ezynda3@gmail.com>
Date: Fri, 17 Apr 2026 11:24:00 +0300
Subject: [PATCH 1/7] feat(sdk): expose generation and provider params on
 Options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds programmatic overrides on kit.Options for the model/provider knobs
that were previously only reachable through viper.Set() — letting SDK
consumers (web apps, services, embedded agents) configure kit fully
in-code without polluting global viper state or shipping .kit.yml.

Generation parameters:
  - MaxTokens         int      (max output tokens per response)
  - ThinkingLevel     string   (off/low/medium/high)
  - Temperature       *float32
  - TopP              *float32
  - TopK              *int32
  - FrequencyPenalty  *float32
  - PresencePenalty   *float32

Sampling params use pointer types so explicit 0 is distinguishable from
unset; nil leaves provider/per-model defaults in place.

Provider configuration:
  - ProviderAPIKey    string
  - ProviderURL       string
  - TLSSkipVerify     bool

Implementation just pushes Options values into viper inside New(),
so all existing downstream code (BuildProviderConfig, SetModel,
modelSettings lookups, runtime model switching) picks them up
uniformly without any new code paths. Tests added for MaxTokens,
ThinkingLevel, and ProviderAPIKey.
---
 pkg/kit/kit.go      | 105 ++++++++++++++++++++++++++++++++++++++++++++
 pkg/kit/kit_test.go |  73 ++++++++++++++++++++++++++++++
 2 files changed, 178 insertions(+)

diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go
index 4e587525..f17335ea 100644
--- a/pkg/kit/kit.go
+++ b/pkg/kit/kit.go
@@ -821,6 +821,70 @@ type Options struct {
 	Tools        []Tool // Custom tool set. If empty, AllTools() is used.
 	ExtraTools   []Tool // Additional tools added alongside core/MCP/extension tools.
 
+	// Generation parameters. These override the corresponding values from
+	// .kit.yml / KIT_* environment variables. Leaving a field at its
+	// zero/nil value means "use the configured default", which in turn
+	// falls back to per-model defaults (modelSettings / customModels) and
+	// finally to the SDK defaults registered in setSDKDefaults().
+	//
+	// Pointer types are used for sampling parameters so the SDK can
+	// distinguish "explicitly set to 0" from "leave alone".
+
+	// MaxTokens overrides the maximum output tokens per LLM response.
+	// 0 = use the configured default (SDK default is 4096). Bump this
+	// when generating long outputs (HTML artifacts, large refactors,
+	// etc.) to avoid silent truncation mid-tool-call. The cap also
+	// applies after model switches via [Kit.SetModel].
+	MaxTokens int
+
+	// ThinkingLevel sets the reasoning effort for models that support
+	// extended thinking. Valid values: "off", "low", "medium", "high".
+	// "" = use the configured default (SDK default is "off"). Use
+	// [Kit.SetThinkingLevel] to change at runtime.
+	ThinkingLevel string
+
+	// Temperature controls sampling randomness (typically 0.0–2.0).
+	// nil = leave provider/per-model default in place. Pointer type
+	// so explicit 0.0 (deterministic) is distinguishable from "unset".
+	Temperature *float32
+
+	// TopP is the nucleus-sampling cutoff (0.0–1.0).
+	// nil = leave provider/per-model default in place.
+	TopP *float32
+
+	// TopK limits sampling to the top K tokens.
+	// nil = leave provider/per-model default in place.
+	TopK *int32
+
+	// FrequencyPenalty discourages repeated tokens (OpenAI-family models).
+	// nil = leave provider/per-model default in place.
+	FrequencyPenalty *float32
+
+	// PresencePenalty discourages repeating topics (OpenAI-family models).
+	// nil = leave provider/per-model default in place.
+	PresencePenalty *float32
+
+	// Provider configuration. These override values normally read from
+	// .kit.yml or provider-specific environment variables. Useful when
+	// loading credentials from a secrets manager, pointing at custom
+	// OpenAI-compatible endpoints (LiteLLM, vLLM, Azure OpenAI, internal
+	// proxies), or running against self-hosted infrastructure.
+
+	// ProviderAPIKey overrides the API key used to authenticate with the
+	// model provider. "" = use the value from config or the
+	// provider-specific environment variable.
+	ProviderAPIKey string
+
+	// ProviderURL overrides the provider endpoint. "" = use the provider's
+	// default URL.
+	ProviderURL string
+
+	// TLSSkipVerify disables TLS certificate verification on provider
+	// HTTP clients. Only set this for self-signed certificates in
+	// development. false means "unset", not "verify": it cannot turn
+	// off a skip enabled via the config file or env var (change it there).
+	TLSSkipVerify bool
+
 	// SkipConfig, when true, skips loading .kit.yml configuration files.
 	// Viper defaults (setSDKDefaults) and environment variables (KIT_*)
 	// are still applied. Use this for fully programmatic configuration.
@@ -1047,6 +1111,47 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
 		}
 		viper.Set("stream", opts.Streaming)
 
+		// Generation parameter overrides. Each Options field, when set,
+		// is pushed into viper here so the existing downstream code
+		// (BuildProviderConfig, SetModel, modelSettings lookups) picks
+		// it up uniformly. Pointer-typed sampling params use viper.Set
+		// only when non-nil so that nil means "leave provider/per-model
+		// default in place" (BuildProviderConfig keys off viper.IsSet).
+		if opts.MaxTokens > 0 {
+			viper.Set("max-tokens", opts.MaxTokens)
+		}
+		if opts.ThinkingLevel != "" {
+			viper.Set("thinking-level", opts.ThinkingLevel)
+		}
+		if opts.Temperature != nil {
+			viper.Set("temperature", *opts.Temperature)
+		}
+		if opts.TopP != nil {
+			viper.Set("top-p", *opts.TopP)
+		}
+		if opts.TopK != nil {
+			viper.Set("top-k", *opts.TopK)
+		}
+		if opts.FrequencyPenalty != nil {
+			viper.Set("frequency-penalty", *opts.FrequencyPenalty)
+		}
+		if opts.PresencePenalty != nil {
+			viper.Set("presence-penalty", *opts.PresencePenalty)
+		}
+
+		// Provider overrides. TLSSkipVerify only takes effect when true —
+		// callers wanting to force-disable should use the config file or
+		// env var instead.
+		if opts.ProviderAPIKey != "" {
+			viper.Set("provider-api-key", opts.ProviderAPIKey)
+		}
+		if opts.ProviderURL != "" {
+			viper.Set("provider-url", opts.ProviderURL)
+		}
+		if opts.TLSSkipVerify {
+			viper.Set("tls-skip-verify", true)
+		}
+
 		// Resolve working directory for context/skill discovery.
 		cwd = opts.SessionDir
 		if cwd == "" {
diff --git a/pkg/kit/kit_test.go b/pkg/kit/kit_test.go
index 346cb1b7..0810637e 100644
--- a/pkg/kit/kit_test.go
+++ b/pkg/kit/kit_test.go
@@ -54,6 +54,79 @@ func TestNewWithOptions(t *testing.T) {
 	}
 }
 
+// TestNewWithGenerationOptions verifies that the SDK-only generation
+// parameter overrides on Options propagate all the way through to the
+// agent without requiring any viper.Set workarounds in caller code.
+func TestNewWithGenerationOptions(t *testing.T) {
+	if os.Getenv("ANTHROPIC_API_KEY") == "" {
+		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
+	}
+
+	ctx := context.Background()
+
+	// MaxTokens override — keep ThinkingLevel off so Anthropic's thinking
+	// budget doesn't auto-bump MaxTokens above what we configured.
+	t.Run("MaxTokens", func(t *testing.T) {
+		const want = 12345
+		host, err := kit.New(ctx, &kit.Options{
+			Model:     "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:     true,
+			MaxTokens: want,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if got := host.MaxTokens(); got != want {
+			t.Errorf("Options.MaxTokens=%d did not propagate; Kit.MaxTokens()=%d", want, got)
+		}
+	})
+
+	// ThinkingLevel override — verified via the public getter, which
+	// reads back the configured (not provider-derived) level.
+	t.Run("ThinkingLevel", func(t *testing.T) {
+		const want = "high"
+		host, err := kit.New(ctx, &kit.Options{
+			Model:         "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:         true,
+			ThinkingLevel: want,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if got := host.GetThinkingLevel(); got != want {
+			t.Errorf("Options.ThinkingLevel=%q did not propagate; Kit.GetThinkingLevel()=%q", want, got)
+		}
+	})
+}
+
+// TestNewWithProviderOptions verifies that programmatic provider overrides
+// (API key, URL) take effect without env vars or config files.
+func TestNewWithProviderOptions(t *testing.T) {
+	if os.Getenv("ANTHROPIC_API_KEY") == "" {
+		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
+	}
+
+	ctx := context.Background()
+
+	// Use the real key but pass it via Options instead of env. Kit should
+	// authenticate successfully — proving the override reached the provider.
+	apiKey := os.Getenv("ANTHROPIC_API_KEY")
+
+	host, err := kit.New(ctx, &kit.Options{
+		Model:          "anthropic/claude-sonnet-4-5-20250929",
+		Quiet:          true,
+		ProviderAPIKey: apiKey,
+	})
+	if err != nil {
+		t.Fatalf("Failed to create Kit with ProviderAPIKey option: %v", err)
+	}
+	defer func() { _ = host.Close() }()
+}
+
 func TestSessionManagement(t *testing.T) {
 	if os.Getenv("ANTHROPIC_API_KEY") == "" {
 		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")

From ecf95b52e19ae0b7ee2a9456a45fed4c3638b855 Mon Sep 17 00:00:00 2001
From: Ed Zynda <ezynda3@gmail.com>
Date: Fri, 17 Apr 2026 11:50:45 +0300
Subject: [PATCH 2/7] fix(sdk): preserve IsSet semantics for generation param
 overrides
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously setSDKDefaults() registered viper.SetDefault for max-tokens,
temperature, top-p, top-k, frequency/presence-penalty, and thinking-level.
viper.SetDefault makes IsSet() return true, which silently suppressed
per-model defaults (ApplyModelSettings) and automatic right-sizing
(rightSizeMaxTokens) for every SDK-created Kit — and for CLI runs too,
since cmd/root.go routes through kit.New. Effective max-tokens for
claude-sonnet-4-5 was pinned at 4096 instead of 32768.

- Drop SetDefault for all IsSet-sensitive keys; keep only model,
  system-prompt, stream, num-gpu-layers, main-gpu.
- Apply a 4096 max-tokens floor directly on the *models.ProviderConfig
  struct in kit.New() when nothing else resolved a value. Keeps
  viper.IsSet("max-tokens") == false so rightSizeMaxTokens and
  per-model maxTokens overrides still fire.
- Update Options.MaxTokens / ThinkingLevel godoc to describe the real
  precedence chain.
- Strengthen tests: add Temperature subtest; add
  TestNewPreservesIsSetSemantics regression covering all seven keys;
  split TestNewWithProviderOptions into three subtests including
  Options-beats-viper-state and ProviderURL propagation; add
  resetViper helper so subtests don't bleed state.
- Document the new SDK fields (MaxTokens, ThinkingLevel, Temperature,
  TopP, TopK, FrequencyPenalty, PresencePenalty, ProviderAPIKey,
  ProviderURL, TLSSkipVerify) in README, skills/kit-sdk, and the www
  configuration / sdk/options / sdk/overview pages, including a
  dedicated precedence table.
---
 README.md                  |  29 +++++-
 pkg/kit/config.go          |  35 +++++--
 pkg/kit/kit.go             |  30 ++++--
 pkg/kit/kit_test.go        | 197 ++++++++++++++++++++++++++++++++++---
 skills/kit-sdk/SKILL.md    |  39 ++++++++
 www/pages/configuration.md |  16 ++-
 www/pages/sdk/options.md   |  98 ++++++++++++++++--
 www/pages/sdk/overview.md  |  21 ++++
 8 files changed, 428 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 31654d39..35f6af68 100644
--- a/README.md
+++ b/README.md
@@ -126,8 +126,13 @@ model: anthropic/claude-sonnet-latest
 max-tokens: 4096
 temperature: 0.7
 stream: true
+thinking-level: "off"     # off, minimal, low, medium, high
 ```
 
+All of the above keys can also be set programmatically via the SDK
+(`kit.Options.MaxTokens`, `Options.Temperature`, `Options.ThinkingLevel`, etc.)
+without touching config files — see [SDK options](#with-options).
+
 ### Environment Variables
 
 ```bash
@@ -187,7 +192,7 @@ mcpServers:
 --no-prompt-templates    Disable prompt template loading
 
 # Generation parameters
---max-tokens             Maximum tokens in response (default: 4096)
+--max-tokens             Maximum tokens in response (default: 8192, auto-raised up to 32768 for models with larger known output limits)
 --temperature            Randomness 0.0-1.0 (default: 0.7)
 --top-p                  Nucleus sampling 0.0-1.0 (default: 0.95)
 --top-k                  Limit top K tokens (default: 40)
@@ -541,6 +546,20 @@ host, err := kit.New(ctx, &kit.Options{
     Streaming:    true,
     Quiet:        true,
 
+    // Generation parameters (override env/config/per-model defaults)
+    MaxTokens:        16384,             // 0 = auto-resolve (env → config → per-model → 4096 floor)
+    ThinkingLevel:    "medium",          // "off", "low", "medium", "high"
+    Temperature:      ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default
+    TopP:             nil,                // nil = leave provider/per-model default
+    TopK:             nil,
+    FrequencyPenalty: nil,
+    PresencePenalty:  nil,
+
+    // Provider configuration (override env/config without reaching into viper)
+    ProviderAPIKey: "sk-...",                      // "" = use config / provider env var
+    ProviderURL:    "https://proxy.internal/v1",   // "" = provider default
+    TLSSkipVerify:  false,                         // only takes effect when true
+
     // Session options
     SessionPath:  "./session.jsonl",  // Open specific session
     Continue:     true,                // Resume most recent session
@@ -561,6 +580,14 @@ host, err := kit.New(ctx, &kit.Options{
 })
 ```
 
+**Generation & provider fields** (added in v0.55+) let SDK consumers configure
+Kit entirely in-code without `viper.Set()` workarounds or shipping a `.kit.yml`.
+Precedence is `Options` > `KIT_*` env vars > `.kit.yml` > per-model defaults
+(`modelSettings` / `customModels`) > provider-level defaults. Sampling params
+are pointer types so explicit `0.0` is distinguishable from "leave alone"; a
+non-zero `MaxTokens` suppresses automatic right-sizing the same way `--max-tokens`
+does on the CLI.
+
 ### Custom Tools
 
 Create custom tools with automatic schema generation — no external dependencies needed:
diff --git a/pkg/kit/config.go b/pkg/kit/config.go
index 80d85b8f..ae4a7dc6 100644
--- a/pkg/kit/config.go
+++ b/pkg/kit/config.go
@@ -38,20 +38,35 @@ Guidelines:
 - Be concise in your responses
 - Show file paths clearly when working with files`
 
-// setSDKDefaults registers the same viper defaults that the CLI sets via
-// cobra flag bindings. This ensures the SDK behaves identically to the CLI
-// even when cobra is not used.
+// sdkDefaultMaxTokens is the last-resort max-tokens fallback applied when the SDK caller
+// has not configured max-tokens via Options, env, config, or a per-model
+// default. It is intentionally applied on the *models.ProviderConfig struct
+// (not via viper) so that viper.IsSet("max-tokens") remains false and the
+// right-sizing + per-model-default paths continue to work.
+const sdkDefaultMaxTokens = 4096
+
+// setSDKDefaults registers viper defaults that match the CLI's cobra flag
+// defaults for keys where SetDefault does not interfere with downstream
+// viper.IsSet() checks.
+//
+// Keys that participate in "explicit vs unset" precedence downstream —
+// max-tokens, temperature, top-p, top-k, frequency-penalty, presence-penalty,
+// thinking-level — are deliberately NOT registered here. viper.SetDefault
+// causes viper.IsSet() to return true, which would suppress per-model
+// defaults (ApplyModelSettings) and automatic right-sizing (rightSizeMaxTokens)
+// for every SDK-created Kit. Those defaults are instead applied:
+//
+//   - max-tokens: as a last-resort struct-level floor (sdkDefaultMaxTokens)
+//     in kit.New() after BuildProviderConfig returns, when the resolved
+//     value is still zero.
+//   - thinking-level: handled implicitly by models.ParseThinkingLevel("")
+//     which returns models.ThinkingOff.
+//   - sampling params (temperature, top-p, top-k, frequency/presence-penalty):
+//     left as nil pointers so provider libraries apply their own defaults.
 func setSDKDefaults() {
 	viper.SetDefault("model", "anthropic/claude-sonnet-4-5-20250929")
 	viper.SetDefault("system-prompt", defaultSystemPrompt)
-	viper.SetDefault("max-tokens", 4096)
-	viper.SetDefault("temperature", 0.7)
-	viper.SetDefault("top-p", 0.95)
-	viper.SetDefault("top-k", 40)
-	viper.SetDefault("frequency-penalty", 0.0)
-	viper.SetDefault("presence-penalty", 0.0)
 	viper.SetDefault("stream", true)
-	viper.SetDefault("thinking-level", "off")
 	viper.SetDefault("num-gpu-layers", -1)
 	viper.SetDefault("main-gpu", 0)
 }
diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go
index f17335ea..fcfcac61 100644
--- a/pkg/kit/kit.go
+++ b/pkg/kit/kit.go
@@ -825,22 +825,27 @@ type Options struct {
 	// .kit.yml / KIT_* environment variables. Leaving a field at its
 	// zero/nil value means "use the configured default", which in turn
 	// falls back to per-model defaults (modelSettings / customModels) and
-	// finally to the SDK defaults registered in setSDKDefaults().
+	// finally to a last-resort SDK floor of 4096 for MaxTokens (sampling
+	// params fall through to provider-level defaults).
 	//
 	// Pointer types are used for sampling parameters so the SDK can
 	// distinguish "explicitly set to 0" from "leave alone".
 
 	// MaxTokens overrides the maximum output tokens per LLM response.
-	// 0 = use the configured default (SDK default is 4096). Bump this
-	// when generating long outputs (HTML artifacts, large refactors,
-	// etc.) to avoid silent truncation mid-tool-call. The cap also
-	// applies after model switches via [Kit.SetModel].
+	// 0 = let the precedence chain resolve a value (env → config →
+	// per-model → 4096 SDK floor). Setting a non-zero value here
+	// suppresses automatic right-sizing, matching the CLI's
+	// --max-tokens flag semantics. Bump this when generating long
+	// outputs (HTML artifacts, large refactors, etc.) to avoid silent
+	// truncation mid-tool-call. The cap also applies after model
+	// switches via [Kit.SetModel].
 	MaxTokens int
 
 	// ThinkingLevel sets the reasoning effort for models that support
 	// extended thinking. Valid values: "off", "low", "medium", "high".
-	// "" = use the configured default (SDK default is "off"). Use
-	// [Kit.SetThinkingLevel] to change at runtime.
+	// "" = let the precedence chain resolve a level (env → config →
+	// per-model → "off"). Use [Kit.SetThinkingLevel] to change at
+	// runtime.
 	ThinkingLevel string
 
 	// Temperature controls sampling randomness (typically 0.0–2.0).
@@ -1237,6 +1242,17 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
 		if pcErr != nil {
 			return fmt.Errorf("failed to build provider config: %w", pcErr)
 		}
+
+		// SDK last-resort max-tokens floor. When nothing — Options, env,
+		// config, nor a per-model default — supplied a value, we land on
+		// zero here (viper.GetInt returns 0 for unset keys). Apply the
+		// SDK default directly on the struct rather than via viper so
+		// viper.IsSet("max-tokens") stays false: downstream right-sizing
+		// can still raise this toward the model's known output ceiling,
+		// and per-model modelSettings[...].maxTokens can still win.
+		if providerConfig.MaxTokens == 0 && opts.MaxTokens == 0 {
+			providerConfig.MaxTokens = sdkDefaultMaxTokens
+		}
 		modelString = viper.GetString("model")
 		debug = viper.GetBool("debug")
 		noExtensions = opts.NoExtensions || viper.GetBool("no-extensions")
diff --git a/pkg/kit/kit_test.go b/pkg/kit/kit_test.go
index 0810637e..0fbdbb60 100644
--- a/pkg/kit/kit_test.go
+++ b/pkg/kit/kit_test.go
@@ -5,6 +5,8 @@ import (
 	"os"
 	"testing"
 
+	"github.com/spf13/viper"
+
 	kit "github.com/mark3labs/kit/pkg/kit"
 )
 
@@ -67,6 +69,8 @@ func TestNewWithGenerationOptions(t *testing.T) {
 	// MaxTokens override — keep ThinkingLevel off so Anthropic's thinking
 	// budget doesn't auto-bump MaxTokens above what we configured.
 	t.Run("MaxTokens", func(t *testing.T) {
+		defer resetViper()
+
 		const want = 12345
 		host, err := kit.New(ctx, &kit.Options{
 			Model:     "anthropic/claude-sonnet-4-5-20250929",
@@ -81,11 +85,16 @@ func TestNewWithGenerationOptions(t *testing.T) {
 		if got := host.MaxTokens(); got != want {
 			t.Errorf("Options.MaxTokens=%d did not propagate; Kit.MaxTokens()=%d", want, got)
 		}
+		if !viper.IsSet("max-tokens") {
+			t.Error("viper.IsSet(\"max-tokens\") should be true after MaxTokens override")
+		}
 	})
 
 	// ThinkingLevel override — verified via the public getter, which
 	// reads back the configured (not provider-derived) level.
 	t.Run("ThinkingLevel", func(t *testing.T) {
+		defer resetViper()
+
 		const want = "high"
 		host, err := kit.New(ctx, &kit.Options{
 			Model:         "anthropic/claude-sonnet-4-5-20250929",
@@ -101,30 +110,172 @@ func TestNewWithGenerationOptions(t *testing.T) {
 			t.Errorf("Options.ThinkingLevel=%q did not propagate; Kit.GetThinkingLevel()=%q", want, got)
 		}
 	})
+
+	// Temperature override — pointer semantics let callers distinguish
+	// "explicitly 0.0" from "unset", which we assert by pushing a distinct
+	// value and reading it back off viper's merged state.
+	t.Run("Temperature", func(t *testing.T) {
+		defer resetViper()
+
+		want := float32(0.12345)
+		host, err := kit.New(ctx, &kit.Options{
+			Model:       "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:       true,
+			Temperature: &want,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if !viper.IsSet("temperature") {
+			t.Fatal("viper.IsSet(\"temperature\") should be true after Temperature override")
+		}
+		if got := float32(viper.GetFloat64("temperature")); got != want {
+			t.Errorf("Options.Temperature=%v did not propagate; viper=%v", want, got)
+		}
+	})
 }
 
-// TestNewWithProviderOptions verifies that programmatic provider overrides
-// (API key, URL) take effect without env vars or config files.
-func TestNewWithProviderOptions(t *testing.T) {
+// TestNewPreservesIsSetSemantics verifies that creating a Kit WITHOUT
+// populating the generation-param Options fields does NOT mark those
+// keys as explicitly set in viper. This is the precedence contract
+// that per-model defaults (ApplyModelSettings) and right-sizing
+// (rightSizeMaxTokens) rely on.
+//
+// Previously setSDKDefaults() used viper.SetDefault() for every param,
+// which caused viper.IsSet() to return true for all of them — silently
+// suppressing per-model defaults and pinning max-tokens at 4096 even
+// on models with much larger output limits.
+func TestNewPreservesIsSetSemantics(t *testing.T) {
 	if os.Getenv("ANTHROPIC_API_KEY") == "" {
 		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
 	}
 
-	ctx := context.Background()
-
-	// Use the real key but pass it via Options instead of env. Kit should
-	// authenticate successfully — proving the override reached the provider.
-	apiKey := os.Getenv("ANTHROPIC_API_KEY")
+	defer resetViper()
 
+	ctx := context.Background()
 	host, err := kit.New(ctx, &kit.Options{
-		Model:          "anthropic/claude-sonnet-4-5-20250929",
-		Quiet:          true,
-		ProviderAPIKey: apiKey,
+		Model:      "anthropic/claude-sonnet-4-5-20250929",
+		Quiet:      true,
+		NoSession:  true,
+		SkipConfig: true, // isolate from any ~/.kit.yml values
 	})
 	if err != nil {
-		t.Fatalf("Failed to create Kit with ProviderAPIKey option: %v", err)
+		t.Fatalf("Failed to create Kit: %v", err)
 	}
 	defer func() { _ = host.Close() }()
+
+	// These keys must remain "unset" from viper's perspective so the
+	// downstream isExplicitlySet() checks allow per-model defaults to
+	// take effect.
+	checkKeys := []string{
+		"max-tokens",
+		"temperature",
+		"top-p",
+		"top-k",
+		"frequency-penalty",
+		"presence-penalty",
+		"thinking-level",
+	}
+
+	// Skip any keys that a KIT_* env var legitimately sets in this
+	// environment — we only care about keys the SDK itself would have
+	// set without the fix.
+	for _, k := range checkKeys {
+		envVar := "KIT_" + upper(k)
+		if os.Getenv(envVar) != "" {
+			continue
+		}
+		if viper.IsSet(k) {
+			t.Errorf("viper.IsSet(%q) == true when no Options field, env var, or config set it "+
+				"(SDK defaults must not corrupt IsSet semantics)", k)
+		}
+	}
+}
+
+// TestNewWithProviderOptions verifies that programmatic provider overrides
+// (API key, URL) take effect without env vars or config files, and that
+// Options.ProviderAPIKey *wins* over any pre-existing viper state.
+func TestNewWithProviderOptions(t *testing.T) {
+	if os.Getenv("ANTHROPIC_API_KEY") == "" {
+		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
+	}
+
+	ctx := context.Background()
+
+	t.Run("succeeds with API key from Options", func(t *testing.T) {
+		defer resetViper()
+
+		apiKey := os.Getenv("ANTHROPIC_API_KEY")
+		host, err := kit.New(ctx, &kit.Options{
+			Model:          "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:          true,
+			NoSession:      true,
+			ProviderAPIKey: apiKey,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit with ProviderAPIKey option: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if got := viper.GetString("provider-api-key"); got != apiKey {
+			t.Errorf("Options.ProviderAPIKey did not propagate to viper; got %q (len=%d)", got, len(got))
+		}
+	})
+
+	// Override precedence: even when viper already holds a different
+	// provider-api-key value (as it would if a config file or earlier
+	// Set() call populated one), Options.ProviderAPIKey must win.
+	t.Run("Options override beats pre-existing viper state", func(t *testing.T) {
+		defer resetViper()
+
+		viper.Set("provider-api-key", "sk-config-file-placeholder")
+
+		want := "sk-from-options-override"
+		// Use an OpenAI-flavored model so the validation path accepts
+		// the placeholder without attempting a real Anthropic handshake.
+		host, err := kit.New(ctx, &kit.Options{
+			Model:            "openai/gpt-4o-mini",
+			Quiet:            true,
+			NoSession:        true,
+			NoExtensions:     true,
+			DisableCoreTools: true,
+			ProviderAPIKey:   want,
+		})
+		// Creation may still fail if the model registry is strict, but
+		// we only care that the override reached viper before any
+		// provider handshake happened.
+		if host != nil {
+			defer func() { _ = host.Close() }()
+		}
+		_ = err
+
+		if got := viper.GetString("provider-api-key"); got != want {
+			t.Errorf("Options.ProviderAPIKey did not override pre-existing viper value; got %q, want %q", got, want)
+		}
+	})
+
+	// ProviderURL override must also reach viper.
+	t.Run("ProviderURL propagates", func(t *testing.T) {
+		defer resetViper()
+
+		const want = "https://custom.example.com/v1"
+		host, err := kit.New(ctx, &kit.Options{
+			Model:       "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:       true,
+			NoSession:   true,
+			ProviderURL: want,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit with ProviderURL option: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if got := viper.GetString("provider-url"); got != want {
+			t.Errorf("Options.ProviderURL did not propagate; got %q, want %q", got, want)
+		}
+	})
 }
 
 func TestSessionManagement(t *testing.T) {
@@ -154,3 +305,25 @@ func TestSessionManagement(t *testing.T) {
 		t.Error("Expected non-empty session ID")
 	}
 }
+
+// resetViper wipes viper's global state so a test case doesn't leak
+// viper.Set() calls into the next one. Used via defer in subtests.
+func resetViper() { viper.Reset() }
+
+// upper returns s with ASCII letters upper-cased and '-' converted to '_'.
+// Used to translate a config key into its KIT_* environment variable name.
+func upper(s string) string {
+	out := make([]byte, len(s))
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		switch {
+		case c == '-':
+			out[i] = '_'
+		case c >= 'a' && c <= 'z':
+			out[i] = c - 'a' + 'A'
+		default:
+			out[i] = c
+		}
+	}
+	return string(out)
+}
diff --git a/skills/kit-sdk/SKILL.md b/skills/kit-sdk/SKILL.md
index b9527818..a708054e 100644
--- a/skills/kit-sdk/SKILL.md
+++ b/skills/kit-sdk/SKILL.md
@@ -80,6 +80,23 @@ host, err := kit.New(ctx, &kit.Options{
     Quiet:     true, // suppress debug output
     Debug:     true, // enable debug logging
 
+    // Generation parameters — override env/config/per-model defaults.
+    // Leaving a field at its zero/nil value lets the precedence chain
+    // resolve a value (KIT_* env → .kit.yml → modelSettings/customModels →
+    // 4096 floor for MaxTokens, provider defaults for samplers).
+    MaxTokens:        16384,             // 0 = auto-resolve; non-zero suppresses right-sizing
+    ThinkingLevel:    "medium",          // "off", "low", "medium", "high" ("" = default)
+    Temperature:      ptrFloat32(0.2),   // pointer so explicit 0.0 != unset
+    TopP:             nil,                // nil = leave provider/per-model default
+    TopK:             nil,                // nil = leave provider/per-model default
+    FrequencyPenalty: nil,
+    PresencePenalty:  nil,
+
+    // Provider configuration — override env/config without viper.Set workarounds.
+    ProviderAPIKey: "sk-...",                    // "" = use config / provider env var
+    ProviderURL:    "https://proxy.internal/v1", // "" = provider default endpoint
+    TLSSkipVerify:  false,                       // true only; can't force-disable via Options
+
     // Session
     SessionDir:  "/path/to/project",  // base dir for session discovery (default: cwd)
     SessionPath: "/path/to/session.jsonl", // open specific session file
@@ -118,12 +135,34 @@ host, err := kit.New(ctx, &kit.Options{
         "docs": mcpSrv,  // *server.MCPServer from mcp-go — no subprocess needed
     },
 })
+
+// Tiny helper to take the address of a literal for pointer fields.
+func ptrFloat32(v float32) *float32 { return &v }
 ```
 
 **Critical distinction**: `Tools` replaces ALL default tools (core + MCP + extension). `ExtraTools` adds tools alongside the defaults. Use `Tools` to restrict the agent's capabilities; use `ExtraTools` to extend them.
 
 **In-process MCP servers** bypass subprocess spawning entirely. Pass `*server.MCPServer` instances from mcp-go via `InProcessMCPServers` or call `AddInProcessMCPServer()` at runtime.
 
+### Generation & provider Options (cheat sheet)
+
+| Field | Type | Empty/nil means | Notes |
+|-------|------|-----------------|-------|
+| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 4096 floor) | Non-zero suppresses `rightSizeMaxTokens` |
+| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) |
+| `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset |
+| `TopP` | `*float32` | Leave provider/per-model default | |
+| `TopK` | `*int32` | Leave provider/per-model default | |
+| `FrequencyPenalty` | `*float32` | Leave provider/per-model default | OpenAI-family |
+| `PresencePenalty` | `*float32` | Leave provider/per-model default | OpenAI-family |
+| `ProviderAPIKey` | `string` | Use config / provider env var | Overrides pre-existing viper state |
+| `ProviderURL` | `string` | Use provider default endpoint | Same base URL flag as `--provider-url` |
+| `TLSSkipVerify` | `bool` | — | Only effective when `true`; cannot force-disable via Options |
+
+These fields eliminate the old `viper.Set("max-tokens", 16384)` dance many
+downstream embedders used to do before calling `kit.New()`. Everything is
+now discoverable via godoc on `kit.Options`.
+
 ---
 
 ## Prompt Methods
diff --git a/www/pages/configuration.md b/www/pages/configuration.md
index 802fa5c4..c8735bac 100644
--- a/www/pages/configuration.md
+++ b/www/pages/configuration.md
@@ -175,10 +175,24 @@ modelSettings:
 | `thinkingLevel` | string | Thinking level override |
 | `systemPrompt` | string | Per-model system prompt (used when no explicit prompt is set) |
 
-Settings from `modelSettings` and `customModels.params` act as model-level defaults — explicit CLI flags and global config values always take precedence.
+Settings from `modelSettings` and `customModels.params` act as model-level defaults — explicit CLI flags, `KIT_*` environment variables, global config values, and SDK `Options.*` fields all take precedence over them.
 
 When switching models via `/model` or `SetModel()`, if the new model has a per-model system prompt and no custom global prompt was set, the per-model prompt automatically replaces the previous one.
 
+### Precedence summary
+
+For the generation and provider parameters documented above, the resolved value at runtime comes from the first source that sets it:
+
+1. CLI flag (e.g. `--max-tokens`, `--temperature`, `--provider-api-key`)
+2. SDK `Options.X` when embedding Kit as a library (`kit.Options.MaxTokens`, `Temperature`, `ProviderAPIKey`, etc.)
+3. `KIT_*` environment variable (`KIT_MAX_TOKENS`, `KIT_TEMPERATURE`, ...)
+4. `.kit.yml` / `.kit.yaml` / `.kit.json` (project-local, then global)
+5. Per-model defaults (`modelSettings[provider/model]` / `customModels[...].params`)
+6. Provider-level defaults (e.g. Anthropic's own temperature default)
+7. SDK last-resort floor — currently a 4096 output-token ceiling when nothing else is configured
+
+See the [SDK options reference](/sdk/options) for the full list of `kit.Options` fields that map to these keys.
+
 ## Theme configuration
 
 ```yaml
diff --git a/www/pages/sdk/options.md b/www/pages/sdk/options.md
index 49c453d7..0c69cb03 100644
--- a/www/pages/sdk/options.md
+++ b/www/pages/sdk/options.md
@@ -22,6 +22,20 @@ host, err := kit.New(ctx, &kit.Options{
     Quiet:        true,
     Debug:        true,
 
+    // Generation parameters (override env/config/per-model defaults)
+    MaxTokens:        16384,              // 0 = auto-resolve; non-zero suppresses right-sizing
+    ThinkingLevel:    "medium",           // "off", "low", "medium", "high"
+    Temperature:      ptrFloat32(0.2),    // pointer so explicit 0.0 != unset
+    TopP:             nil,                 // nil = provider/per-model default
+    TopK:             nil,
+    FrequencyPenalty: nil,
+    PresencePenalty:  nil,
+
+    // Provider configuration
+    ProviderAPIKey: "sk-...",                      // "" = use config / provider env var
+    ProviderURL:    "https://proxy.internal/v1",  // "" = provider default endpoint
+    TLSSkipVerify:  false,                         // only effective when true
+
     // Session
     SessionPath:  "./session.jsonl",
     SessionDir:   "/custom/sessions/",
@@ -65,6 +79,8 @@ host, err := kit.New(ctx, &kit.Options{
 
 ## Options fields
 
+### Core
+
 | Field | Type | Default | Description |
 |-------|------|---------|-------------|
 | `Model` | `string` | config default | Model string (provider/model format) |
@@ -74,25 +90,95 @@ host, err := kit.New(ctx, &kit.Options{
 | `Streaming` | `bool` | `true` | Enable streaming output |
 | `Quiet` | `bool` | `false` | Suppress output |
 | `Debug` | `bool` | `false` | Enable debug logging |
+
+### Generation parameters
+
+These fields override the corresponding values from `.kit.yml` / `KIT_*`
+environment variables. Leaving a field at its zero/nil value lets the
+precedence chain resolve a value (`KIT_*` env → config file → per-model
+defaults from `modelSettings`/`customModels` → a 4096 SDK floor for
+`MaxTokens` and provider-level defaults for samplers).
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `MaxTokens` | `int` | auto-resolved | Max output tokens per response. `0` = auto-resolve; non-zero suppresses automatic right-sizing (same semantics as `--max-tokens`). |
+| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"low"`, `"medium"`, `"high"` (some providers also accept `"minimal"`). `""` falls through to env → config → per-model → `"off"`. |
+| `Temperature` | `*float32` | — | Sampling randomness. Pointer type so explicit `0.0` is distinguishable from "unset". |
+| `TopP` | `*float32` | — | Nucleus sampling cutoff. `nil` leaves provider/per-model default. |
+| `TopK` | `*int32` | — | Top-K sampling limit. `nil` leaves provider/per-model default. |
+| `FrequencyPenalty` | `*float32` | — | OpenAI-family frequency penalty. `nil` leaves provider default. |
+| `PresencePenalty` | `*float32` | — | OpenAI-family presence penalty. `nil` leaves provider default. |
+
+Pointer-typed samplers are populated via a tiny helper:
+
+```go
+func ptrFloat32(v float32) *float32 { return &v }
+```
+
+These fields eliminate the need for `viper.Set()` calls before `kit.New()`
+when embedding Kit as a library.
+
+### Provider configuration
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `ProviderAPIKey` | `string` | — | API key used to authenticate with the provider. `""` falls back to config / provider-specific env var (e.g. `ANTHROPIC_API_KEY`). When set, overrides any pre-existing viper state. |
+| `ProviderURL` | `string` | — | Override the provider endpoint (e.g. LiteLLM, vLLM, Azure OpenAI, internal proxy). `""` = provider default. |
+| `TLSSkipVerify` | `bool` | `false` | Disable TLS certificate verification on the provider HTTP client. Only effective when `true`: leaving it `false` does not override a `true` coming from the config file or a `KIT_*` env var, so to turn verification back on, change it there instead. For self-signed dev certs only. |
+
+### Session
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
 | `SessionPath` | `string` | — | Open a specific session file |
 | `SessionDir` | `string` | — | Base directory for session discovery |
 | `Continue` | `bool` | `false` | Resume most recent session |
 | `NoSession` | `bool` | `false` | Ephemeral mode (no persistence) |
+| `SessionManager` | `SessionManager` | — | Custom session backend (advanced) |
+
+### Tools & extensions
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
 | `Tools` | `[]Tool` | — | Replace the entire default tool set |
 | `ExtraTools` | `[]Tool` | — | Additional tools alongside core/MCP/extension tools |
 | `DisableCoreTools` | `bool` | `false` | Use no core tools (0 tools, for chat-only) |
-| `SkipConfig` | `bool` | `false` | Skip .kit.yml file loading |
-| `AutoCompact` | `bool` | `false` | Auto-compact when near context limit |
-| `CompactionOptions` | `*CompactionOptions` | — | Configuration for auto-compaction |
+| `NoExtensions` | `bool` | `false` | Disable Yaegi extension loading |
+| `NoContextFiles` | `bool` | `false` | Disable automatic AGENTS.md loading |
+
+### Skills & configuration
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `SkipConfig` | `bool` | `false` | Skip `.kit.yml` file loading (viper defaults + env vars still apply) |
 | `Skills` | `[]string` | — | Explicit skill files/dirs to load |
 | `SkillsDir` | `string` | — | Override default skills directory |
 | `NoSkills` | `bool` | `false` | Disable skill loading entirely |
-| `NoExtensions` | `bool` | `false` | Disable Yaegi extension loading |
-| `NoContextFiles` | `bool` | `false` | Disable automatic AGENTS.md loading |
-| `SessionManager` | `SessionManager` | — | Custom session backend (advanced) |
+
+### Compaction & MCP
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `AutoCompact` | `bool` | `false` | Auto-compact when near context limit |
+| `CompactionOptions` | `*CompactionOptions` | — | Configuration for auto-compaction |
 | `MCPTokenStoreFactory` | `func` | — | Custom OAuth token storage for MCP servers |
 | `InProcessMCPServers` | `map[string]*MCPServer` | — | In-process mcp-go servers (no subprocess) |
 
+## Precedence
+
+For any given generation or provider field, the effective value is resolved
+in this order (highest priority first):
+
+1. `Options.X` (SDK caller)
+2. `KIT_X` environment variable
+3. `.kit.yml` (project-local then `~/.kit.yml`)
+4. Per-model defaults (`modelSettings[provider/model]` or `customModels[...].params`)
+5. Provider-level defaults (e.g. Anthropic's own temperature default)
+6. SDK last-resort floor (currently: `MaxTokens = 4096`)
+
+Sampling params that remain `nil` after the SDK resolution step are left out
+of the provider call entirely, so the LLM library applies its own default.
+
 ## Tool configuration
 
 **`Tools`** replaces ALL default tools (core + MCP + extension). **`ExtraTools`** adds tools alongside the defaults. Use `Tools` to restrict capabilities; use `ExtraTools` to extend them.
diff --git a/www/pages/sdk/overview.md b/www/pages/sdk/overview.md
index 4a45eff0..b16c63d0 100644
--- a/www/pages/sdk/overview.md
+++ b/www/pages/sdk/overview.md
@@ -106,6 +106,27 @@ For advanced use, return a `kit.ToolOutput` struct directly with `Data`, `MediaT
 
 Use `kit.NewParallelTool` for tools that are safe to run concurrently. Use `kit.ToolCallIDFromContext(ctx)` to retrieve the LLM-assigned call ID for logging or tracing.
 
+## Generation & provider overrides
+
+SDK consumers can configure generation parameters and provider endpoints
+entirely in-code via `Options`, without touching `.kit.yml` or `viper.Set()`:
+
+```go
+host, _ := kit.New(ctx, &kit.Options{
+    Model:          "anthropic/claude-sonnet-4-5-20250929",
+    MaxTokens:      16384,             // 0 = auto-resolve (env → config → per-model → floor)
+    ThinkingLevel:  "high",            // "off" | "low" | "medium" | "high"
+    Temperature:    ptrFloat32(0.2),   // nil = provider/per-model default
+    ProviderAPIKey: os.Getenv("MY_SECRET"), // overrides pre-existing viper state
+    ProviderURL:    "https://proxy.internal/v1",
+})
+
+func ptrFloat32(v float32) *float32 { return &v }
+```
+
+See [Options](/sdk/options#generation-parameters) for the full field reference,
+including `TopP`, `TopK`, `FrequencyPenalty`, `PresencePenalty`, and `TLSSkipVerify`.
+
 ## Event system
 
 Subscribe to events for monitoring:

From e1c94cb362a3cc3395598512bcf7fa078f3657bf Mon Sep 17 00:00:00 2001
From: Ed Zynda <ezynda3@gmail.com>
Date: Fri, 17 Apr 2026 11:59:49 +0300
Subject: [PATCH 3/7] =?UTF-8?q?fix(sdk):=20align=20SDK=20max-tokens=20floo?=
 =?UTF-8?q?r=20with=20CLI=20default=20(4096=20=E2=86=92=208192)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SDK last-resort MaxTokens floor is applied in kit.New() when
Options.MaxTokens, KIT_MAX_TOKENS, .kit.yml, and per-model defaults
are all unset. It was 4096 (inherited from the old setSDKDefaults
viper default) while the CLI --max-tokens cobra default is 8192.

Bump the floor to 8192 so SDK and CLI callers start from the same
base value before rightSizeMaxTokens runs, then update README,
skills/kit-sdk/SKILL.md, and www/pages/{configuration,sdk/options}.md
to match.
---
 README.md                  |  2 +-
 pkg/kit/config.go          |  6 ++++--
 pkg/kit/kit.go             | 17 +++++++++--------
 skills/kit-sdk/SKILL.md    |  4 ++--
 www/pages/configuration.md |  2 +-
 www/pages/sdk/options.md   |  7 ++++---
 6 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 35f6af68..4653d1ca 100644
--- a/README.md
+++ b/README.md
@@ -547,7 +547,7 @@ host, err := kit.New(ctx, &kit.Options{
     Quiet:        true,
 
     // Generation parameters (override env/config/per-model defaults)
-    MaxTokens:        16384,             // 0 = auto-resolve (env → config → per-model → 4096 floor)
+    MaxTokens:        16384,             // 0 = auto-resolve (env → config → per-model → 8192 floor)
     ThinkingLevel:    "medium",          // "off", "low", "medium", "high"
     Temperature:      ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default
     TopP:             nil,                // nil = leave provider/per-model default
diff --git a/pkg/kit/config.go b/pkg/kit/config.go
index ae4a7dc6..7ff28bf8 100644
--- a/pkg/kit/config.go
+++ b/pkg/kit/config.go
@@ -40,10 +40,12 @@ Guidelines:
 
 // sdkDefaultMaxTokens is the last-resort ceiling applied when the SDK caller
 // has not configured max-tokens via Options, env, config, or a per-model
-// default. It is intentionally applied on the *models.ProviderConfig struct
+// default. It matches the CLI's --max-tokens cobra default so SDK and CLI
+// callers see the same base value before per-model right-sizing runs.
+// It is intentionally applied on the *models.ProviderConfig struct
 // (not via viper) so that viper.IsSet("max-tokens") remains false and the
 // right-sizing + per-model-default paths continue to work.
-const sdkDefaultMaxTokens = 4096
+const sdkDefaultMaxTokens = 8192
 
 // setSDKDefaults registers viper defaults that match the CLI's cobra flag
 // defaults for keys where SetDefault does not interfere with downstream
diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go
index fcfcac61..632bde53 100644
--- a/pkg/kit/kit.go
+++ b/pkg/kit/kit.go
@@ -825,20 +825,21 @@ type Options struct {
 	// .kit.yml / KIT_* environment variables. Leaving a field at its
 	// zero/nil value means "use the configured default", which in turn
 	// falls back to per-model defaults (modelSettings / customModels) and
-	// finally to a last-resort SDK floor of 4096 for MaxTokens (sampling
-	// params fall through to provider-level defaults).
+	// finally to a last-resort SDK floor of 8192 for MaxTokens (matching
+	// the CLI --max-tokens default; sampling params fall through to
+	// provider-level defaults).
 	//
 	// Pointer types are used for sampling parameters so the SDK can
 	// distinguish "explicitly set to 0" from "leave alone".
 
 	// MaxTokens overrides the maximum output tokens per LLM response.
 	// 0 = let the precedence chain resolve a value (env → config →
-	// per-model → 4096 SDK floor). Setting a non-zero value here
-	// suppresses automatic right-sizing, matching the CLI's
-	// --max-tokens flag semantics. Bump this when generating long
-	// outputs (HTML artifacts, large refactors, etc.) to avoid silent
-	// truncation mid-tool-call. The cap also applies after model
-	// switches via [Kit.SetModel].
+	// per-model → 8192 SDK floor, matching the CLI default). Setting a
+	// non-zero value here suppresses automatic right-sizing, matching
+	// the CLI's --max-tokens flag semantics. Bump this when generating
+	// long outputs (HTML artifacts, large refactors, etc.) to avoid
+	// silent truncation mid-tool-call. The cap also applies after
+	// model switches via [Kit.SetModel].
 	MaxTokens int
 
 	// ThinkingLevel sets the reasoning effort for models that support
diff --git a/skills/kit-sdk/SKILL.md b/skills/kit-sdk/SKILL.md
index a708054e..0dc1a02e 100644
--- a/skills/kit-sdk/SKILL.md
+++ b/skills/kit-sdk/SKILL.md
@@ -83,7 +83,7 @@ host, err := kit.New(ctx, &kit.Options{
     // Generation parameters — override env/config/per-model defaults.
     // Leaving a field at its zero/nil value lets the precedence chain
     // resolve a value (KIT_* env → .kit.yml → modelSettings/customModels →
-    // 4096 floor for MaxTokens, provider defaults for samplers).
+    // 8192 floor for MaxTokens, provider defaults for samplers).
     MaxTokens:        16384,             // 0 = auto-resolve; non-zero suppresses right-sizing
     ThinkingLevel:    "medium",          // "off", "low", "medium", "high" ("" = default)
     Temperature:      ptrFloat32(0.2),   // pointer so explicit 0.0 != unset
@@ -148,7 +148,7 @@ func ptrFloat32(v float32) *float32 { return &v }
 
 | Field | Type | Empty/nil means | Notes |
 |-------|------|-----------------|-------|
-| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 4096 floor) | Non-zero suppresses `rightSizeMaxTokens` |
+| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 8192 floor) | Non-zero suppresses `rightSizeMaxTokens` |
 | `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) |
 | `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset |
 | `TopP` | `*float32` | Leave provider/per-model default | |
diff --git a/www/pages/configuration.md b/www/pages/configuration.md
index c8735bac..2176f07d 100644
--- a/www/pages/configuration.md
+++ b/www/pages/configuration.md
@@ -189,7 +189,7 @@ For the generation and provider parameters documented above, the resolved value
 4. `.kit.yml` / `.kit.yaml` / `.kit.json` (project-local, then global)
 5. Per-model defaults (`modelSettings[provider/model]` / `customModels[...].params`)
 6. Provider-level defaults (e.g. Anthropic's own temperature default)
-7. SDK last-resort floor — currently a 4096 output-token ceiling when nothing else is configured
+7. SDK last-resort floor — currently an 8192 output-token limit (matching the CLI `--max-tokens` default), auto-raised per-model up to 32768 when the model's catalog ceiling is higher
 
 See the [SDK options reference](/sdk/options) for the full list of `kit.Options` fields that map to these keys.
 
diff --git a/www/pages/sdk/options.md b/www/pages/sdk/options.md
index 0c69cb03..29a55911 100644
--- a/www/pages/sdk/options.md
+++ b/www/pages/sdk/options.md
@@ -96,8 +96,9 @@ host, err := kit.New(ctx, &kit.Options{
 These fields override the corresponding values from `.kit.yml` / `KIT_*`
 environment variables. Leaving a field at its zero/nil value lets the
 precedence chain resolve a value (`KIT_*` env → config file → per-model
-defaults from `modelSettings`/`customModels` → a 4096 SDK floor for
-`MaxTokens` and provider-level defaults for samplers).
+defaults from `modelSettings`/`customModels` → an 8192 SDK floor for
+`MaxTokens` (matching the CLI `--max-tokens` default) and provider-level
+defaults for samplers).
 
 | Field | Type | Default | Description |
 |-------|------|---------|-------------|
@@ -174,7 +175,7 @@ in this order (highest priority first):
 3. `.kit.yml` (project-local then `~/.kit.yml`)
 4. Per-model defaults (`modelSettings[provider/model]` or `customModels[...].params`)
 5. Provider-level defaults (e.g. Anthropic's own temperature default)
-6. SDK last-resort floor (currently: `MaxTokens = 4096`)
+6. SDK last-resort floor (currently: `MaxTokens = 8192`, matching the CLI `--max-tokens` default)
 
 Sampling params that remain `nil` after the SDK resolution step are left out
 of the provider call entirely, so the LLM library applies its own default.

From 53b628c5f8fd52c9ad9a199def24a44ba7a79ee8 Mon Sep 17 00:00:00 2001
From: Ed Zynda <ezynda3@gmail.com>
Date: Fri, 17 Apr 2026 12:07:29 +0300
Subject: [PATCH 4/7] fix(sdk): map hyphenated config keys to KIT_* env vars

- InitConfig now installs a viper env key replacer so keys like
  "max-tokens" bind to KIT_MAX_TOKENS under AutomaticEnv; previously
  hyphenated keys silently missed their documented env overrides.
- Simplify TestNewPreservesIsSetSemantics: with SkipConfig: true no env
  bindings are registered, so the os.Getenv guard and upper() helper
  were dead weight. Remove the guard and drop the now-unused helper.
---
 pkg/kit/config.go   |  4 ++++
 pkg/kit/kit_test.go | 31 ++++++-------------------------
 2 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/pkg/kit/config.go b/pkg/kit/config.go
index 7ff28bf8..7636bb86 100644
--- a/pkg/kit/config.go
+++ b/pkg/kit/config.go
@@ -119,6 +119,10 @@ func InitConfig(configFile string, debug bool) error {
 	}
 
 	viper.SetEnvPrefix("KIT")
+	// Map hyphenated config keys (e.g. "max-tokens") to underscored env
+	// var names (e.g. KIT_MAX_TOKENS). Without this, AutomaticEnv looks
+	// for KIT_MAX-TOKENS and silently misses valid env overrides.
+	viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
 	viper.AutomaticEnv()
 	return nil
 }
diff --git a/pkg/kit/kit_test.go b/pkg/kit/kit_test.go
index 0fbdbb60..e21680b9 100644
--- a/pkg/kit/kit_test.go
+++ b/pkg/kit/kit_test.go
@@ -179,16 +179,13 @@ func TestNewPreservesIsSetSemantics(t *testing.T) {
 		"thinking-level",
 	}
 
-	// Skip any keys that a KIT_* env var legitimately sets in this
-	// environment — we only care about keys the SDK itself would have
-	// set without the fix.
+	// With SkipConfig: true, InitConfig() is not invoked, so viper has
+	// no env-var bindings registered. Any IsSet() here would come purely
+	// from SDK-side SetDefault/Set calls — which is exactly what this
+	// test is guarding against.
 	for _, k := range checkKeys {
-		envVar := "KIT_" + upper(k)
-		if os.Getenv(envVar) != "" {
-			continue
-		}
 		if viper.IsSet(k) {
-			t.Errorf("viper.IsSet(%q) == true when no Options field, env var, or config set it "+
+			t.Errorf("viper.IsSet(%q) == true when no Options field set it "+
 				"(SDK defaults must not corrupt IsSet semantics)", k)
 		}
 	}
@@ -310,20 +307,4 @@ func TestSessionManagement(t *testing.T) {
 // viper.Set() calls into the next one. Used via defer in subtests.
 func resetViper() { viper.Reset() }
 
-// upper returns s with ASCII letters upper-cased and '-' converted to '_'.
-// Used to translate a config key into its KIT_* environment variable name.
-func upper(s string) string {
-	out := make([]byte, len(s))
-	for i := 0; i < len(s); i++ {
-		c := s[i]
-		switch {
-		case c == '-':
-			out[i] = '_'
-		case c >= 'a' && c <= 'z':
-			out[i] = c - 'a' + 'A'
-		default:
-			out[i] = c
-		}
-	}
-	return string(out)
-}
+

From 8a851723ba2b075c2f8a9bffd84da6af6aedde64 Mon Sep 17 00:00:00 2001
From: Ed Zynda <ezynda3@gmail.com>
Date: Fri, 17 Apr 2026 12:07:54 +0300
Subject: [PATCH 5/7] style(sdk): gofmt trailing newlines in kit_test.go

---
 pkg/kit/kit_test.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pkg/kit/kit_test.go b/pkg/kit/kit_test.go
index e21680b9..7eead8a2 100644
--- a/pkg/kit/kit_test.go
+++ b/pkg/kit/kit_test.go
@@ -306,5 +306,3 @@ func TestSessionManagement(t *testing.T) {
 // resetViper wipes viper's global state so a test case doesn't leak
 // viper.Set() calls into the next one. Used via defer in subtests.
 func resetViper() { viper.Reset() }
-
-

From 5ec2217b0f9e9972d5e8bd07ac189266f2146a39 Mon Sep 17 00:00:00 2001
From: Ed Zynda <ezynda3@gmail.com>
Date: Fri, 17 Apr 2026 12:09:13 +0300
Subject: [PATCH 6/7] docs(sdk): document global viper state leakage in New and
 Options

The SDK applies Options by calling viper.Set on viper's process-global
store, which means two Kits constructed in the same process are not
isolated from each other: the second New overwrites the first's keys,
and downstream readers (SetModel, GetThinkingLevel, BuildProviderConfig)
observe the most recent value.

- Add a 'Global viper state warning' block to the Options godoc
  explaining the leak, the zero-value-does-not-clear gotcha, and
  pointing at viper.Reset() as the migration workaround.
- Add a matching warning to the New godoc so consumers discover the
  constraint from either entry point.
- Detach the viperInitMu godoc (previously lodged inside New's comment
  block) and clarify that the mutex only guards the construction
  window, not instance isolation.
- Add a TODO noting the proper fix: refactor to a per-call viper.New()
  instance so each Kit owns its own config store.
---
 pkg/kit/kit.go | 48 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go
index 632bde53..d880deb2 100644
--- a/pkg/kit/kit.go
+++ b/pkg/kit/kit.go
@@ -811,6 +811,29 @@ func (m *Kit) ExecuteCompletion(ctx context.Context, req extensions.CompleteRequ
 // Options configures Kit creation with optional overrides for model,
 // prompts, configuration, and behavior settings. All fields are optional
 // and will use CLI defaults if not specified.
+//
+// Global viper state warning:
+// Options are applied by [New] via [viper.Set] calls against viper's
+// process-global store. This store is shared with every downstream reader
+// (e.g. [Kit.SetModel], [Kit.GetThinkingLevel], BuildProviderConfig, and
+// any other code path that calls viper.Get*). Two consequences:
+//
+//  1. Kit instances are NOT isolated from each other within a single
+//     process. Values set by the second New() call overwrite the first,
+//     and any code that later reads viper will see the most recent Set.
+//  2. Fields left at the zero value do NOT clear prior viper state; they
+//     simply skip the viper.Set. Callers that need a clean slate between
+//     constructions should invoke viper.Reset() (the test suite uses a
+//     private resetViper() helper that wraps it) before the next New().
+//
+// Recommended usage: create one Kit per process, or reset viper between
+// constructions. Concurrent calls to New are serialized internally by
+// [viperInitMu], but that mutex does not prevent later viper reads (from
+// a different Kit) from observing mutated keys.
+//
+// TODO: refactor New to use a per-instance *viper.Viper (constructed via
+// viper.New()) so each Kit owns its own isolated config store and Options
+// no longer leak through the global singleton.
 type Options struct {
 	Model        string // Override model (e.g., "anthropic/claude-sonnet-4-5-20250929")
 	SystemPrompt string // Override system prompt
@@ -1049,14 +1072,29 @@ func InitTreeSession(opts *Options) (*session.TreeManager, error) {
 	return session.CreateTreeSession(sessionDir)
 }
 
-// New creates a Kit instance using the same initialization as the CLI.
-// It loads configuration, initializes MCP servers, creates the LLM model, and
-// sets up the agent for interaction. Returns an error if initialization fails.
-// viperInitMu serializes viper writes during kit.New(). Viper's global state
+// viperInitMu serializes viper writes during [New]. Viper's global state
 // is not thread-safe, so concurrent calls (e.g. parallel subagent spawns)
-// must not overlap the Set()/Get() window.
+// must not overlap the Set/Get window. Note that this mutex only protects
+// the construction window — it does not isolate long-lived Kit instances
+// from each other. See the "Global viper state warning" on [Options].
 var viperInitMu sync.Mutex
 
+// New creates a Kit instance using the same initialization as the CLI.
+// It loads configuration, initializes MCP servers, creates the LLM model, and
+// sets up the agent for interaction. Returns an error if initialization fails.
+//
+// Global viper state warning: fields on [Options] are applied by calling
+// [viper.Set] on viper's process-global store. As a result, two Kits
+// constructed in the same process are NOT isolated: the second New
+// overwrites viper keys set by the first, and any downstream reader
+// (e.g. [Kit.SetModel], [Kit.GetThinkingLevel]) will observe the most
+// recent value. Callers that need multiple independent Kits should call
+// viper.Reset() between constructions, or avoid constructing more than
+// one Kit per process. Writes during New are serialized by [viperInitMu].
+//
+// TODO: refactor to use a per-call viper.New() instance so each Kit owns
+// its own isolated config store and Options stop leaking through the
+// global singleton.
 func New(ctx context.Context, opts *Options) (*Kit, error) {
 	if opts == nil {
 		opts = &Options{}

From 4e82fac442c42eeedd0f3e41e1a3fef4d6657738 Mon Sep 17 00:00:00 2001
From: Ed Zynda <ezynda3@gmail.com>
Date: Fri, 17 Apr 2026 12:13:28 +0300
Subject: [PATCH 7/7] fix(fileutil): decouple TestDetectMediaType from system
 MIME db
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TestDetectMediaType/.go fails on CI images (Ubuntu mime-support) where
/etc/mime.types registers '.go → text/x-go', because mime.TypeByExtension
reads those files at init. The test intended to exercise the 'unknown
extension falls through to text/plain' branch but used a real extension,
making the assertion environment-dependent.

Replace '.go' with '.kitsyntheticext', an invented extension that no
system MIME database registers. The fallback path is now exercised
deterministically on any host.
---
 internal/ui/fileutil/processor_test.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/internal/ui/fileutil/processor_test.go b/internal/ui/fileutil/processor_test.go
index 5539483f..cefb2c01 100644
--- a/internal/ui/fileutil/processor_test.go
+++ b/internal/ui/fileutil/processor_test.go
@@ -145,7 +145,13 @@ func TestDetectMediaType(t *testing.T) {
 		content  []byte
 		expected string
 	}{
-		{".go", nil, "text/plain"}, // .go falls back to content sniffing → text/plain
+		// An intentionally-synthetic extension that is not registered
+		// in any system MIME database. Exercises the "unknown ext +
+		// no content" branch, which must return the text/plain default.
+		// Do not use real extensions (e.g. .go) here: CI images often
+		// ship /etc/mime.types with entries like ".go → text/x-go",
+		// which would make the assertion environment-dependent.
+		{".kitsyntheticext", nil, "text/plain"},
 		{".png", []byte{0x89, 0x50, 0x4E, 0x47}, "image/png"},
 		{".jpg", []byte{0xFF, 0xD8, 0xFF}, "image/jpeg"},
 		{".pdf", []byte{0x25, 0x50, 0x44, 0x46}, "application/pdf"},