diff --git a/README.md b/README.md
index 31654d39..4653d1ca 100644
--- a/README.md
+++ b/README.md
@@ -126,8 +126,13 @@ model: anthropic/claude-sonnet-latest
 max-tokens: 4096
 temperature: 0.7
 stream: true
+thinking-level: off       # off, minimal, low, medium, high
 ```
 
+All of the above keys can also be set programmatically via the SDK
+(`kit.Options.MaxTokens`, `Options.Temperature`, `Options.ThinkingLevel`, etc.)
+without touching config files — see [SDK options](#with-options).
+
 ### Environment Variables
 
 ```bash
@@ -187,7 +192,7 @@ mcpServers:
 --no-prompt-templates    Disable prompt template loading
 
 # Generation parameters
---max-tokens             Maximum tokens in response (default: 4096)
+--max-tokens             Maximum tokens in response (default: 8192, auto-raised up to 32768 for models with larger known output limits)
 --temperature            Randomness 0.0-1.0 (default: 0.7)
 --top-p                  Nucleus sampling 0.0-1.0 (default: 0.95)
 --top-k                  Limit top K tokens (default: 40)
@@ -541,6 +546,20 @@ host, err := kit.New(ctx, &kit.Options{
     Streaming:    true,
     Quiet:        true,
 
+    // Generation parameters (override env/config/per-model defaults)
+    MaxTokens:        16384,             // 0 = auto-resolve (env → config → per-model → 8192 floor)
+    ThinkingLevel:    "medium",          // "off", "low", "medium", "high"
+    Temperature:      ptrFloat32(0.2),   // caller-defined *float32 helper; pointer so 0.0 != unset; nil = provider default
+    TopP:             nil,                // nil = leave provider/per-model default
+    TopK:             nil,
+    FrequencyPenalty: nil,
+    PresencePenalty:  nil,
+
+    // Provider configuration (override env/config without reaching into viper)
+    ProviderAPIKey: "sk-...",                      // "" = use config / provider env var
+    ProviderURL:    "https://proxy.internal/v1",   // "" = provider default
+    TLSSkipVerify:  false,                         // only takes effect when true
+
     // Session options
     SessionPath:  "./session.jsonl",  // Open specific session
     Continue:     true,                // Resume most recent session
@@ -561,6 +580,14 @@ host, err := kit.New(ctx, &kit.Options{
 })
 ```
 
+**Generation & provider fields** (available since v0.55) let SDK consumers configure
+Kit entirely in-code without `viper.Set()` workarounds or shipping a `.kit.yml`.
+Precedence is `Options` > `KIT_*` env vars > `.kit.yml` > per-model defaults
+(`modelSettings` / `customModels`) > provider-level defaults. Sampling params
+are pointer types so explicit `0.0` is distinguishable from "leave alone"; a
+non-zero `MaxTokens` suppresses automatic right-sizing the same way `--max-tokens`
+does on the CLI.
+
 ### Custom Tools
 
 Create custom tools with automatic schema generation — no external dependencies needed:
diff --git a/internal/ui/fileutil/processor_test.go b/internal/ui/fileutil/processor_test.go
index 5539483f..cefb2c01 100644
--- a/internal/ui/fileutil/processor_test.go
+++ b/internal/ui/fileutil/processor_test.go
@@ -145,7 +145,13 @@ func TestDetectMediaType(t *testing.T) {
 		content  []byte
 		expected string
 	}{
-		{".go", nil, "text/plain"}, // .go falls back to content sniffing → text/plain
+		// An intentionally-synthetic extension that is not registered
+		// in any system MIME database. Exercises the "unknown ext +
+		// no content" branch, which must return the text/plain default.
+		// Do not use real extensions (e.g. .go) here: CI images often
+		// ship /etc/mime.types with entries like ".go → text/x-go",
+		// which would make the assertion environment-dependent.
+		{".kitsyntheticext", nil, "text/plain"},
 		{".png", []byte{0x89, 0x50, 0x4E, 0x47}, "image/png"},
 		{".jpg", []byte{0xFF, 0xD8, 0xFF}, "image/jpeg"},
 		{".pdf", []byte{0x25, 0x50, 0x44, 0x46}, "application/pdf"},
diff --git a/pkg/kit/config.go b/pkg/kit/config.go
index 80d85b8f..7636bb86 100644
--- a/pkg/kit/config.go
+++ b/pkg/kit/config.go
@@ -38,20 +38,37 @@ Guidelines:
 - Be concise in your responses
 - Show file paths clearly when working with files`
 
-// setSDKDefaults registers the same viper defaults that the CLI sets via
-// cobra flag bindings. This ensures the SDK behaves identically to the CLI
-// even when cobra is not used.
+// sdkDefaultMaxTokens is the last-resort floor applied when the SDK caller
+// has not configured max-tokens via Options, env, config, or a per-model
+// default. It matches the CLI's --max-tokens cobra default so SDK and CLI
+// callers see the same base value before per-model right-sizing runs.
+// It is intentionally applied on the *models.ProviderConfig struct
+// (not via viper) so that viper.IsSet("max-tokens") remains false and the
+// right-sizing + per-model-default paths continue to work.
+const sdkDefaultMaxTokens = 8192
+
+// setSDKDefaults registers viper defaults that match the CLI's cobra flag
+// defaults for keys where SetDefault does not interfere with downstream
+// viper.IsSet() checks.
+//
+// Keys that participate in "explicit vs unset" precedence downstream —
+// max-tokens, temperature, top-p, top-k, frequency-penalty, presence-penalty,
+// thinking-level — are deliberately NOT registered here. viper.SetDefault
+// causes viper.IsSet() to return true, which would suppress per-model
+// defaults (ApplyModelSettings) and automatic right-sizing (rightSizeMaxTokens)
+// for every SDK-created Kit. Those defaults are instead applied:
+//
+//   - max-tokens: as a last-resort struct-level floor (sdkDefaultMaxTokens)
+//     in kit.New() after BuildProviderConfig returns, when the resolved
+//     value is still zero.
+//   - thinking-level: handled implicitly by models.ParseThinkingLevel("")
+//     which returns models.ThinkingOff.
+//   - sampling params (temperature, top-p, top-k, frequency/presence-penalty):
+//     left as nil pointers so provider libraries apply their own defaults.
 func setSDKDefaults() {
 	viper.SetDefault("model", "anthropic/claude-sonnet-4-5-20250929")
 	viper.SetDefault("system-prompt", defaultSystemPrompt)
-	viper.SetDefault("max-tokens", 4096)
-	viper.SetDefault("temperature", 0.7)
-	viper.SetDefault("top-p", 0.95)
-	viper.SetDefault("top-k", 40)
-	viper.SetDefault("frequency-penalty", 0.0)
-	viper.SetDefault("presence-penalty", 0.0)
 	viper.SetDefault("stream", true)
-	viper.SetDefault("thinking-level", "off")
 	viper.SetDefault("num-gpu-layers", -1)
 	viper.SetDefault("main-gpu", 0)
 }
@@ -102,6 +119,10 @@ func InitConfig(configFile string, debug bool) error {
 	}
 
 	viper.SetEnvPrefix("KIT")
+	// Map hyphenated config keys (e.g. "max-tokens") to underscored env
+	// var names (e.g. KIT_MAX_TOKENS). Without this, AutomaticEnv looks
+	// for KIT_MAX-TOKENS and silently misses valid env overrides.
+	viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
 	viper.AutomaticEnv()
 	return nil
 }
diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go
index 4e587525..d880deb2 100644
--- a/pkg/kit/kit.go
+++ b/pkg/kit/kit.go
@@ -811,6 +811,29 @@ func (m *Kit) ExecuteCompletion(ctx context.Context, req extensions.CompleteRequ
 // Options configures Kit creation with optional overrides for model,
 // prompts, configuration, and behavior settings. All fields are optional
 // and will use CLI defaults if not specified.
+//
+// Global viper state warning:
+// Options are applied by [New] via [viper.Set] calls against viper's
+// process-global store. This store is shared with every downstream reader
+// (e.g. [Kit.SetModel], [Kit.GetThinkingLevel], BuildProviderConfig, and
+// any other code path that calls viper.Get*). Two consequences:
+//
+//  1. Kit instances are NOT isolated from each other within a single
+//     process. Values set by the second New() call overwrite the first,
+//     and any code that later reads viper will see the most recent Set.
+//  2. Fields left at the zero value do NOT clear prior viper state; they
+//     simply skip the viper.Set. Callers that need a clean slate between
+//     constructions should invoke viper.Reset() (the test suite uses a
+//     private resetViper() helper that wraps it) before the next New().
+//
+// Recommended usage: create one Kit per process, or reset viper between
+// constructions. Concurrent calls to New are serialized internally by
+// [viperInitMu], but that mutex does not prevent later viper reads (from
+// a different Kit) from observing mutated keys.
+//
+// TODO: refactor New to use a per-instance *viper.Viper (constructed via
+// viper.New()) so each Kit owns its own isolated config store and Options
+// no longer leak through the global singleton.
 type Options struct {
 	Model        string // Override model (e.g., "anthropic/claude-sonnet-4-5-20250929")
 	SystemPrompt string // Override system prompt
@@ -821,6 +844,76 @@ type Options struct {
 	Tools        []Tool // Custom tool set. If empty, AllTools() is used.
 	ExtraTools   []Tool // Additional tools added alongside core/MCP/extension tools.
 
+	// Generation parameters. These override the corresponding values from
+	// .kit.yml / KIT_* environment variables. Leaving a field at its
+	// zero/nil value means "use the configured default", which in turn
+	// falls back to per-model defaults (modelSettings / customModels) and
+	// finally to a last-resort SDK floor of 8192 for MaxTokens (matching
+	// the CLI --max-tokens default; sampling params fall through to
+	// provider-level defaults).
+	//
+	// Pointer types are used for sampling parameters so the SDK can
+	// distinguish "explicitly set to 0" from "leave alone".
+
+	// MaxTokens overrides the maximum output tokens per LLM response.
+	// 0 = let the precedence chain resolve a value (env → config →
+	// per-model → 8192 SDK floor, matching the CLI default). Setting a
+	// non-zero value here suppresses automatic right-sizing, matching
+	// the CLI's --max-tokens flag semantics. Bump this when generating
+	// long outputs (HTML artifacts, large refactors, etc.) to avoid
+	// silent truncation mid-tool-call. The cap also applies after
+	// model switches via [Kit.SetModel].
+	MaxTokens int
+
+	// ThinkingLevel sets the reasoning effort for models that support
+	// extended thinking. Valid values: "off", "low", "medium", "high"
+	// (some providers also accept "minimal"). "" = let the precedence
+	// chain resolve a level (env → config → per-model → "off"). Use
+	// [Kit.SetThinkingLevel] to change at runtime.
+	ThinkingLevel string
+
+	// Temperature controls sampling randomness (typically 0.0–2.0).
+	// nil = leave provider/per-model default in place. Pointer type
+	// so explicit 0.0 (deterministic) is distinguishable from "unset".
+	Temperature *float32
+
+	// TopP is the nucleus-sampling cutoff (0.0–1.0).
+	// nil = leave provider/per-model default in place.
+	TopP *float32
+
+	// TopK limits sampling to the top K tokens.
+	// nil = leave provider/per-model default in place.
+	TopK *int32
+
+	// FrequencyPenalty discourages repeated tokens (OpenAI-family models).
+	// nil = leave provider/per-model default in place.
+	FrequencyPenalty *float32
+
+	// PresencePenalty discourages repeating topics (OpenAI-family models).
+	// nil = leave provider/per-model default in place.
+	PresencePenalty *float32
+
+	// Provider configuration. These override values normally read from
+	// .kit.yml or provider-specific environment variables. Useful when
+	// loading credentials from a secrets manager, pointing at custom
+	// OpenAI-compatible endpoints (LiteLLM, vLLM, Azure OpenAI, internal
+	// proxies), or running against self-hosted infrastructure.
+
+	// ProviderAPIKey overrides the API key used to authenticate with the
+	// model provider. "" = use the value from config or the
+	// provider-specific environment variable.
+	ProviderAPIKey string
+
+	// ProviderURL overrides the provider endpoint. "" = use the provider's
+	// default URL.
+	ProviderURL string
+
+	// TLSSkipVerify disables TLS certificate verification on provider
+	// HTTP clients. Only set this for self-signed certificates in
+	// development. false means "no override", not "force off": it cannot
+	// turn off a value enabled by the config file or an env var.
+	TLSSkipVerify bool
+
 	// SkipConfig, when true, skips loading .kit.yml configuration files.
 	// Viper defaults (setSDKDefaults) and environment variables (KIT_*)
 	// are still applied. Use this for fully programmatic configuration.
@@ -979,14 +1072,29 @@ func InitTreeSession(opts *Options) (*session.TreeManager, error) {
 	return session.CreateTreeSession(sessionDir)
 }
 
-// New creates a Kit instance using the same initialization as the CLI.
-// It loads configuration, initializes MCP servers, creates the LLM model, and
-// sets up the agent for interaction. Returns an error if initialization fails.
-// viperInitMu serializes viper writes during kit.New(). Viper's global state
+// viperInitMu serializes viper writes during [New]. Viper's global state
 // is not thread-safe, so concurrent calls (e.g. parallel subagent spawns)
-// must not overlap the Set()/Get() window.
+// must not overlap the Set/Get window. Note that this mutex only protects
+// the construction window — it does not isolate long-lived Kit instances
+// from each other. See the "Global viper state warning" on [Options].
 var viperInitMu sync.Mutex
 
+// New creates a Kit instance using the same initialization as the CLI.
+// It loads configuration, initializes MCP servers, creates the LLM model, and
+// sets up the agent for interaction. Returns an error if initialization fails.
+//
+// Global viper state warning: fields on [Options] are applied by calling
+// [viper.Set] on viper's process-global store. As a result, two Kits
+// constructed in the same process are NOT isolated: the second New
+// overwrites viper keys set by the first, and any downstream reader
+// (e.g. [Kit.SetModel], [Kit.GetThinkingLevel]) will observe the most
+// recent value. Callers that need multiple independent Kits should call
+// viper.Reset() between constructions, or avoid constructing more than
+// one Kit per process. Writes during New are serialized by [viperInitMu].
+//
+// TODO: refactor to use a per-call viper.New() instance so each Kit owns
+// its own isolated config store and Options stop leaking through the
+// global singleton.
 func New(ctx context.Context, opts *Options) (*Kit, error) {
 	if opts == nil {
 		opts = &Options{}
@@ -1047,6 +1155,47 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
 		}
 		viper.Set("stream", opts.Streaming)
 
+		// Generation parameter overrides. Each Options field, when set,
+		// is pushed into viper here so the existing downstream code
+		// (BuildProviderConfig, SetModel, modelSettings lookups) picks
+		// it up uniformly. Pointer-typed sampling params use viper.Set
+		// only when non-nil so that nil means "leave provider/per-model
+		// default in place" (BuildProviderConfig keys off viper.IsSet).
+		if opts.MaxTokens > 0 {
+			viper.Set("max-tokens", opts.MaxTokens)
+		}
+		if opts.ThinkingLevel != "" {
+			viper.Set("thinking-level", opts.ThinkingLevel)
+		}
+		if opts.Temperature != nil {
+			viper.Set("temperature", *opts.Temperature)
+		}
+		if opts.TopP != nil {
+			viper.Set("top-p", *opts.TopP)
+		}
+		if opts.TopK != nil {
+			viper.Set("top-k", *opts.TopK)
+		}
+		if opts.FrequencyPenalty != nil {
+			viper.Set("frequency-penalty", *opts.FrequencyPenalty)
+		}
+		if opts.PresencePenalty != nil {
+			viper.Set("presence-penalty", *opts.PresencePenalty)
+		}
+
+		// Provider overrides. TLSSkipVerify only takes effect when true —
+		// callers wanting to force-disable should use the config file or
+		// env var instead.
+		if opts.ProviderAPIKey != "" {
+			viper.Set("provider-api-key", opts.ProviderAPIKey)
+		}
+		if opts.ProviderURL != "" {
+			viper.Set("provider-url", opts.ProviderURL)
+		}
+		if opts.TLSSkipVerify {
+			viper.Set("tls-skip-verify", true)
+		}
+
 		// Resolve working directory for context/skill discovery.
 		cwd = opts.SessionDir
 		if cwd == "" {
@@ -1132,6 +1281,17 @@ func New(ctx context.Context, opts *Options) (*Kit, error) {
 		if pcErr != nil {
 			return fmt.Errorf("failed to build provider config: %w", pcErr)
 		}
+
+		// SDK last-resort max-tokens floor. When nothing — Options, env,
+		// config, nor a per-model default — supplied a value, we land on
+		// zero here (viper.GetInt returns 0 for unset keys). Apply the
+		// SDK default directly on the struct rather than via viper so
+		// viper.IsSet("max-tokens") stays false: downstream right-sizing
+		// can still raise this toward the model's known output ceiling,
+		// and per-model modelSettings[...].maxTokens can still win.
+		if providerConfig.MaxTokens == 0 && opts.MaxTokens == 0 {
+			providerConfig.MaxTokens = sdkDefaultMaxTokens
+		}
 		modelString = viper.GetString("model")
 		debug = viper.GetBool("debug")
 		noExtensions = opts.NoExtensions || viper.GetBool("no-extensions")
diff --git a/pkg/kit/kit_test.go b/pkg/kit/kit_test.go
index 346cb1b7..7eead8a2 100644
--- a/pkg/kit/kit_test.go
+++ b/pkg/kit/kit_test.go
@@ -5,6 +5,8 @@ import (
 	"os"
 	"testing"
 
+	"github.com/spf13/viper"
+
 	kit "github.com/mark3labs/kit/pkg/kit"
 )
 
@@ -54,6 +56,225 @@ func TestNewWithOptions(t *testing.T) {
 	}
 }
 
+// TestNewWithGenerationOptions verifies that the SDK-only generation
+// parameter overrides on Options propagate all the way through to the
+// agent without requiring any viper.Set workarounds in caller code.
+func TestNewWithGenerationOptions(t *testing.T) {
+	if os.Getenv("ANTHROPIC_API_KEY") == "" {
+		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
+	}
+
+	ctx := context.Background()
+
+	// MaxTokens override — keep ThinkingLevel off so Anthropic's thinking
+	// budget doesn't auto-bump MaxTokens above what we configured.
+	t.Run("MaxTokens", func(t *testing.T) {
+		defer resetViper()
+
+		const want = 12345
+		host, err := kit.New(ctx, &kit.Options{
+			Model:     "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:     true,
+			MaxTokens: want,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if got := host.MaxTokens(); got != want {
+			t.Errorf("Options.MaxTokens=%d did not propagate; Kit.MaxTokens()=%d", want, got)
+		}
+		if !viper.IsSet("max-tokens") {
+			t.Error("viper.IsSet(\"max-tokens\") should be true after MaxTokens override")
+		}
+	})
+
+	// ThinkingLevel override — verified via the public getter, which
+	// reads back the configured (not provider-derived) level.
+	t.Run("ThinkingLevel", func(t *testing.T) {
+		defer resetViper()
+
+		const want = "high"
+		host, err := kit.New(ctx, &kit.Options{
+			Model:         "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:         true,
+			ThinkingLevel: want,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if got := host.GetThinkingLevel(); got != want {
+			t.Errorf("Options.ThinkingLevel=%q did not propagate; Kit.GetThinkingLevel()=%q", want, got)
+		}
+	})
+
+	// Temperature override — pointer semantics let callers distinguish
+	// "explicitly 0.0" from "unset", which we assert by pushing a distinct
+	// value and reading it back off viper's merged state.
+	t.Run("Temperature", func(t *testing.T) {
+		defer resetViper()
+
+		want := float32(0.12345)
+		host, err := kit.New(ctx, &kit.Options{
+			Model:       "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:       true,
+			Temperature: &want,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if !viper.IsSet("temperature") {
+			t.Fatal("viper.IsSet(\"temperature\") should be true after Temperature override")
+		}
+		if got := float32(viper.GetFloat64("temperature")); got != want {
+			t.Errorf("Options.Temperature=%v did not propagate; viper=%v", want, got)
+		}
+	})
+}
+
+// TestNewPreservesIsSetSemantics verifies that creating a Kit WITHOUT
+// populating the generation-param Options fields does NOT mark those
+// keys as explicitly set in viper. This is the precedence contract
+// that per-model defaults (ApplyModelSettings) and right-sizing
+// (rightSizeMaxTokens) rely on.
+//
+// Previously setSDKDefaults() used viper.SetDefault() for every param,
+// which caused viper.IsSet() to return true for all of them — silently
+// suppressing per-model defaults and pinning max-tokens at 4096 even
+// on models with much larger output limits.
+func TestNewPreservesIsSetSemantics(t *testing.T) {
+	if os.Getenv("ANTHROPIC_API_KEY") == "" {
+		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
+	}
+
+	defer resetViper()
+
+	ctx := context.Background()
+	host, err := kit.New(ctx, &kit.Options{
+		Model:      "anthropic/claude-sonnet-4-5-20250929",
+		Quiet:      true,
+		NoSession:  true,
+		SkipConfig: true, // isolate from any ~/.kit.yml values
+	})
+	if err != nil {
+		t.Fatalf("Failed to create Kit: %v", err)
+	}
+	defer func() { _ = host.Close() }()
+
+	// These keys must remain "unset" from viper's perspective so the
+	// downstream isExplicitlySet() checks allow per-model defaults to
+	// take effect.
+	checkKeys := []string{
+		"max-tokens",
+		"temperature",
+		"top-p",
+		"top-k",
+		"frequency-penalty",
+		"presence-penalty",
+		"thinking-level",
+	}
+
+	// SkipConfig: true keeps .kit.yml values out of viper. Assuming no
+	// KIT_* generation env vars are exported in the test environment,
+	// any IsSet() here would come purely from SDK-side SetDefault/Set
+	// calls — which is exactly what this test is guarding against.
+	for _, k := range checkKeys {
+		if viper.IsSet(k) {
+			t.Errorf("viper.IsSet(%q) == true when no Options field set it "+
+				"(SDK defaults must not corrupt IsSet semantics)", k)
+		}
+	}
+}
+
+// TestNewWithProviderOptions verifies that programmatic provider overrides
+// (API key, URL) take effect without env vars or config files, and that
+// Options.ProviderAPIKey *wins* over any pre-existing viper state.
+func TestNewWithProviderOptions(t *testing.T) {
+	if os.Getenv("ANTHROPIC_API_KEY") == "" {
+		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
+	}
+
+	ctx := context.Background()
+
+	t.Run("succeeds with API key from Options", func(t *testing.T) {
+		defer resetViper()
+
+		apiKey := os.Getenv("ANTHROPIC_API_KEY")
+		host, err := kit.New(ctx, &kit.Options{
+			Model:          "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:          true,
+			NoSession:      true,
+			ProviderAPIKey: apiKey,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit with ProviderAPIKey option: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if got := viper.GetString("provider-api-key"); got != apiKey {
+			t.Errorf("Options.ProviderAPIKey did not propagate to viper; got %q (len=%d)", got, len(got))
+		}
+	})
+
+	// Override precedence: even when viper already holds a different
+	// provider-api-key value (as it would if a config file or earlier
+	// Set() call populated one), Options.ProviderAPIKey must win.
+	t.Run("Options override beats pre-existing viper state", func(t *testing.T) {
+		defer resetViper()
+
+		viper.Set("provider-api-key", "sk-config-file-placeholder")
+
+		want := "sk-from-options-override"
+		// Use an OpenAI-flavored model so the validation path accepts
+		// the placeholder without attempting a real Anthropic handshake.
+		host, err := kit.New(ctx, &kit.Options{
+			Model:            "openai/gpt-4o-mini",
+			Quiet:            true,
+			NoSession:        true,
+			NoExtensions:     true,
+			DisableCoreTools: true,
+			ProviderAPIKey:   want,
+		})
+		// Creation may still fail if the model registry is strict, but
+		// we only care that the override reached viper before any
+		// provider handshake happened.
+		if host != nil {
+			defer func() { _ = host.Close() }()
+		}
+		_ = err
+
+		if got := viper.GetString("provider-api-key"); got != want {
+			t.Errorf("Options.ProviderAPIKey did not override pre-existing viper value; got %q, want %q", got, want)
+		}
+	})
+
+	// ProviderURL override must also reach viper.
+	t.Run("ProviderURL propagates", func(t *testing.T) {
+		defer resetViper()
+
+		const want = "https://custom.example.com/v1"
+		host, err := kit.New(ctx, &kit.Options{
+			Model:       "anthropic/claude-sonnet-4-5-20250929",
+			Quiet:       true,
+			NoSession:   true,
+			ProviderURL: want,
+		})
+		if err != nil {
+			t.Fatalf("Failed to create Kit with ProviderURL option: %v", err)
+		}
+		defer func() { _ = host.Close() }()
+
+		if got := viper.GetString("provider-url"); got != want {
+			t.Errorf("Options.ProviderURL did not propagate; got %q, want %q", got, want)
+		}
+	})
+}
+
 func TestSessionManagement(t *testing.T) {
 	if os.Getenv("ANTHROPIC_API_KEY") == "" {
 		t.Skip("Skipping test: ANTHROPIC_API_KEY not set")
@@ -81,3 +302,7 @@ func TestSessionManagement(t *testing.T) {
 		t.Error("Expected non-empty session ID")
 	}
 }
+
+// resetViper wipes viper's global state so a test case doesn't leak
+// viper.Set() calls into the next one. Used via defer in subtests.
+func resetViper() { viper.Reset() }
diff --git a/skills/kit-sdk/SKILL.md b/skills/kit-sdk/SKILL.md
index b9527818..0dc1a02e 100644
--- a/skills/kit-sdk/SKILL.md
+++ b/skills/kit-sdk/SKILL.md
@@ -80,6 +80,23 @@ host, err := kit.New(ctx, &kit.Options{
     Quiet:     true, // suppress debug output
     Debug:     true, // enable debug logging
 
+    // Generation parameters — override env/config/per-model defaults.
+    // Leaving a field at its zero/nil value lets the precedence chain
+    // resolve a value (KIT_* env → .kit.yml → modelSettings/customModels →
+    // 8192 floor for MaxTokens, provider defaults for samplers).
+    MaxTokens:        16384,             // 0 = auto-resolve; non-zero suppresses right-sizing
+    ThinkingLevel:    "medium",          // "off", "low", "medium", "high" ("" = default)
+    Temperature:      ptrFloat32(0.2),   // pointer so explicit 0.0 != unset
+    TopP:             nil,                // nil = leave provider/per-model default
+    TopK:             nil,                // nil = leave provider/per-model default
+    FrequencyPenalty: nil,
+    PresencePenalty:  nil,
+
+    // Provider configuration — override env/config without viper.Set workarounds.
+    ProviderAPIKey: "sk-...",                    // "" = use config / provider env var
+    ProviderURL:    "https://proxy.internal/v1", // "" = provider default endpoint
+    TLSSkipVerify:  false,                       // true only; can't force-disable via Options
+
     // Session
     SessionDir:  "/path/to/project",  // base dir for session discovery (default: cwd)
     SessionPath: "/path/to/session.jsonl", // open specific session file
@@ -118,12 +135,34 @@ host, err := kit.New(ctx, &kit.Options{
         "docs": mcpSrv,  // *server.MCPServer from mcp-go — no subprocess needed
     },
 })
+
+// Tiny helper to take the address of a literal for pointer fields.
+func ptrFloat32(v float32) *float32 { return &v }
 ```
 
 **Critical distinction**: `Tools` replaces ALL default tools (core + MCP + extension). `ExtraTools` adds tools alongside the defaults. Use `Tools` to restrict the agent's capabilities; use `ExtraTools` to extend them.
 
 **In-process MCP servers** bypass subprocess spawning entirely. Pass `*server.MCPServer` instances from mcp-go via `InProcessMCPServers` or call `AddInProcessMCPServer()` at runtime.
 
+### Generation & provider Options (cheat sheet)
+
+| Field | Type | Empty/nil means | Notes |
+|-------|------|-----------------|-------|
+| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 8192 floor) | Non-zero suppresses `rightSizeMaxTokens` |
+| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) |
+| `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset |
+| `TopP` | `*float32` | Leave provider/per-model default | |
+| `TopK` | `*int32` | Leave provider/per-model default | |
+| `FrequencyPenalty` | `*float32` | Leave provider/per-model default | OpenAI-family |
+| `PresencePenalty` | `*float32` | Leave provider/per-model default | OpenAI-family |
+| `ProviderAPIKey` | `string` | Use config / provider env var | Overrides pre-existing viper state |
+| `ProviderURL` | `string` | Use provider default endpoint | Same base URL flag as `--provider-url` |
+| `TLSSkipVerify` | `bool` | — | Only effective when `true`; cannot force-disable via Options |
+
+These fields eliminate the old `viper.Set("max-tokens", 16384)` dance many
+downstream embedders used to do before calling `kit.New()`. Everything is
+now discoverable via godoc on `kit.Options`.
+
 ---
 
 ## Prompt Methods
diff --git a/www/pages/configuration.md b/www/pages/configuration.md
index 802fa5c4..2176f07d 100644
--- a/www/pages/configuration.md
+++ b/www/pages/configuration.md
@@ -175,10 +175,24 @@ modelSettings:
 | `thinkingLevel` | string | Thinking level override |
 | `systemPrompt` | string | Per-model system prompt (used when no explicit prompt is set) |
 
-Settings from `modelSettings` and `customModels.params` act as model-level defaults — explicit CLI flags and global config values always take precedence.
+Settings from `modelSettings` and `customModels.params` act as model-level defaults — explicit CLI flags, `KIT_*` environment variables, global config values, and SDK `Options.*` fields all take precedence over them.
 
 When switching models via `/model` or `SetModel()`, if the new model has a per-model system prompt and no custom global prompt was set, the per-model prompt automatically replaces the previous one.
 
+### Precedence summary
+
+For the generation and provider parameters documented above, the resolved value at runtime comes from the first source that sets it:
+
+1. CLI flag (e.g. `--max-tokens`, `--temperature`, `--provider-api-key`)
+2. SDK `Options.X` when embedding Kit as a library (`kit.Options.MaxTokens`, `Temperature`, `ProviderAPIKey`, etc.)
+3. `KIT_*` environment variable (`KIT_MAX_TOKENS`, `KIT_TEMPERATURE`, ...)
+4. `.kit.yml` / `.kit.yaml` / `.kit.json` (project-local, then global)
+5. Per-model defaults (`modelSettings[provider/model]` / `customModels[...].params`)
+6. Provider-level defaults (e.g. Anthropic's own temperature default)
+7. SDK last-resort floor (`max-tokens` only) — currently 8192 output tokens, matching the CLI `--max-tokens` default and auto-raised per-model up to 32768 when the model's catalog ceiling is higher
+
+See the [SDK options reference](/sdk/options) for the full list of `kit.Options` fields that map to these keys.
+
 ## Theme configuration
 
 ```yaml
diff --git a/www/pages/sdk/options.md b/www/pages/sdk/options.md
index 49c453d7..29a55911 100644
--- a/www/pages/sdk/options.md
+++ b/www/pages/sdk/options.md
@@ -22,6 +22,20 @@ host, err := kit.New(ctx, &kit.Options{
     Quiet:        true,
     Debug:        true,
 
+    // Generation parameters (override env/config/per-model defaults)
+    MaxTokens:        16384,              // 0 = auto-resolve; non-zero suppresses right-sizing
+    ThinkingLevel:    "medium",           // "off", "low", "medium", "high"
+    Temperature:      ptrFloat32(0.2),    // pointer so explicit 0.0 != unset
+    TopP:             nil,                 // nil = provider/per-model default
+    TopK:             nil,
+    FrequencyPenalty: nil,
+    PresencePenalty:  nil,
+
+    // Provider configuration
+    ProviderAPIKey: "sk-...",                      // "" = use config / provider env var
+    ProviderURL:    "https://proxy.internal/v1",  // "" = provider default endpoint
+    TLSSkipVerify:  false,                         // only effective when true
+
     // Session
     SessionPath:  "./session.jsonl",
     SessionDir:   "/custom/sessions/",
@@ -65,6 +79,8 @@ host, err := kit.New(ctx, &kit.Options{
 
 ## Options fields
 
+### Core
+
 | Field | Type | Default | Description |
 |-------|------|---------|-------------|
 | `Model` | `string` | config default | Model string (provider/model format) |
@@ -74,25 +90,96 @@ host, err := kit.New(ctx, &kit.Options{
 | `Streaming` | `bool` | `true` | Enable streaming output |
 | `Quiet` | `bool` | `false` | Suppress output |
 | `Debug` | `bool` | `false` | Enable debug logging |
+
+### Generation parameters
+
+These fields override the corresponding values from `.kit.yml` and `KIT_*`
+environment variables. Leaving a field at its zero/nil value lets the
+precedence chain resolve it: `KIT_*` env → config file → per-model
+defaults from `modelSettings`/`customModels` → provider-level defaults
+for samplers, or an 8192 SDK floor for `MaxTokens` (matching the CLI
+`--max-tokens` default).
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `MaxTokens` | `int` | auto-resolved | Max output tokens per response. `0` = auto-resolve; non-zero suppresses automatic right-sizing (same semantics as `--max-tokens`). |
+| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"low"`, `"medium"`, `"high"` (some providers also accept `"minimal"`). `""` falls through to env → config → per-model → `"off"`. |
+| `Temperature` | `*float32` | — | Sampling randomness. Pointer type so explicit `0.0` is distinguishable from "unset". |
+| `TopP` | `*float32` | — | Nucleus sampling cutoff. `nil` leaves provider/per-model default. |
+| `TopK` | `*int32` | — | Top-K sampling limit. `nil` leaves provider/per-model default. |
+| `FrequencyPenalty` | `*float32` | — | OpenAI-family frequency penalty. `nil` leaves provider default. |
+| `PresencePenalty` | `*float32` | — | OpenAI-family presence penalty. `nil` leaves provider default. |
+
+Pointer-typed samplers can be populated with a small helper (shown here for `*float32`; `TopK` needs an equivalent `*int32` helper):
+
+```go
+func ptrFloat32(v float32) *float32 { return &v }
+```
+
+These fields eliminate the need for `viper.Set()` calls before `kit.New()`
+when embedding Kit as a library.
+
+### Provider configuration
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `ProviderAPIKey` | `string` | — | API key used to authenticate with the provider. `""` falls back to config / provider-specific env var (e.g. `ANTHROPIC_API_KEY`). When set, overrides any pre-existing viper state. |
+| `ProviderURL` | `string` | — | Override the provider endpoint (e.g. LiteLLM, vLLM, Azure OpenAI, internal proxy). `""` = provider default. |
+| `TLSSkipVerify` | `bool` | `false` | Disable TLS certificate verification on the provider HTTP client. Only takes effect when `true`: leaving it `false` does not override a `true` set via config file or env var, so to force verification back on, change the config file or env var instead. For self-signed dev certs only. |
+
+### Session
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
 | `SessionPath` | `string` | — | Open a specific session file |
 | `SessionDir` | `string` | — | Base directory for session discovery |
 | `Continue` | `bool` | `false` | Resume most recent session |
 | `NoSession` | `bool` | `false` | Ephemeral mode (no persistence) |
+| `SessionManager` | `SessionManager` | — | Custom session backend (advanced) |
+
+### Tools & extensions
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
 | `Tools` | `[]Tool` | — | Replace the entire default tool set |
 | `ExtraTools` | `[]Tool` | — | Additional tools alongside core/MCP/extension tools |
 | `DisableCoreTools` | `bool` | `false` | Use no core tools (0 tools, for chat-only) |
-| `SkipConfig` | `bool` | `false` | Skip .kit.yml file loading |
-| `AutoCompact` | `bool` | `false` | Auto-compact when near context limit |
-| `CompactionOptions` | `*CompactionOptions` | — | Configuration for auto-compaction |
+| `NoExtensions` | `bool` | `false` | Disable Yaegi extension loading |
+| `NoContextFiles` | `bool` | `false` | Disable automatic AGENTS.md loading |
+
+### Skills & configuration
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `SkipConfig` | `bool` | `false` | Skip `.kit.yml` file loading (viper defaults + env vars still apply) |
 | `Skills` | `[]string` | — | Explicit skill files/dirs to load |
 | `SkillsDir` | `string` | — | Override default skills directory |
 | `NoSkills` | `bool` | `false` | Disable skill loading entirely |
-| `NoExtensions` | `bool` | `false` | Disable Yaegi extension loading |
-| `NoContextFiles` | `bool` | `false` | Disable automatic AGENTS.md loading |
-| `SessionManager` | `SessionManager` | — | Custom session backend (advanced) |
+
+### Compaction & MCP
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `AutoCompact` | `bool` | `false` | Auto-compact when near context limit |
+| `CompactionOptions` | `*CompactionOptions` | — | Configuration for auto-compaction |
 | `MCPTokenStoreFactory` | `func` | — | Custom OAuth token storage for MCP servers |
 | `InProcessMCPServers` | `map[string]*MCPServer` | — | In-process mcp-go servers (no subprocess) |
 
+## Precedence
+
+For any given generation or provider field, the effective value is resolved
+in this order (highest priority first):
+
+1. `Options.X` (SDK caller)
+2. `KIT_X` environment variable
+3. `.kit.yml` (project-local then `~/.kit.yml`)
+4. Per-model defaults (`modelSettings[provider/model]` or `customModels[...].params`)
+5. Provider-level defaults (e.g. Anthropic's own temperature default)
+6. SDK last-resort floor (currently: `MaxTokens = 8192`, matching the CLI `--max-tokens` default)
+
+Sampling params that remain `nil` after the SDK resolution step are left out
+of the provider call entirely, so the LLM library applies its own default.
+
 ## Tool configuration
 
 **`Tools`** replaces ALL default tools (core + MCP + extension). **`ExtraTools`** adds tools alongside the defaults. Use `Tools` to restrict capabilities; use `ExtraTools` to extend them.
diff --git a/www/pages/sdk/overview.md b/www/pages/sdk/overview.md
index 4a45eff0..b16c63d0 100644
--- a/www/pages/sdk/overview.md
+++ b/www/pages/sdk/overview.md
@@ -106,6 +106,27 @@ For advanced use, return a `kit.ToolOutput` struct directly with `Data`, `MediaT
 
 Use `kit.NewParallelTool` for tools that are safe to run concurrently. Use `kit.ToolCallIDFromContext(ctx)` to retrieve the LLM-assigned call ID for logging or tracing.
 
+## Generation & provider overrides
+
+SDK consumers can configure generation parameters and provider endpoints
+entirely in code via `Options`, without editing `.kit.yml` or calling `viper.Set()`:
+
+```go
+host, _ := kit.New(ctx, &kit.Options{
+    Model:          "anthropic/claude-sonnet-4-5-20250929",
+    MaxTokens:      16384,             // 0 = auto-resolve (env → config → per-model → floor)
+    ThinkingLevel:  "high",            // "off" | "low" | "medium" | "high"
+    Temperature:    ptrFloat32(0.2),   // nil = provider/per-model default
+    ProviderAPIKey: os.Getenv("MY_SECRET"), // overrides pre-existing viper state
+    ProviderURL:    "https://proxy.internal/v1",
+})
+
+func ptrFloat32(v float32) *float32 { return &v }
+```
+
+See [Options](/sdk/options#generation-parameters) for the full field reference,
+including `TopP`, `TopK`, `FrequencyPenalty`, `PresencePenalty`, and `TLSSkipVerify`.
+
 ## Event system
 
 Subscribe to events for monitoring: