From 25c7a0e709ecddb2416af5bec05bcde28538ea2d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 11:05:47 +0000 Subject: [PATCH 1/4] Initial plan From b063c2d87bb9a7621feb7222c2f91c4ff8c6a5b7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 11:12:30 +0000 Subject: [PATCH 2/4] Fix routed mode session timeout: read MCP_GATEWAY_SESSION_TIMEOUT instead of hardcoded 30 min Long-running agentic workflows (>30 min) were failing with "session not found" errors in routed mode because the SDK SessionTimeout was hardcoded to 30 minutes. This change makes routed mode read MCP_GATEWAY_SESSION_TIMEOUT (same variable as unified mode) with a 6h default, matching GitHub Actions' default workflow timeout and consistent with unified mode's behaviour. Agent-Logs-Url: https://github.com/github/gh-aw-mcpg/sessions/fa3e2c6f-c274-4541-9e13-8a608637925c Co-authored-by: dsyme <7204669+dsyme@users.noreply.github.com> --- AGENTS.md | 2 +- docs/ENVIRONMENT_VARIABLES.md | 2 +- internal/server/routed.go | 6 +++- internal/server/routed_test.go | 55 ++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 3 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index bdd4d4fa3..20285fe7b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -385,7 +385,7 @@ DEBUG_COLORS=0 DEBUG=* ./awmg --config config.toml - `MCP_GATEWAY_PAYLOAD_DIR` - Large payload storage directory (sets default for `--payload-dir` flag, default: `/tmp/jq-payloads`) - `MCP_GATEWAY_PAYLOAD_PATH_PREFIX` - Path prefix for remapping payloadPath returned to clients (sets default for `--payload-path-prefix` flag, default: empty) - `MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD` - Size threshold in bytes for payload storage; payloads larger than this are stored to disk (sets default for `--payload-size-threshold` flag, default: `524288`) -- `MCP_GATEWAY_SESSION_TIMEOUT` - Session timeout for unified mode (`/mcp`) stateful sessions. Accepts Go duration strings (e.g., `30m`, `1h`, `2h30m`). Routed mode is unaffected (hardcoded 30 min). (default: `6h`) +- `MCP_GATEWAY_SESSION_TIMEOUT` - Session timeout for stateful sessions in both unified mode (`/mcp`) and routed mode (`/mcp/`). Accepts Go duration strings (e.g., `30m`, `1h`, `2h30m`). (default: `6h`) - `DOCKER_HOST` - Docker daemon socket path (default: `/var/run/docker.sock`) - `MCP_GATEWAY_GUARDS_SINK_SERVER_IDS` - Comma-separated server IDs whose RPC JSONL logs should include agent secrecy/integrity tag snapshots (sets default for `--guards-sink-server-ids`) - `MCP_GATEWAY_GUARDS_MODE` - Guards enforcement mode: `strict` (deny violations), `filter` (remove denied tools), `propagate` (auto-adjust agent labels) (sets default for `--guards-mode`, default: `strict`) diff --git a/docs/ENVIRONMENT_VARIABLES.md b/docs/ENVIRONMENT_VARIABLES.md index 1e0ef91d4..48926c286 100644 --- a/docs/ENVIRONMENT_VARIABLES.md +++ b/docs/ENVIRONMENT_VARIABLES.md @@ -25,7 +25,7 @@ When running locally (`run.sh`), these variables are optional (warnings shown if | `MCP_GATEWAY_PAYLOAD_DIR` | Large payload storage directory (sets default for `--payload-dir` flag) | `/tmp/jq-payloads` | | `MCP_GATEWAY_PAYLOAD_PATH_PREFIX` | Path prefix for remapping payloadPath returned to clients (sets default for `--payload-path-prefix` flag) | (empty - use actual filesystem path) | | `MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD` | Size threshold in bytes for payload storage (sets default for `--payload-size-threshold` flag) | `524288` | -| `MCP_GATEWAY_SESSION_TIMEOUT` | Session timeout for unified mode (`/mcp`) stateful sessions. Accepts Go duration strings (e.g., `30m`, `1h`). Default is 6 hours to match the GitHub Actions default timeout. Routed mode is unaffected (hardcoded 30 min). | `6h` | +| `MCP_GATEWAY_SESSION_TIMEOUT` | Session timeout for stateful sessions in both unified (`/mcp`) and routed (`/mcp/`) modes. Accepts Go duration strings (e.g., `30m`, `1h`). Default is 6 hours to match the GitHub Actions default timeout. | `6h` | | `MCP_GATEWAY_WASM_GUARDS_DIR` | Root directory for per-server WASM guards (`//*.wasm`, first match is loaded) | (disabled) | | `MCP_GATEWAY_GUARDS_MODE` | Guards enforcement mode: `strict` (deny violations), `filter` (remove denied tools), `propagate` (auto-adjust agent labels) (sets default for `--guards-mode`) | `strict` | | `MCP_GATEWAY_GUARDS_SINK_SERVER_IDS` | Comma-separated sink server IDs for JSONL guards tag enrichment (sets default for `--guards-sink-server-ids`) | (disabled) | diff --git a/internal/server/routed.go b/internal/server/routed.go index 0d5215a40..882ce2326 100644 --- a/internal/server/routed.go +++ b/internal/server/routed.go @@ -11,6 +11,7 @@ import ( "time" "github.com/github/gh-aw-mcpg/internal/auth" + "github.com/github/gh-aw-mcpg/internal/envutil" "github.com/github/gh-aw-mcpg/internal/httputil" "github.com/github/gh-aw-mcpg/internal/logger" "github.com/github/gh-aw-mcpg/internal/version" @@ -140,7 +141,10 @@ func CreateHTTPServerForRoutedMode(addr string, unifiedServer *UnifiedServer, ap // Create server cache for session-aware server instances. // TTL matches the SDK SessionTimeout so cache entries expire with sessions. - routedSessionTimeout := 30 * time.Minute + // Reads MCP_GATEWAY_SESSION_TIMEOUT (same variable as unified mode) with a 6h default + // so that long-running agentic workflows (e.g. >30 min GitHub Actions jobs) do not + // receive spurious "session not found" errors. + routedSessionTimeout := envutil.GetEnvDuration("MCP_GATEWAY_SESSION_TIMEOUT", 6*time.Hour) serverCache := newFilteredServerCache(routedSessionTimeout) // Create a proxy for each backend server diff --git a/internal/server/routed_test.go b/internal/server/routed_test.go index aefcc0ece..93663d492 100644 --- a/internal/server/routed_test.go +++ b/internal/server/routed_test.go @@ -808,3 +808,58 @@ func TestCreateHTTPServerForRoutedMode_OAuth(t *testing.T) { }) } } + +// TestRoutedModeSessionTimeout_ReadsEnvVar verifies that CreateHTTPServerForRoutedMode +// uses MCP_GATEWAY_SESSION_TIMEOUT for the SDK session timeout, matching unified mode. +// This ensures long-running agentic workflows (>30 min) do not get spurious +// "session not found" errors that would occur with the previous hardcoded 30-minute timeout. +func TestRoutedModeSessionTimeout_ReadsEnvVar(t *testing.T) { + // Verify that the timeout used is driven by MCP_GATEWAY_SESSION_TIMEOUT + // by checking with a custom value and with the default. + t.Run("custom timeout from env", func(t *testing.T) { + t.Setenv("MCP_GATEWAY_SESSION_TIMEOUT", "2h") + + ctx := context.Background() + cfg := &config.Config{Servers: map[string]*config.ServerConfig{}} + us, err := NewUnified(ctx, cfg) + require.NoError(t, err) + defer us.Close() + + // CreateHTTPServerForRoutedMode internally calls envutil.GetEnvDuration. + // We verify indirectly via newFilteredServerCache: a cache created with the env- + // driven timeout must not evict an entry that is younger than 2 hours. + cache := newFilteredServerCache(2 * time.Hour) + callCount := 0 + creator := func() *sdk.Server { + callCount++ + return sdk.NewServer(&sdk.Implementation{Name: "test", Version: "1.0"}, &sdk.ServerOptions{}) + } + + cache.getOrCreate("backend", "session1", creator) + // Entry was just created; it should NOT be evicted by the 2h TTL cache. + cache.getOrCreate("backend", "session1", creator) + assert.Equal(t, 1, callCount, "session should be reused within the 2h timeout") + }) + + t.Run("default 6h timeout when env unset", func(t *testing.T) { + t.Setenv("MCP_GATEWAY_SESSION_TIMEOUT", "") + + ctx := context.Background() + cfg := &config.Config{Servers: map[string]*config.ServerConfig{}} + us, err := NewUnified(ctx, cfg) + require.NoError(t, err) + defer us.Close() + + // With no env var, the default is 6 hours; a freshly-created session must survive. + cache := newFilteredServerCache(6 * time.Hour) + callCount := 0 + creator := func() *sdk.Server { + callCount++ + return sdk.NewServer(&sdk.Implementation{Name: "test", Version: "1.0"}, &sdk.ServerOptions{}) + } + + cache.getOrCreate("backend", "session1", creator) + cache.getOrCreate("backend", "session1", creator) + assert.Equal(t, 1, callCount, "session should be reused within the 6h default timeout") + }) +} From 4ef4771dce86ddb2dad23bf3d81a05db946a6740 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 11:14:32 +0000 Subject: [PATCH 3/4] Fix test: use CreateHTTPServerForRoutedMode in session timeout test for real integration coverage Agent-Logs-Url: https://github.com/github/gh-aw-mcpg/sessions/fa3e2c6f-c274-4541-9e13-8a608637925c Co-authored-by: dsyme <7204669+dsyme@users.noreply.github.com> --- internal/server/routed_test.go | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/internal/server/routed_test.go b/internal/server/routed_test.go index 93663d492..761955d13 100644 --- a/internal/server/routed_test.go +++ b/internal/server/routed_test.go @@ -814,29 +814,29 @@ func TestCreateHTTPServerForRoutedMode_OAuth(t *testing.T) { // This ensures long-running agentic workflows (>30 min) do not get spurious // "session not found" errors that would occur with the previous hardcoded 30-minute timeout. func TestRoutedModeSessionTimeout_ReadsEnvVar(t *testing.T) { - // Verify that the timeout used is driven by MCP_GATEWAY_SESSION_TIMEOUT - // by checking with a custom value and with the default. + ctx := context.Background() + cfg := &config.Config{Servers: map[string]*config.ServerConfig{}} + us, err := NewUnified(ctx, cfg) + require.NoError(t, err) + defer us.Close() + t.Run("custom timeout from env", func(t *testing.T) { t.Setenv("MCP_GATEWAY_SESSION_TIMEOUT", "2h") - ctx := context.Background() - cfg := &config.Config{Servers: map[string]*config.ServerConfig{}} - us, err := NewUnified(ctx, cfg) - require.NoError(t, err) - defer us.Close() + // CreateHTTPServerForRoutedMode calls envutil.GetEnvDuration("MCP_GATEWAY_SESSION_TIMEOUT", 6h) + // and passes the result to both newFilteredServerCache and sdk.StreamableHTTPOptions.SessionTimeout. + // Call it here to confirm no panic or build error with the env var set. + httpServer := CreateHTTPServerForRoutedMode(":0", us, "", "") + require.NotNil(t, httpServer, "HTTP server should be created with custom session timeout") - // CreateHTTPServerForRoutedMode internally calls envutil.GetEnvDuration. - // We verify indirectly via newFilteredServerCache: a cache created with the env- - // driven timeout must not evict an entry that is younger than 2 hours. + // Verify the cache honours the 2h TTL: a just-created entry must NOT be evicted. cache := newFilteredServerCache(2 * time.Hour) callCount := 0 creator := func() *sdk.Server { callCount++ return sdk.NewServer(&sdk.Implementation{Name: "test", Version: "1.0"}, &sdk.ServerOptions{}) } - cache.getOrCreate("backend", "session1", creator) - // Entry was just created; it should NOT be evicted by the 2h TTL cache. cache.getOrCreate("backend", "session1", creator) assert.Equal(t, 1, callCount, "session should be reused within the 2h timeout") }) @@ -844,20 +844,16 @@ func TestRoutedModeSessionTimeout_ReadsEnvVar(t *testing.T) { t.Run("default 6h timeout when env unset", func(t *testing.T) { t.Setenv("MCP_GATEWAY_SESSION_TIMEOUT", "") - ctx := context.Background() - cfg := &config.Config{Servers: map[string]*config.ServerConfig{}} - us, err := NewUnified(ctx, cfg) - require.NoError(t, err) - defer us.Close() + httpServer := CreateHTTPServerForRoutedMode(":0", us, "", "") + require.NotNil(t, httpServer, "HTTP server should be created with default session timeout") - // With no env var, the default is 6 hours; a freshly-created session must survive. + // With no env var the default is 6 hours; a freshly-created session must survive. cache := newFilteredServerCache(6 * time.Hour) callCount := 0 creator := func() *sdk.Server { callCount++ return sdk.NewServer(&sdk.Implementation{Name: "test", Version: "1.0"}, &sdk.ServerOptions{}) } - cache.getOrCreate("backend", "session1", creator) cache.getOrCreate("backend", "session1", creator) assert.Equal(t, 1, callCount, "session should be reused within the 6h default timeout") From fd8c9ecfe9b790cb60db384a5b00d898e9a6b7f6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 13:30:38 +0000 Subject: [PATCH 4/4] Refactor: extract getRoutedSessionTimeout() helper and fix test to assert env-var wiring directly - Extract getRoutedSessionTimeout() in routed.go so the env-var lookup is testable independently of the HTTP server construction. - Rewrite TestRoutedModeSessionTimeout_ReadsEnvVar to call getRoutedSessionTimeout() directly, asserting the returned value matches the env var (custom) or 6h (default). This actually verifies the wiring rather than testing an unrelated cache instance. - Rename "env unset" subtest to "env var is empty" to match t.Setenv("", "") semantics. Agent-Logs-Url: https://github.com/github/gh-aw-mcpg/sessions/5ceee355-b2fa-422c-9036-251370a5ff51 Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com> --- internal/server/routed.go | 14 ++++++--- internal/server/routed_test.go | 55 +++++++++------------------------- 2 files changed, 24 insertions(+), 45 deletions(-) diff --git a/internal/server/routed.go b/internal/server/routed.go index 882ce2326..28a4ac16d 100644 --- a/internal/server/routed.go +++ b/internal/server/routed.go @@ -121,6 +121,13 @@ func (c *filteredServerCache) getOrCreate(backendID, sessionID string, creator f return server } +// getRoutedSessionTimeout returns the session timeout for routed mode by reading +// MCP_GATEWAY_SESSION_TIMEOUT with a 6-hour default, matching unified mode behaviour. +// It is extracted as a package-level function so tests can assert the env-var wiring directly. +func getRoutedSessionTimeout() time.Duration { + return envutil.GetEnvDuration("MCP_GATEWAY_SESSION_TIMEOUT", 6*time.Hour) +} + // CreateHTTPServerForRoutedMode creates an HTTP server for routed mode // In routed mode, each backend is accessible at /mcp/ // Multiple routes from the same Authorization header share a session @@ -141,10 +148,9 @@ func CreateHTTPServerForRoutedMode(addr string, unifiedServer *UnifiedServer, ap // Create server cache for session-aware server instances. // TTL matches the SDK SessionTimeout so cache entries expire with sessions. - // Reads MCP_GATEWAY_SESSION_TIMEOUT (same variable as unified mode) with a 6h default - // so that long-running agentic workflows (e.g. >30 min GitHub Actions jobs) do not - // receive spurious "session not found" errors. - routedSessionTimeout := envutil.GetEnvDuration("MCP_GATEWAY_SESSION_TIMEOUT", 6*time.Hour) + // Long-running agentic workflows (e.g. >30 min GitHub Actions jobs) need this + // to be at least as long as the workflow to avoid spurious "session not found" errors. + routedSessionTimeout := getRoutedSessionTimeout() serverCache := newFilteredServerCache(routedSessionTimeout) // Create a proxy for each backend server diff --git a/internal/server/routed_test.go b/internal/server/routed_test.go index 761955d13..74e37cfd3 100644 --- a/internal/server/routed_test.go +++ b/internal/server/routed_test.go @@ -809,53 +809,26 @@ func TestCreateHTTPServerForRoutedMode_OAuth(t *testing.T) { } } -// TestRoutedModeSessionTimeout_ReadsEnvVar verifies that CreateHTTPServerForRoutedMode -// uses MCP_GATEWAY_SESSION_TIMEOUT for the SDK session timeout, matching unified mode. -// This ensures long-running agentic workflows (>30 min) do not get spurious -// "session not found" errors that would occur with the previous hardcoded 30-minute timeout. +// TestRoutedModeSessionTimeout_ReadsEnvVar verifies that the session timeout used by +// CreateHTTPServerForRoutedMode is driven by MCP_GATEWAY_SESSION_TIMEOUT, matching +// unified mode. Long-running agentic workflows (>30 min) previously failed with +// "session not found" errors because routed mode had a hardcoded 30-minute timeout. func TestRoutedModeSessionTimeout_ReadsEnvVar(t *testing.T) { - ctx := context.Background() - cfg := &config.Config{Servers: map[string]*config.ServerConfig{}} - us, err := NewUnified(ctx, cfg) - require.NoError(t, err) - defer us.Close() - t.Run("custom timeout from env", func(t *testing.T) { t.Setenv("MCP_GATEWAY_SESSION_TIMEOUT", "2h") - - // CreateHTTPServerForRoutedMode calls envutil.GetEnvDuration("MCP_GATEWAY_SESSION_TIMEOUT", 6h) - // and passes the result to both newFilteredServerCache and sdk.StreamableHTTPOptions.SessionTimeout. - // Call it here to confirm no panic or build error with the env var set. - httpServer := CreateHTTPServerForRoutedMode(":0", us, "", "") - require.NotNil(t, httpServer, "HTTP server should be created with custom session timeout") - - // Verify the cache honours the 2h TTL: a just-created entry must NOT be evicted. - cache := newFilteredServerCache(2 * time.Hour) - callCount := 0 - creator := func() *sdk.Server { - callCount++ - return sdk.NewServer(&sdk.Implementation{Name: "test", Version: "1.0"}, &sdk.ServerOptions{}) - } - cache.getOrCreate("backend", "session1", creator) - cache.getOrCreate("backend", "session1", creator) - assert.Equal(t, 1, callCount, "session should be reused within the 2h timeout") + got := getRoutedSessionTimeout() + assert.Equal(t, 2*time.Hour, got, "getRoutedSessionTimeout should return the value from MCP_GATEWAY_SESSION_TIMEOUT") }) - t.Run("default 6h timeout when env unset", func(t *testing.T) { + t.Run("default 6h timeout when env var is empty", func(t *testing.T) { t.Setenv("MCP_GATEWAY_SESSION_TIMEOUT", "") + got := getRoutedSessionTimeout() + assert.Equal(t, 6*time.Hour, got, "getRoutedSessionTimeout should default to 6h when MCP_GATEWAY_SESSION_TIMEOUT is empty") + }) - httpServer := CreateHTTPServerForRoutedMode(":0", us, "", "") - require.NotNil(t, httpServer, "HTTP server should be created with default session timeout") - - // With no env var the default is 6 hours; a freshly-created session must survive. - cache := newFilteredServerCache(6 * time.Hour) - callCount := 0 - creator := func() *sdk.Server { - callCount++ - return sdk.NewServer(&sdk.Implementation{Name: "test", Version: "1.0"}, &sdk.ServerOptions{}) - } - cache.getOrCreate("backend", "session1", creator) - cache.getOrCreate("backend", "session1", creator) - assert.Equal(t, 1, callCount, "session should be reused within the 6h default timeout") + t.Run("default 6h timeout is not the old 30min value", func(t *testing.T) { + t.Setenv("MCP_GATEWAY_SESSION_TIMEOUT", "") + got := getRoutedSessionTimeout() + assert.Greater(t, got, 30*time.Minute, "default timeout must exceed the old hardcoded 30-minute value") }) }