From 9778bc3e3154fc2d66a911add01501e11a26dda1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:49:22 +0000 Subject: [PATCH 1/8] Initial plan From 107cc4ffe923d784d25143f52c6df8c34d4486d9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:53:37 +0000 Subject: [PATCH 2/8] Add startup timeout for MCP backend servers to handle dangling servers Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com> --- internal/config/config.go | 13 ++++++++ internal/launcher/launcher.go | 57 +++++++++++++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index e9e002228..0f93047bb 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -94,6 +94,19 @@ func LoadFromFile(path string) (*Config, error) { // return nil, fmt.Errorf("unknown configuration keys: %v", meta.Undecoded()) } + // Set default gateway config values if gateway section exists but fields are unset + if cfg.Gateway != nil { + if cfg.Gateway.StartupTimeout == 0 { + cfg.Gateway.StartupTimeout = 60 + } + if cfg.Gateway.ToolTimeout == 0 { + cfg.Gateway.ToolTimeout = 120 + } + if cfg.Gateway.Port == 0 { + cfg.Gateway.Port = 3000 + } + } + logConfig.Printf("Successfully loaded %d servers from TOML file", len(cfg.Servers)) return &cfg, nil } diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go index 2efae91c6..a0d5cbd7f 100644 --- a/internal/launcher/launcher.go +++ b/internal/launcher/launcher.go @@ -7,6 +7,7 @@ import ( "os" "strings" "sync" + "time" "github.com/githubnext/gh-aw-mcpg/internal/config" "github.com/githubnext/gh-aw-mcpg/internal/logger" @@ -17,6 +18,11 @@ import ( var logLauncher = logger.New("launcher:launcher") +const ( + // DefaultStartupTimeout is the default timeout for backend server startup (seconds) + DefaultStartupTimeout = 60 +) + // Launcher manages backend MCP server connections type Launcher struct { ctx context.Context @@ -25,6 +31,7 @@ type Launcher struct { sessionPool *SessionConnectionPool // Session-aware connections (stateful/stdio) mu sync.RWMutex runningInContainer bool + startupTimeout time.Duration // Timeout for backend server startup } // New creates a new Launcher @@ -36,12 +43,22 @@ func New(ctx context.Context, cfg *config.Config) *Launcher { log.Println("[LAUNCHER] Detected running inside a container") } + // Get startup timeout from config, default to 60 seconds + startupTimeout := time.Duration(DefaultStartupTimeout) * time.Second + if cfg.Gateway != nil && cfg.Gateway.StartupTimeout > 0 { + startupTimeout = time.Duration(cfg.Gateway.StartupTimeout) * time.Second + logLauncher.Printf("Using configured startup timeout: %v", startupTimeout) + } else { + logLauncher.Printf("Using default startup timeout: %v", startupTimeout) + } + return &Launcher{ ctx: ctx, config: cfg, connections: make(map[string]*mcp.Connection), sessionPool: NewSessionConnectionPool(ctx), runningInContainer: inContainer, + startupTimeout: startupTimeout, } } @@ -147,8 +164,15 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) { log.Printf("[LAUNCHER] Additional env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) } + // Create a context with timeout for server startup + ctx, cancel := context.WithTimeout(l.ctx, l.startupTimeout) + defer cancel() + + log.Printf("[LAUNCHER] Starting server with %v timeout", l.startupTimeout) + logLauncher.Printf("Starting server with timeout: serverID=%s, timeout=%v", serverID, l.startupTimeout) + // Create connection - conn, err := mcp.NewConnection(l.ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) + conn, err := mcp.NewConnection(ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) if err != nil { // Enhanced error logging for command-based servers logger.LogError("backend", "Failed to launch MCP backend server: %s, error=%v", serverID, err) @@ -160,6 +184,16 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) { log.Printf("[LAUNCHER] - Env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) log.Printf("[LAUNCHER] - Running in container: %v", l.runningInContainer) log.Printf("[LAUNCHER] - Is direct command: %v", isDirectCommand) + log.Printf("[LAUNCHER] - Startup timeout: %v", l.startupTimeout) + + // Check if it's a timeout error + if ctx.Err() == context.DeadlineExceeded { + logger.LogError("backend", "MCP backend server startup timeout: %s, timeout=%v", serverID, l.startupTimeout) + log.Printf("[LAUNCHER] ⚠️ Server startup timed out after %v", l.startupTimeout) + log.Printf("[LAUNCHER] ⚠️ The server may be hanging or taking too long to initialize") + log.Printf("[LAUNCHER] ⚠️ Consider increasing 'startupTimeout' in gateway config if server needs more time") + return nil, fmt.Errorf("server startup timeout after %v: %w", l.startupTimeout, err) + } if isDirectCommand && l.runningInContainer { log.Printf("[LAUNCHER] ⚠️ Possible causes:") @@ -267,8 +301,15 @@ func GetOrLaunchForSession(l *Launcher, serverID, sessionID string) (*mcp.Connec log.Printf("[LAUNCHER] Additional env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) } + // Create a context with timeout for server startup + ctx, cancel := context.WithTimeout(l.ctx, l.startupTimeout) + defer cancel() + + log.Printf("[LAUNCHER] Starting server for session with %v timeout", l.startupTimeout) + logLauncher.Printf("Starting session server with timeout: serverID=%s, sessionID=%s, timeout=%v", serverID, sessionID, l.startupTimeout) + // Create connection - conn, err := mcp.NewConnection(l.ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) + conn, err := mcp.NewConnection(ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) if err != nil { logger.LogError("backend", "Failed to launch MCP backend server for session: server=%s, session=%s, error=%v", serverID, sessionID, err) log.Printf("[LAUNCHER] ❌ FAILED to launch server '%s' for session '%s'", serverID, sessionID) @@ -279,6 +320,18 @@ func GetOrLaunchForSession(l *Launcher, serverID, sessionID string) (*mcp.Connec log.Printf("[LAUNCHER] - Env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) log.Printf("[LAUNCHER] - Running in container: %v", l.runningInContainer) log.Printf("[LAUNCHER] - Is direct command: %v", isDirectCommand) + log.Printf("[LAUNCHER] - Startup timeout: %v", l.startupTimeout) + + // Check if it's a timeout error + if ctx.Err() == context.DeadlineExceeded { + logger.LogError("backend", "MCP backend server startup timeout for session: server=%s, session=%s, timeout=%v", serverID, sessionID, l.startupTimeout) + log.Printf("[LAUNCHER] ⚠️ Server startup timed out after %v", l.startupTimeout) + log.Printf("[LAUNCHER] ⚠️ The server may be hanging or taking too long to initialize") + log.Printf("[LAUNCHER] ⚠️ Consider increasing 'startupTimeout' in gateway config if server needs more time") + // Record error in session pool before returning + l.sessionPool.RecordError(serverID, sessionID) + return nil, fmt.Errorf("server startup timeout after %v: %w", l.startupTimeout, err) + } // Record error in session pool l.sessionPool.RecordError(serverID, sessionID) From a1140ca4773361db74266859219292eaf255646c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:55:40 +0000 Subject: [PATCH 3/8] Add tests for startup timeout configuration Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com> --- internal/launcher/launcher_test.go | 75 ++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/internal/launcher/launcher_test.go b/internal/launcher/launcher_test.go index bccf85177..412ef03f2 100644 --- a/internal/launcher/launcher_test.go +++ b/internal/launcher/launcher_test.go @@ -549,3 +549,78 @@ func TestGetOrLaunchForSession_InvalidServer(t *testing.T) { assert.Nil(t, conn) assert.Contains(t, err.Error(), "not found in config") } + +func TestLauncher_StartupTimeout(t *testing.T) { + // Test that launcher respects the startup timeout from config + tests := []struct { + name string + configTimeout int + expectedTimeout string + }{ + { + name: "default timeout (60 seconds)", + configTimeout: 0, // 0 means use default + expectedTimeout: "1m0s", + }, + { + name: "custom timeout (30 seconds)", + configTimeout: 30, + expectedTimeout: "30s", + }, + { + name: "custom timeout (120 seconds)", + configTimeout: 120, + expectedTimeout: "2m0s", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + cfg := &config.Config{ + Servers: map[string]*config.ServerConfig{ + "test-server": { + Type: "http", + URL: "http://example.com", + }, + }, + Gateway: &config.GatewayConfig{ + Port: 3000, + StartupTimeout: tt.configTimeout, + ToolTimeout: 120, + }, + } + + // If timeout is 0, set it to default to match LoadFromFile behavior + if cfg.Gateway.StartupTimeout == 0 { + cfg.Gateway.StartupTimeout = 60 + } + + l := New(ctx, cfg) + defer l.Close() + + // Verify the timeout was set correctly + assert.Equal(t, tt.expectedTimeout, l.startupTimeout.String()) + }) + } +} + +func TestLauncher_TimeoutWithNilGateway(t *testing.T) { + // Test that launcher uses default timeout when Gateway config is nil + ctx := context.Background() + cfg := &config.Config{ + Servers: map[string]*config.ServerConfig{ + "test-server": { + Type: "http", + URL: "http://example.com", + }, + }, + Gateway: nil, // No gateway config + } + + l := New(ctx, cfg) + defer l.Close() + + // Should use default timeout (60 seconds) + assert.Equal(t, "1m0s", l.startupTimeout.String()) +} From 5282bb0f64adf5e3a561d8797be6334967c8ec54 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:58:12 +0000 Subject: [PATCH 4/8] Fix linting error in common_test.go (pre-existing issue) --- internal/logger/common_test.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/internal/logger/common_test.go b/internal/logger/common_test.go index ae1a085bf..8c9a44e16 100644 --- a/internal/logger/common_test.go +++ b/internal/logger/common_test.go @@ -4,7 +4,6 @@ import ( "fmt" "os" "path/filepath" - "strings" "sync" "testing" @@ -607,7 +606,7 @@ func TestInitLogger_MarkdownLoggerFallback(t *testing.T) { } func TestInitLogger_SetupError(t *testing.T) { - assert := assert.New(t) + a := assert.New(t) tmpDir := t.TempDir() logDir := filepath.Join(tmpDir, "logs") fileName := "test.log" @@ -625,14 +624,14 @@ func TestInitLogger_SetupError(t *testing.T) { }, ) - assert.Error(err, "initLogger should return error on setup failure") - assert.Equal(assert.AnError, err, "Error should match setup error") - assert.Nil(logger, "logger should be nil on setup error") + a.Error(err, "initLogger should return error on setup failure") + a.Equal(assert.AnError, err, "Error should match setup error") + a.Nil(logger, "logger should be nil on setup error") // Verify the log file was created but then closed logPath := filepath.Join(logDir, fileName) _, err = os.Stat(logPath) - assert.NoError(err, "Log file should exist even after setup error") + a.NoError(err, "Log file should exist even after setup error") } func TestInitLogger_FileFlags(t *testing.T) { From a334921a8c97f87b5a47d3e43186fdee5879d779 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:01:59 +0000 Subject: [PATCH 5/8] Fix timeout implementation to not cancel connection after startup Use goroutine with timeout channel instead of context.WithTimeout to prevent connection from being canceled after successful startup. Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com> --- internal/launcher/launcher.go | 184 +++++++++++++++++++--------------- 1 file changed, 103 insertions(+), 81 deletions(-) diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go index a0d5cbd7f..7c12365bb 100644 --- a/internal/launcher/launcher.go +++ b/internal/launcher/launcher.go @@ -164,58 +164,69 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) { log.Printf("[LAUNCHER] Additional env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) } - // Create a context with timeout for server startup - ctx, cancel := context.WithTimeout(l.ctx, l.startupTimeout) - defer cancel() - log.Printf("[LAUNCHER] Starting server with %v timeout", l.startupTimeout) logLauncher.Printf("Starting server with timeout: serverID=%s, timeout=%v", serverID, l.startupTimeout) - // Create connection - conn, err := mcp.NewConnection(ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) - if err != nil { - // Enhanced error logging for command-based servers - logger.LogError("backend", "Failed to launch MCP backend server: %s, error=%v", serverID, err) - log.Printf("[LAUNCHER] ❌ FAILED to launch server '%s'", serverID) - log.Printf("[LAUNCHER] Error: %v", err) - log.Printf("[LAUNCHER] Debug Information:") - log.Printf("[LAUNCHER] - Command: %s", serverCfg.Command) - log.Printf("[LAUNCHER] - Args: %v", serverCfg.Args) - log.Printf("[LAUNCHER] - Env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) - log.Printf("[LAUNCHER] - Running in container: %v", l.runningInContainer) - log.Printf("[LAUNCHER] - Is direct command: %v", isDirectCommand) - log.Printf("[LAUNCHER] - Startup timeout: %v", l.startupTimeout) - - // Check if it's a timeout error - if ctx.Err() == context.DeadlineExceeded { - logger.LogError("backend", "MCP backend server startup timeout: %s, timeout=%v", serverID, l.startupTimeout) - log.Printf("[LAUNCHER] ⚠️ Server startup timed out after %v", l.startupTimeout) - log.Printf("[LAUNCHER] ⚠️ The server may be hanging or taking too long to initialize") - log.Printf("[LAUNCHER] ⚠️ Consider increasing 'startupTimeout' in gateway config if server needs more time") - return nil, fmt.Errorf("server startup timeout after %v: %w", l.startupTimeout, err) - } + // Create a channel to receive connection result + type connResult struct { + conn *mcp.Connection + err error + } + resultChan := make(chan connResult, 1) + + // Launch connection in a goroutine + go func() { + conn, err := mcp.NewConnection(l.ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) + resultChan <- connResult{conn, err} + }() + + // Wait for connection with timeout + select { + case result := <-resultChan: + conn, err := result.conn, result.err + if err != nil { + // Enhanced error logging for command-based servers + logger.LogError("backend", "Failed to launch MCP backend server: %s, error=%v", serverID, err) + log.Printf("[LAUNCHER] ❌ FAILED to launch server '%s'", serverID) + log.Printf("[LAUNCHER] Error: %v", err) + log.Printf("[LAUNCHER] Debug Information:") + log.Printf("[LAUNCHER] - Command: %s", serverCfg.Command) + log.Printf("[LAUNCHER] - Args: %v", serverCfg.Args) + log.Printf("[LAUNCHER] - Env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) + log.Printf("[LAUNCHER] - Running in container: %v", l.runningInContainer) + log.Printf("[LAUNCHER] - Is direct command: %v", isDirectCommand) + log.Printf("[LAUNCHER] - Startup timeout: %v", l.startupTimeout) + + if isDirectCommand && l.runningInContainer { + log.Printf("[LAUNCHER] ⚠️ Possible causes:") + log.Printf("[LAUNCHER] - Command '%s' may not be installed in the gateway container", serverCfg.Command) + log.Printf("[LAUNCHER] - Consider using 'container' config instead of 'command'") + log.Printf("[LAUNCHER] - Or add '%s' to the gateway's Dockerfile", serverCfg.Command) + } else if isDirectCommand { + log.Printf("[LAUNCHER] ⚠️ Possible causes:") + log.Printf("[LAUNCHER] - Command '%s' may not be in PATH", serverCfg.Command) + log.Printf("[LAUNCHER] - Check if '%s' is installed: which %s", serverCfg.Command, serverCfg.Command) + log.Printf("[LAUNCHER] - Verify file permissions and execute bit") + } - if isDirectCommand && l.runningInContainer { - log.Printf("[LAUNCHER] ⚠️ Possible causes:") - log.Printf("[LAUNCHER] - Command '%s' may not be installed in the gateway container", serverCfg.Command) - log.Printf("[LAUNCHER] - Consider using 'container' config instead of 'command'") - log.Printf("[LAUNCHER] - Or add '%s' to the gateway's Dockerfile", serverCfg.Command) - } else if isDirectCommand { - log.Printf("[LAUNCHER] ⚠️ Possible causes:") - log.Printf("[LAUNCHER] - Command '%s' may not be in PATH", serverCfg.Command) - log.Printf("[LAUNCHER] - Check if '%s' is installed: which %s", serverCfg.Command, serverCfg.Command) - log.Printf("[LAUNCHER] - Verify file permissions and execute bit") + return nil, fmt.Errorf("failed to create connection: %w", err) } - return nil, fmt.Errorf("failed to create connection: %w", err) - } + logger.LogInfo("backend", "Successfully launched MCP backend server: %s", serverID) + log.Printf("[LAUNCHER] Successfully launched: %s", serverID) + logLauncher.Printf("Connection established: serverID=%s", serverID) - logger.LogInfo("backend", "Successfully launched MCP backend server: %s", serverID) - log.Printf("[LAUNCHER] Successfully launched: %s", serverID) - logLauncher.Printf("Connection established: serverID=%s", serverID) + l.connections[serverID] = conn + return conn, nil - l.connections[serverID] = conn - return conn, nil + case <-time.After(l.startupTimeout): + // Timeout occurred + logger.LogError("backend", "MCP backend server startup timeout: %s, timeout=%v", serverID, l.startupTimeout) + log.Printf("[LAUNCHER] ❌ Server startup timed out after %v", l.startupTimeout) + log.Printf("[LAUNCHER] ⚠️ The server may be hanging or taking too long to initialize") + log.Printf("[LAUNCHER] ⚠️ Consider increasing 'startupTimeout' in gateway config if server needs more time") + return nil, fmt.Errorf("server startup timeout after %v", l.startupTimeout) + } } // GetOrLaunchForSession returns a session-aware connection or launches a new one @@ -301,51 +312,62 @@ func GetOrLaunchForSession(l *Launcher, serverID, sessionID string) (*mcp.Connec log.Printf("[LAUNCHER] Additional env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) } - // Create a context with timeout for server startup - ctx, cancel := context.WithTimeout(l.ctx, l.startupTimeout) - defer cancel() - log.Printf("[LAUNCHER] Starting server for session with %v timeout", l.startupTimeout) logLauncher.Printf("Starting session server with timeout: serverID=%s, sessionID=%s, timeout=%v", serverID, sessionID, l.startupTimeout) - // Create connection - conn, err := mcp.NewConnection(ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) - if err != nil { - logger.LogError("backend", "Failed to launch MCP backend server for session: server=%s, session=%s, error=%v", serverID, sessionID, err) - log.Printf("[LAUNCHER] ❌ FAILED to launch server '%s' for session '%s'", serverID, sessionID) - log.Printf("[LAUNCHER] Error: %v", err) - log.Printf("[LAUNCHER] Debug Information:") - log.Printf("[LAUNCHER] - Command: %s", serverCfg.Command) - log.Printf("[LAUNCHER] - Args: %v", serverCfg.Args) - log.Printf("[LAUNCHER] - Env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) - log.Printf("[LAUNCHER] - Running in container: %v", l.runningInContainer) - log.Printf("[LAUNCHER] - Is direct command: %v", isDirectCommand) - log.Printf("[LAUNCHER] - Startup timeout: %v", l.startupTimeout) - - // Check if it's a timeout error - if ctx.Err() == context.DeadlineExceeded { - logger.LogError("backend", "MCP backend server startup timeout for session: server=%s, session=%s, timeout=%v", serverID, sessionID, l.startupTimeout) - log.Printf("[LAUNCHER] ⚠️ Server startup timed out after %v", l.startupTimeout) - log.Printf("[LAUNCHER] ⚠️ The server may be hanging or taking too long to initialize") - log.Printf("[LAUNCHER] ⚠️ Consider increasing 'startupTimeout' in gateway config if server needs more time") - // Record error in session pool before returning + // Create a channel to receive connection result + type connResult struct { + conn *mcp.Connection + err error + } + resultChan := make(chan connResult, 1) + + // Launch connection in a goroutine + go func() { + conn, err := mcp.NewConnection(l.ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) + resultChan <- connResult{conn, err} + }() + + // Wait for connection with timeout + select { + case result := <-resultChan: + conn, err := result.conn, result.err + if err != nil { + logger.LogError("backend", "Failed to launch MCP backend server for session: server=%s, session=%s, error=%v", serverID, sessionID, err) + log.Printf("[LAUNCHER] ❌ FAILED to launch server '%s' for session '%s'", serverID, sessionID) + log.Printf("[LAUNCHER] Error: %v", err) + log.Printf("[LAUNCHER] Debug Information:") + log.Printf("[LAUNCHER] - Command: %s", serverCfg.Command) + log.Printf("[LAUNCHER] - Args: %v", serverCfg.Args) + log.Printf("[LAUNCHER] - Env vars: %v", sanitize.TruncateSecretMap(serverCfg.Env)) + log.Printf("[LAUNCHER] - Running in container: %v", l.runningInContainer) + log.Printf("[LAUNCHER] - Is direct command: %v", isDirectCommand) + log.Printf("[LAUNCHER] - Startup timeout: %v", l.startupTimeout) + + // Record error in session pool l.sessionPool.RecordError(serverID, sessionID) - return nil, fmt.Errorf("server startup timeout after %v: %w", l.startupTimeout, err) - } - // Record error in session pool - l.sessionPool.RecordError(serverID, sessionID) + return nil, fmt.Errorf("failed to create connection: %w", err) + } - return nil, fmt.Errorf("failed to create connection: %w", err) - } + logger.LogInfo("backend", "Successfully launched MCP backend server for session: server=%s, session=%s", serverID, sessionID) + log.Printf("[LAUNCHER] Successfully launched: %s (session: %s)", serverID, sessionID) + logLauncher.Printf("Session connection established: serverID=%s, sessionID=%s", serverID, sessionID) - logger.LogInfo("backend", "Successfully launched MCP backend server for session: server=%s, session=%s", serverID, sessionID) - log.Printf("[LAUNCHER] Successfully launched: %s (session: %s)", serverID, sessionID) - logLauncher.Printf("Session connection established: serverID=%s, sessionID=%s", serverID, sessionID) + // Add to session pool + l.sessionPool.Set(serverID, sessionID, conn) + return conn, nil - // Add to session pool - l.sessionPool.Set(serverID, sessionID, conn) - return conn, nil + case <-time.After(l.startupTimeout): + // Timeout occurred + logger.LogError("backend", "MCP backend server startup timeout for session: server=%s, session=%s, timeout=%v", serverID, sessionID, l.startupTimeout) + log.Printf("[LAUNCHER] ❌ Server startup timed out after %v", l.startupTimeout) + log.Printf("[LAUNCHER] ⚠️ The server may be hanging or taking too long to initialize") + log.Printf("[LAUNCHER] ⚠️ Consider increasing 'startupTimeout' in gateway config if server needs more time") + // Record error in session pool before returning + l.sessionPool.RecordError(serverID, sessionID) + return nil, fmt.Errorf("server startup timeout after %v", l.startupTimeout) + } } // ServerIDs returns all configured server IDs From 2d91b62f23ba1c59a8a4d33450391d0b20fa72e2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:04:55 +0000 Subject: [PATCH 6/8] Address code review: extract connectionResult type to package level Eliminates code duplication by defining connectionResult once at the package level instead of duplicating it in both GetOrLaunch() and GetOrLaunchForSession(). --- internal/launcher/launcher.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go index 7c12365bb..1fe767d17 100644 --- a/internal/launcher/launcher.go +++ b/internal/launcher/launcher.go @@ -23,6 +23,12 @@ const ( DefaultStartupTimeout = 60 ) +// connectionResult is used to return the result of a connection attempt from a goroutine +type connectionResult struct { + conn *mcp.Connection + err error +} + // Launcher manages backend MCP server connections type Launcher struct { ctx context.Context @@ -168,16 +174,12 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) { logLauncher.Printf("Starting server with timeout: serverID=%s, timeout=%v", serverID, l.startupTimeout) // Create a channel to receive connection result - type connResult struct { - conn *mcp.Connection - err error - } - resultChan := make(chan connResult, 1) + resultChan := make(chan connectionResult, 1) // Launch connection in a goroutine go func() { conn, err := mcp.NewConnection(l.ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) - resultChan <- connResult{conn, err} + resultChan <- connectionResult{conn, err} }() // Wait for connection with timeout @@ -316,16 +318,12 @@ func GetOrLaunchForSession(l *Launcher, serverID, sessionID string) (*mcp.Connec logLauncher.Printf("Starting session server with timeout: serverID=%s, sessionID=%s, timeout=%v", serverID, sessionID, l.startupTimeout) // Create a channel to receive connection result - type connResult struct { - conn *mcp.Connection - err error - } - resultChan := make(chan connResult, 1) + resultChan := make(chan connectionResult, 1) // Launch connection in a goroutine go func() { conn, err := mcp.NewConnection(l.ctx, serverCfg.Command, serverCfg.Args, serverCfg.Env) - resultChan <- connResult{conn, err} + resultChan <- connectionResult{conn, err} }() // Wait for connection with timeout From f84a8d5fa8db052526855e2f66c76b73c4c70381 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:06:38 +0000 Subject: [PATCH 7/8] Add comments clarifying buffered channel prevents goroutine leaks The buffered channel with size 1 allows the goroutine to complete even if timeout occurs first, preventing goroutine leaks. Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com> --- internal/launcher/launcher.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go index 1fe767d17..15bb4365a 100644 --- a/internal/launcher/launcher.go +++ b/internal/launcher/launcher.go @@ -173,7 +173,8 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) { log.Printf("[LAUNCHER] Starting server with %v timeout", l.startupTimeout) logLauncher.Printf("Starting server with timeout: serverID=%s, timeout=%v", serverID, l.startupTimeout) - // Create a channel to receive connection result + // Create a buffered channel to receive connection result + // Buffer size of 1 prevents goroutine leak if timeout occurs before connection completes resultChan := make(chan connectionResult, 1) // Launch connection in a goroutine @@ -317,7 +318,8 @@ func GetOrLaunchForSession(l *Launcher, serverID, sessionID string) (*mcp.Connec log.Printf("[LAUNCHER] Starting server for session with %v timeout", l.startupTimeout) logLauncher.Printf("Starting session server with timeout: serverID=%s, sessionID=%s, timeout=%v", serverID, sessionID, l.startupTimeout) - // Create a channel to receive connection result + // Create a buffered channel to receive connection result + // Buffer size of 1 prevents goroutine leak if timeout occurs before connection completes resultChan := make(chan connectionResult, 1) // Launch connection in a goroutine From 7f1fc67367394f8e3fa78b444d055a312c26e9a6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:09:47 +0000 Subject: [PATCH 8/8] Use config constants for default timeout values Replace magic numbers with package-level constants (DefaultStartupTimeout, DefaultToolTimeout, DefaultPort) to maintain consistency and avoid duplication. Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com> --- internal/config/config.go | 21 +++++++++++++++------ internal/launcher/launcher.go | 9 ++------- internal/launcher/launcher_test.go | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 0f93047bb..f7ae0e5f4 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -13,6 +13,15 @@ import ( var logConfig = logger.New("config:config") +const ( + // DefaultPort is the default port for the gateway HTTP server + DefaultPort = 3000 + // DefaultStartupTimeout is the default timeout for backend server startup (seconds) + DefaultStartupTimeout = 60 + // DefaultToolTimeout is the default timeout for tool execution (seconds) + DefaultToolTimeout = 120 +) + // Config represents the MCPG configuration type Config struct { Servers map[string]*ServerConfig `toml:"servers"` @@ -97,13 +106,13 @@ func LoadFromFile(path string) (*Config, error) { // Set default gateway config values if gateway section exists but fields are unset if cfg.Gateway != nil { if cfg.Gateway.StartupTimeout == 0 { - cfg.Gateway.StartupTimeout = 60 + cfg.Gateway.StartupTimeout = DefaultStartupTimeout } if cfg.Gateway.ToolTimeout == 0 { - cfg.Gateway.ToolTimeout = 120 + cfg.Gateway.ToolTimeout = DefaultToolTimeout } if cfg.Gateway.Port == 0 { - cfg.Gateway.Port = 3000 + cfg.Gateway.Port = DefaultPort } } @@ -170,11 +179,11 @@ func LoadFromStdin() (*Config, error) { // Store gateway config with defaults if stdinCfg.Gateway != nil { cfg.Gateway = &GatewayConfig{ - Port: 3000, + Port: DefaultPort, APIKey: stdinCfg.Gateway.APIKey, Domain: stdinCfg.Gateway.Domain, - StartupTimeout: 60, - ToolTimeout: 120, + StartupTimeout: DefaultStartupTimeout, + ToolTimeout: DefaultToolTimeout, } if stdinCfg.Gateway.Port != nil { cfg.Gateway.Port = *stdinCfg.Gateway.Port diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go index 15bb4365a..14fac508e 100644 --- a/internal/launcher/launcher.go +++ b/internal/launcher/launcher.go @@ -18,11 +18,6 @@ import ( var logLauncher = logger.New("launcher:launcher") -const ( - // DefaultStartupTimeout is the default timeout for backend server startup (seconds) - DefaultStartupTimeout = 60 -) - // connectionResult is used to return the result of a connection attempt from a goroutine type connectionResult struct { conn *mcp.Connection @@ -49,8 +44,8 @@ func New(ctx context.Context, cfg *config.Config) *Launcher { log.Println("[LAUNCHER] Detected running inside a container") } - // Get startup timeout from config, default to 60 seconds - startupTimeout := time.Duration(DefaultStartupTimeout) * time.Second + // Get startup timeout from config, default to config.DefaultStartupTimeout seconds + startupTimeout := time.Duration(config.DefaultStartupTimeout) * time.Second if cfg.Gateway != nil && cfg.Gateway.StartupTimeout > 0 { startupTimeout = time.Duration(cfg.Gateway.StartupTimeout) * time.Second logLauncher.Printf("Using configured startup timeout: %v", startupTimeout) diff --git a/internal/launcher/launcher_test.go b/internal/launcher/launcher_test.go index 412ef03f2..5ccba0f35 100644 --- a/internal/launcher/launcher_test.go +++ b/internal/launcher/launcher_test.go @@ -593,7 +593,7 @@ func TestLauncher_StartupTimeout(t *testing.T) { // If timeout is 0, set it to default to match LoadFromFile behavior if cfg.Gateway.StartupTimeout == 0 { - cfg.Gateway.StartupTimeout = 60 + cfg.Gateway.StartupTimeout = config.DefaultStartupTimeout } l := New(ctx, cfg)