77 "os"
88 "strings"
99 "sync"
10+ "time"
1011
1112 "github.com/githubnext/gh-aw-mcpg/internal/config"
1213 "github.com/githubnext/gh-aw-mcpg/internal/logger"
@@ -17,6 +18,12 @@ import (
1718
1819var logLauncher = logger .New ("launcher:launcher" )
1920
21+ // connectionResult is used to return the result of a connection attempt from a goroutine
22+ type connectionResult struct {
23+ conn * mcp.Connection
24+ err error
25+ }
26+
2027// Launcher manages backend MCP server connections
2128type Launcher struct {
2229 ctx context.Context
@@ -25,6 +32,7 @@ type Launcher struct {
2532 sessionPool * SessionConnectionPool // Session-aware connections (stateful/stdio)
2633 mu sync.RWMutex
2734 runningInContainer bool
35+ startupTimeout time.Duration // Timeout for backend server startup
2836}
2937
3038// New creates a new Launcher
@@ -36,12 +44,22 @@ func New(ctx context.Context, cfg *config.Config) *Launcher {
3644 log .Println ("[LAUNCHER] Detected running inside a container" )
3745 }
3846
47+ // Get startup timeout from config, default to config.DefaultStartupTimeout seconds
48+ startupTimeout := time .Duration (config .DefaultStartupTimeout ) * time .Second
49+ if cfg .Gateway != nil && cfg .Gateway .StartupTimeout > 0 {
50+ startupTimeout = time .Duration (cfg .Gateway .StartupTimeout ) * time .Second
51+ logLauncher .Printf ("Using configured startup timeout: %v" , startupTimeout )
52+ } else {
53+ logLauncher .Printf ("Using default startup timeout: %v" , startupTimeout )
54+ }
55+
3956 return & Launcher {
4057 ctx : ctx ,
4158 config : cfg ,
4259 connections : make (map [string ]* mcp.Connection ),
4360 sessionPool : NewSessionConnectionPool (ctx ),
4461 runningInContainer : inContainer ,
62+ startupTimeout : startupTimeout ,
4563 }
4664}
4765
@@ -147,41 +165,66 @@ func GetOrLaunch(l *Launcher, serverID string) (*mcp.Connection, error) {
147165 log .Printf ("[LAUNCHER] Additional env vars: %v" , sanitize .TruncateSecretMap (serverCfg .Env ))
148166 }
149167
150- // Create connection
151- conn , err := mcp .NewConnection (l .ctx , serverCfg .Command , serverCfg .Args , serverCfg .Env )
152- if err != nil {
153- // Enhanced error logging for command-based servers
154- logger .LogError ("backend" , "Failed to launch MCP backend server: %s, error=%v" , serverID , err )
155- log .Printf ("[LAUNCHER] ❌ FAILED to launch server '%s'" , serverID )
156- log .Printf ("[LAUNCHER] Error: %v" , err )
157- log .Printf ("[LAUNCHER] Debug Information:" )
158- log .Printf ("[LAUNCHER] - Command: %s" , serverCfg .Command )
159- log .Printf ("[LAUNCHER] - Args: %v" , serverCfg .Args )
160- log .Printf ("[LAUNCHER] - Env vars: %v" , sanitize .TruncateSecretMap (serverCfg .Env ))
161- log .Printf ("[LAUNCHER] - Running in container: %v" , l .runningInContainer )
162- log .Printf ("[LAUNCHER] - Is direct command: %v" , isDirectCommand )
163-
164- if isDirectCommand && l .runningInContainer {
165- log .Printf ("[LAUNCHER] ⚠️ Possible causes:" )
166- log .Printf ("[LAUNCHER] - Command '%s' may not be installed in the gateway container" , serverCfg .Command )
167- log .Printf ("[LAUNCHER] - Consider using 'container' config instead of 'command'" )
168- log .Printf ("[LAUNCHER] - Or add '%s' to the gateway's Dockerfile" , serverCfg .Command )
169- } else if isDirectCommand {
170- log .Printf ("[LAUNCHER] ⚠️ Possible causes:" )
171- log .Printf ("[LAUNCHER] - Command '%s' may not be in PATH" , serverCfg .Command )
172- log .Printf ("[LAUNCHER] - Check if '%s' is installed: which %s" , serverCfg .Command , serverCfg .Command )
173- log .Printf ("[LAUNCHER] - Verify file permissions and execute bit" )
168+ log .Printf ("[LAUNCHER] Starting server with %v timeout" , l .startupTimeout )
169+ logLauncher .Printf ("Starting server with timeout: serverID=%s, timeout=%v" , serverID , l .startupTimeout )
170+
171+ // Create a buffered channel to receive connection result
172+ // Buffer size of 1 prevents goroutine leak if timeout occurs before connection completes
173+ resultChan := make (chan connectionResult , 1 )
174+
175+ // Launch connection in a goroutine
176+ go func () {
177+ conn , err := mcp .NewConnection (l .ctx , serverCfg .Command , serverCfg .Args , serverCfg .Env )
178+ resultChan <- connectionResult {conn , err }
179+ }()
180+
181+ // Wait for connection with timeout
182+ select {
183+ case result := <- resultChan :
184+ conn , err := result .conn , result .err
185+ if err != nil {
186+ // Enhanced error logging for command-based servers
187+ logger .LogError ("backend" , "Failed to launch MCP backend server: %s, error=%v" , serverID , err )
188+ log .Printf ("[LAUNCHER] ❌ FAILED to launch server '%s'" , serverID )
189+ log .Printf ("[LAUNCHER] Error: %v" , err )
190+ log .Printf ("[LAUNCHER] Debug Information:" )
191+ log .Printf ("[LAUNCHER] - Command: %s" , serverCfg .Command )
192+ log .Printf ("[LAUNCHER] - Args: %v" , serverCfg .Args )
193+ log .Printf ("[LAUNCHER] - Env vars: %v" , sanitize .TruncateSecretMap (serverCfg .Env ))
194+ log .Printf ("[LAUNCHER] - Running in container: %v" , l .runningInContainer )
195+ log .Printf ("[LAUNCHER] - Is direct command: %v" , isDirectCommand )
196+ log .Printf ("[LAUNCHER] - Startup timeout: %v" , l .startupTimeout )
197+
198+ if isDirectCommand && l .runningInContainer {
199+ log .Printf ("[LAUNCHER] ⚠️ Possible causes:" )
200+ log .Printf ("[LAUNCHER] - Command '%s' may not be installed in the gateway container" , serverCfg .Command )
201+ log .Printf ("[LAUNCHER] - Consider using 'container' config instead of 'command'" )
202+ log .Printf ("[LAUNCHER] - Or add '%s' to the gateway's Dockerfile" , serverCfg .Command )
203+ } else if isDirectCommand {
204+ log .Printf ("[LAUNCHER] ⚠️ Possible causes:" )
205+ log .Printf ("[LAUNCHER] - Command '%s' may not be in PATH" , serverCfg .Command )
206+ log .Printf ("[LAUNCHER] - Check if '%s' is installed: which %s" , serverCfg .Command , serverCfg .Command )
207+ log .Printf ("[LAUNCHER] - Verify file permissions and execute bit" )
208+ }
209+
210+ return nil , fmt .Errorf ("failed to create connection: %w" , err )
174211 }
175212
176- return nil , fmt .Errorf ("failed to create connection: %w" , err )
177- }
213+ logger .LogInfo ("backend" , "Successfully launched MCP backend server: %s" , serverID )
214+ log .Printf ("[LAUNCHER] Successfully launched: %s" , serverID )
215+ logLauncher .Printf ("Connection established: serverID=%s" , serverID )
178216
179- logger .LogInfo ("backend" , "Successfully launched MCP backend server: %s" , serverID )
180- log .Printf ("[LAUNCHER] Successfully launched: %s" , serverID )
181- logLauncher .Printf ("Connection established: serverID=%s" , serverID )
217+ l .connections [serverID ] = conn
218+ return conn , nil
182219
183- l .connections [serverID ] = conn
184- return conn , nil
220+ case <- time .After (l .startupTimeout ):
221+ // Timeout occurred
222+ logger .LogError ("backend" , "MCP backend server startup timeout: %s, timeout=%v" , serverID , l .startupTimeout )
223+ log .Printf ("[LAUNCHER] ❌ Server startup timed out after %v" , l .startupTimeout )
224+ log .Printf ("[LAUNCHER] ⚠️ The server may be hanging or taking too long to initialize" )
225+ log .Printf ("[LAUNCHER] ⚠️ Consider increasing 'startupTimeout' in gateway config if server needs more time" )
226+ return nil , fmt .Errorf ("server startup timeout after %v" , l .startupTimeout )
227+ }
185228}
186229
187230// GetOrLaunchForSession returns a session-aware connection or launches a new one
@@ -267,32 +310,59 @@ func GetOrLaunchForSession(l *Launcher, serverID, sessionID string) (*mcp.Connec
267310 log .Printf ("[LAUNCHER] Additional env vars: %v" , sanitize .TruncateSecretMap (serverCfg .Env ))
268311 }
269312
270- // Create connection
271- conn , err := mcp .NewConnection (l .ctx , serverCfg .Command , serverCfg .Args , serverCfg .Env )
272- if err != nil {
273- logger .LogError ("backend" , "Failed to launch MCP backend server for session: server=%s, session=%s, error=%v" , serverID , sessionID , err )
274- log .Printf ("[LAUNCHER] ❌ FAILED to launch server '%s' for session '%s'" , serverID , sessionID )
275- log .Printf ("[LAUNCHER] Error: %v" , err )
276- log .Printf ("[LAUNCHER] Debug Information:" )
277- log .Printf ("[LAUNCHER] - Command: %s" , serverCfg .Command )
278- log .Printf ("[LAUNCHER] - Args: %v" , serverCfg .Args )
279- log .Printf ("[LAUNCHER] - Env vars: %v" , sanitize .TruncateSecretMap (serverCfg .Env ))
280- log .Printf ("[LAUNCHER] - Running in container: %v" , l .runningInContainer )
281- log .Printf ("[LAUNCHER] - Is direct command: %v" , isDirectCommand )
282-
283- // Record error in session pool
284- l .sessionPool .RecordError (serverID , sessionID )
313+ log .Printf ("[LAUNCHER] Starting server for session with %v timeout" , l .startupTimeout )
314+ logLauncher .Printf ("Starting session server with timeout: serverID=%s, sessionID=%s, timeout=%v" , serverID , sessionID , l .startupTimeout )
285315
286- return nil , fmt .Errorf ("failed to create connection: %w" , err )
287- }
316+ // Create a buffered channel to receive connection result
317+ // Buffer size of 1 prevents goroutine leak if timeout occurs before connection completes
318+ resultChan := make (chan connectionResult , 1 )
288319
289- logger .LogInfo ("backend" , "Successfully launched MCP backend server for session: server=%s, session=%s" , serverID , sessionID )
290- log .Printf ("[LAUNCHER] Successfully launched: %s (session: %s)" , serverID , sessionID )
291- logLauncher .Printf ("Session connection established: serverID=%s, sessionID=%s" , serverID , sessionID )
320+ // Launch connection in a goroutine
321+ go func () {
322+ conn , err := mcp .NewConnection (l .ctx , serverCfg .Command , serverCfg .Args , serverCfg .Env )
323+ resultChan <- connectionResult {conn , err }
324+ }()
292325
293- // Add to session pool
294- l .sessionPool .Set (serverID , sessionID , conn )
295- return conn , nil
326+ // Wait for connection with timeout
327+ select {
328+ case result := <- resultChan :
329+ conn , err := result .conn , result .err
330+ if err != nil {
331+ logger .LogError ("backend" , "Failed to launch MCP backend server for session: server=%s, session=%s, error=%v" , serverID , sessionID , err )
332+ log .Printf ("[LAUNCHER] ❌ FAILED to launch server '%s' for session '%s'" , serverID , sessionID )
333+ log .Printf ("[LAUNCHER] Error: %v" , err )
334+ log .Printf ("[LAUNCHER] Debug Information:" )
335+ log .Printf ("[LAUNCHER] - Command: %s" , serverCfg .Command )
336+ log .Printf ("[LAUNCHER] - Args: %v" , serverCfg .Args )
337+ log .Printf ("[LAUNCHER] - Env vars: %v" , sanitize .TruncateSecretMap (serverCfg .Env ))
338+ log .Printf ("[LAUNCHER] - Running in container: %v" , l .runningInContainer )
339+ log .Printf ("[LAUNCHER] - Is direct command: %v" , isDirectCommand )
340+ log .Printf ("[LAUNCHER] - Startup timeout: %v" , l .startupTimeout )
341+
342+ // Record error in session pool
343+ l .sessionPool .RecordError (serverID , sessionID )
344+
345+ return nil , fmt .Errorf ("failed to create connection: %w" , err )
346+ }
347+
348+ logger .LogInfo ("backend" , "Successfully launched MCP backend server for session: server=%s, session=%s" , serverID , sessionID )
349+ log .Printf ("[LAUNCHER] Successfully launched: %s (session: %s)" , serverID , sessionID )
350+ logLauncher .Printf ("Session connection established: serverID=%s, sessionID=%s" , serverID , sessionID )
351+
352+ // Add to session pool
353+ l .sessionPool .Set (serverID , sessionID , conn )
354+ return conn , nil
355+
356+ case <- time .After (l .startupTimeout ):
357+ // Timeout occurred
358+ logger .LogError ("backend" , "MCP backend server startup timeout for session: server=%s, session=%s, timeout=%v" , serverID , sessionID , l .startupTimeout )
359+ log .Printf ("[LAUNCHER] ❌ Server startup timed out after %v" , l .startupTimeout )
360+ log .Printf ("[LAUNCHER] ⚠️ The server may be hanging or taking too long to initialize" )
361+ log .Printf ("[LAUNCHER] ⚠️ Consider increasing 'startupTimeout' in gateway config if server needs more time" )
362+ // Record error in session pool before returning
363+ l .sessionPool .RecordError (serverID , sessionID )
364+ return nil , fmt .Errorf ("server startup timeout after %v" , l .startupTimeout )
365+ }
296366}
297367
298368// ServerIDs returns all configured server IDs
0 commit comments