豆豆友情提示:这是一个非官方 GitHub 代理镜像,主要用于网络测试或访问加速。请勿在此进行登录、注册或处理任何敏感信息。进行这些操作请务必访问官方网站 github.com。 Raw 内容也通过此代理提供。
Skip to content

Commit 4468bf6

Browse files
committed
fix(mcp): close session write stream before exit stack unwind to prevent semaphore leaks
When MCPServerStdio.cleanup() tears down via exit_stack.aclose(), the ClientSession exits first (cancelling its task group), then the transport exits. The ClientSession cancellation kills the reader/writer tasks inside the transport before the transport's finally block can close stdin and wait for the subprocess to exit gracefully. This race prevents the subprocess from flushing multiprocessing resources, causing "leaked semaphore" warnings from Python's resource_tracker.

Fix: explicitly close the session's write stream before the exit_stack unwind. This delivers EOF to the subprocess's stdin, giving it a window to release its resources and exit cleanly before task groups are cancelled.

Also reinitialise the exit_stack and clear server_initialize_result during cleanup so the same server instance can reconnect after cleanup.

Closes #618
1 parent 9f5575a commit 4468bf6

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed

src/agents/mcp/server.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -915,6 +915,22 @@ async def cleanup(self):
915915
is_failed_connection_cleanup = self.session is None
916916

917917
try:
918+
# Signal the subprocess to shut down gracefully before the exit stack
919+
# unwinds. The exit stack tears down contexts in LIFO order: the
920+
# ClientSession exits first (cancelling its internal task group), then
921+
# the transport (stdio_client / sse_client) exits. Because the
922+
# ClientSession cancellation kills the reader/writer tasks inside the
923+
# transport, the transport's ``finally`` block — which tries to close
924+
# stdin and wait for the process — may never execute cleanly. By
925+
# closing the session's write stream ahead of time we deliver an EOF to
926+
# the subprocess's stdin, giving it a chance to flush resources (e.g.
927+
# multiprocessing semaphores) and exit on its own before we cancel the
928+
# task groups.
929+
if self.session is not None:
930+
try:
931+
await self.session._write_stream.aclose()
932+
except Exception:
933+
pass # Best-effort; the stream may already be closed.
918934
await self.exit_stack.aclose()
919935
except asyncio.CancelledError as e:
920936
logger.debug(f"Cleanup cancelled for MCP server '{self.name}': {e}")
@@ -982,6 +998,10 @@ async def cleanup(self):
982998
finally:
983999
self.session = None
9841000
self._get_session_id = None
1001+
self.server_initialize_result = None
1002+
# Reinitialize the exit stack so the same server instance can
1003+
# reconnect cleanly after cleanup.
1004+
self.exit_stack = AsyncExitStack()
9851005

9861006

9871007
class MCPServerStdioParams(TypedDict):

tests/mcp/test_connect_disconnect.py

Lines changed: 109 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from contextlib import AsyncExitStack
12
from unittest.mock import AsyncMock, patch
23

34
import pytest
@@ -67,3 +68,111 @@ async def test_manual_connect_disconnect_works(
6768

6869
await server.cleanup()
6970
assert server.session is None, "Server should be disconnected"
71+
72+
73+
@pytest.mark.asyncio
74+
@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
75+
@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
76+
async def test_cleanup_resets_state_for_reconnection(mock_initialize: AsyncMock, mock_stdio_client):
77+
"""Test that cleanup resets all session state so the same instance can reconnect."""
78+
server = MCPServerStdio(
79+
params={"command": tee},
80+
cache_tools_list=True,
81+
)
82+
83+
await server.connect()
84+
first_exit_stack = server.exit_stack
85+
assert server.session is not None
86+
assert server.server_initialize_result is not None or mock_initialize.return_value is None
87+
88+
await server.cleanup()
89+
90+
# All session state must be cleared
91+
assert server.session is None
92+
assert server.server_initialize_result is None
93+
assert server._get_session_id is None
94+
# Exit stack must be a fresh instance so a subsequent connect() works
95+
assert isinstance(server.exit_stack, AsyncExitStack)
96+
assert server.exit_stack is not first_exit_stack
97+
98+
99+
@pytest.mark.asyncio
100+
@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
101+
@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
102+
@patch("mcp.client.session.ClientSession.list_tools")
103+
async def test_reconnect_after_cleanup(
104+
mock_list_tools: AsyncMock, mock_initialize: AsyncMock, mock_stdio_client
105+
):
106+
"""Test that an MCPServerStdio instance can reconnect after cleanup."""
107+
server = MCPServerStdio(
108+
params={"command": tee},
109+
cache_tools_list=True,
110+
)
111+
112+
tools = [MCPTool(name="tool1", inputSchema={})]
113+
mock_list_tools.return_value = ListToolsResult(tools=tools)
114+
115+
# First connection cycle
116+
await server.connect()
117+
result = await server.list_tools()
118+
assert len(result) == 1
119+
await server.cleanup()
120+
assert server.session is None
121+
122+
# Second connection cycle on the same instance
123+
await server.connect()
124+
assert server.session is not None
125+
result = await server.list_tools()
126+
assert len(result) == 1
127+
await server.cleanup()
128+
assert server.session is None
129+
130+
131+
@pytest.mark.asyncio
132+
@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
133+
@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
134+
async def test_cleanup_closes_write_stream_before_exit_stack(
135+
mock_initialize: AsyncMock, mock_stdio_client
136+
):
137+
"""Test that cleanup closes the session write stream before unwinding the exit stack.
138+
139+
This ordering ensures the subprocess receives EOF on stdin and can shut down
140+
gracefully (releasing multiprocessing semaphores, etc.) before task-group
141+
cancellation kills the reader/writer coroutines inside the transport.
142+
"""
143+
server = MCPServerStdio(
144+
params={"command": tee},
145+
)
146+
147+
await server.connect()
148+
assert server.session is not None
149+
150+
# Track the order of operations during cleanup
151+
call_order: list[str] = []
152+
original_aclose = server.session._write_stream.aclose
153+
154+
async def tracked_write_stream_close():
155+
call_order.append("write_stream_closed")
156+
return await original_aclose()
157+
158+
original_exit_stack_aclose = server.exit_stack.aclose
159+
160+
async def tracked_exit_stack_aclose():
161+
call_order.append("exit_stack_closed")
162+
return await original_exit_stack_aclose()
163+
164+
server.session._write_stream.aclose = tracked_write_stream_close # type: ignore[assignment]
165+
server.exit_stack.aclose = tracked_exit_stack_aclose # type: ignore[assignment]
166+
167+
await server.cleanup()
168+
169+
# The write stream may be closed multiple times (our explicit close, then again
170+
# during exit_stack unwind by ClientSession.__aexit__). The critical invariant
171+
# is that the FIRST close happens before the exit_stack unwind begins.
172+
assert len(call_order) >= 2, f"Expected at least 2 calls, got: {call_order}"
173+
assert call_order[0] == "write_stream_closed", (
174+
f"Write stream must be closed first, got: {call_order}"
175+
)
176+
assert call_order[1] == "exit_stack_closed", (
177+
f"Exit stack must be closed after write stream, got: {call_order}"
178+
)

0 commit comments

Comments
 (0)