豆豆友情提示:这是一个非官方 GitHub 代理镜像,主要用于网络测试或访问加速。请勿在此进行登录、注册或处理任何敏感信息。进行这些操作请务必访问官方网站 github.com。 Raw 内容也通过此代理提供。
Skip to content

Commit b5e5ffa

Browse files
committed
fix(mcp): close session write stream before exit stack unwind to prevent semaphore leaks
When MCPServerStdio.cleanup() tears down via exit_stack.aclose(), the ClientSession exits first (cancelling its task group), then the transport exits. The ClientSession cancellation kills the reader/writer tasks inside the transport before the transport's finally block can close stdin and wait for the subprocess to exit gracefully. This race prevents the subprocess from flushing multiprocessing resources, causing "leaked semaphore" warnings from Python's resource_tracker.

Fix: explicitly close the session's write stream before the exit_stack unwind. This delivers EOF to the subprocess's stdin, giving it a window to release its resources and exit cleanly before task groups are cancelled.

Also reinitialise the exit_stack and clear server_initialize_result during cleanup so the same server instance can reconnect after cleanup.

Closes #618
1 parent 86739b1 commit b5e5ffa

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed

src/agents/mcp/server.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,22 @@ async def cleanup(self):
943943
is_failed_connection_cleanup = self.session is None
944944

945945
try:
946+
# Signal the subprocess to shut down gracefully before the exit stack
947+
# unwinds. The exit stack tears down contexts in LIFO order: the
948+
# ClientSession exits first (cancelling its internal task group), then
949+
# the transport (stdio_client / sse_client) exits. Because the
950+
# ClientSession cancellation kills the reader/writer tasks inside the
951+
# transport, the transport's ``finally`` block — which tries to close
952+
# stdin and wait for the process — may never execute cleanly. By
953+
# closing the session's write stream ahead of time we deliver an EOF to
954+
# the subprocess's stdin, giving it a chance to flush resources (e.g.
955+
# multiprocessing semaphores) and exit on its own before we cancel the
956+
# task groups.
957+
if self.session is not None:
958+
try:
959+
await self.session._write_stream.aclose()
960+
except Exception:
961+
pass # Best-effort; the stream may already be closed.
946962
await self.exit_stack.aclose()
947963
except asyncio.CancelledError as e:
948964
logger.debug(f"Cleanup cancelled for MCP server '{self.name}': {e}")
@@ -1010,6 +1026,10 @@ async def cleanup(self):
10101026
finally:
10111027
self.session = None
10121028
self._get_session_id = None
1029+
self.server_initialize_result = None
1030+
# Reinitialize the exit stack so the same server instance can
1031+
# reconnect cleanly after cleanup.
1032+
self.exit_stack = AsyncExitStack()
10131033

10141034

10151035
class MCPServerStdioParams(TypedDict):

tests/mcp/test_connect_disconnect.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from contextlib import AsyncExitStack
12
from unittest.mock import AsyncMock, patch
23

34
import pytest
@@ -67,3 +68,111 @@ async def test_manual_connect_disconnect_works(
6768

6869
await server.cleanup()
6970
assert server.session is None, "Server should be disconnected"
71+
72+
73+
@pytest.mark.asyncio
74+
@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
75+
@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
76+
async def test_cleanup_resets_state_for_reconnection(mock_initialize: AsyncMock, mock_stdio_client):
77+
"""Test that cleanup resets all session state so the same instance can reconnect."""
78+
server = MCPServerStdio(
79+
params={"command": tee},
80+
cache_tools_list=True,
81+
)
82+
83+
await server.connect()
84+
first_exit_stack = server.exit_stack
85+
assert server.session is not None
86+
assert server.server_initialize_result is not None or mock_initialize.return_value is None
87+
88+
await server.cleanup()
89+
90+
# All session state must be cleared
91+
assert server.session is None
92+
assert server.server_initialize_result is None
93+
assert server._get_session_id is None
94+
# Exit stack must be a fresh instance so a subsequent connect() works
95+
assert isinstance(server.exit_stack, AsyncExitStack)
96+
assert server.exit_stack is not first_exit_stack
97+
98+
99+
@pytest.mark.asyncio
100+
@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
101+
@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
102+
@patch("mcp.client.session.ClientSession.list_tools")
103+
async def test_reconnect_after_cleanup(
104+
mock_list_tools: AsyncMock, mock_initialize: AsyncMock, mock_stdio_client
105+
):
106+
"""Test that an MCPServerStdio instance can reconnect after cleanup."""
107+
server = MCPServerStdio(
108+
params={"command": tee},
109+
cache_tools_list=True,
110+
)
111+
112+
tools = [MCPTool(name="tool1", inputSchema={})]
113+
mock_list_tools.return_value = ListToolsResult(tools=tools)
114+
115+
# First connection cycle
116+
await server.connect()
117+
result = await server.list_tools()
118+
assert len(result) == 1
119+
await server.cleanup()
120+
assert server.session is None
121+
122+
# Second connection cycle on the same instance
123+
await server.connect()
124+
assert server.session is not None
125+
result = await server.list_tools()
126+
assert len(result) == 1
127+
await server.cleanup()
128+
assert server.session is None
129+
130+
131+
@pytest.mark.asyncio
132+
@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
133+
@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
134+
async def test_cleanup_closes_write_stream_before_exit_stack(
135+
mock_initialize: AsyncMock, mock_stdio_client
136+
):
137+
"""Test that cleanup closes the session write stream before unwinding the exit stack.
138+
139+
This ordering ensures the subprocess receives EOF on stdin and can shut down
140+
gracefully (releasing multiprocessing semaphores, etc.) before task-group
141+
cancellation kills the reader/writer coroutines inside the transport.
142+
"""
143+
server = MCPServerStdio(
144+
params={"command": tee},
145+
)
146+
147+
await server.connect()
148+
assert server.session is not None
149+
150+
# Track the order of operations during cleanup
151+
call_order: list[str] = []
152+
original_aclose = server.session._write_stream.aclose
153+
154+
async def tracked_write_stream_close():
155+
call_order.append("write_stream_closed")
156+
return await original_aclose()
157+
158+
original_exit_stack_aclose = server.exit_stack.aclose
159+
160+
async def tracked_exit_stack_aclose():
161+
call_order.append("exit_stack_closed")
162+
return await original_exit_stack_aclose()
163+
164+
server.session._write_stream.aclose = tracked_write_stream_close # type: ignore[assignment]
165+
server.exit_stack.aclose = tracked_exit_stack_aclose # type: ignore[assignment]
166+
167+
await server.cleanup()
168+
169+
# The write stream may be closed multiple times (our explicit close, then again
170+
# during exit_stack unwind by ClientSession.__aexit__). The critical invariant
171+
# is that the FIRST close happens before the exit_stack unwind begins.
172+
assert len(call_order) >= 2, f"Expected at least 2 calls, got: {call_order}"
173+
assert call_order[0] == "write_stream_closed", (
174+
f"Write stream must be closed first, got: {call_order}"
175+
)
176+
assert call_order[1] == "exit_stack_closed", (
177+
f"Exit stack must be closed after write stream, got: {call_order}"
178+
)

0 commit comments

Comments
 (0)