From d15efabf2852f54347631e2cede4b2b079e28594 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Thu, 26 Jun 2025 20:22:55 +0100 Subject: [PATCH 1/3] Fix stdio cleanup hanging on slow-terminating processes The stdio cleanup was hanging indefinitely when processes ignored termination signals or took too long to exit. This caused the MCP client to freeze during shutdown, especially with servers that don't handle SIGTERM properly. This was already being handled on Windows, but not Unix systems. This Commit unifies the two approaches, removing special logic for windows process termination. The fix introduces a 2-second timeout for process termination. If a process doesn't exit gracefully within this window, it's forcefully killed. This ensures the client always completes cleanup in bounded time while still giving well-behaved servers a chance to exit cleanly. This resolves hanging issues reported when MCP servers ignore standard termination signals. resolves #555 Also adds regression tests for #559. Co-authored-by: Cristian Pufu --- src/mcp/client/stdio/__init__.py | 11 +-- src/mcp/client/stdio/win32.py | 23 ------ tests/client/test_stdio.py | 122 +++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 28 deletions(-) diff --git a/src/mcp/client/stdio/__init__.py b/src/mcp/client/stdio/__init__.py index a75cfd764..f02ee4607 100644 --- a/src/mcp/client/stdio/__init__.py +++ b/src/mcp/client/stdio/__init__.py @@ -16,7 +16,6 @@ from .win32 import ( create_windows_process, get_windows_executable_command, - terminate_windows_process, ) # Environment variables to inherit by default @@ -180,10 +179,12 @@ async def stdin_writer(): finally: # Clean up process to prevent any dangling orphaned processes try: - if sys.platform == "win32": - await terminate_windows_process(process) - else: - process.terminate() + process.terminate() + with anyio.fail_after(2.0): + await process.wait() + except TimeoutError: + # If process doesn't terminate in time, force kill it + process.kill() except ProcessLookupError: # Process already exited, which is fine pass diff --git a/src/mcp/client/stdio/win32.py b/src/mcp/client/stdio/win32.py index 7246b9dec..df9d0f068 100644 --- a/src/mcp/client/stdio/win32.py +++ b/src/mcp/client/stdio/win32.py @@ -8,9 +8,7 @@ from pathlib import Path from typing import BinaryIO, TextIO, cast -import anyio from anyio import to_thread -from anyio.abc import Process from anyio.streams.file import FileReadStream, FileWriteStream @@ -159,24 +157,3 @@ async def create_windows_process( bufsize=0, ) return FallbackProcess(popen_obj) - - -async def terminate_windows_process(process: Process | FallbackProcess): - """ - Terminate a Windows process. - - Note: On Windows, terminating a process with process.terminate() doesn't - always guarantee immediate process termination. - So we give it 2s to exit, or we call process.kill() - which sends a SIGKILL equivalent signal. - - Args: - process: The process to terminate - """ - try: - process.terminate() - with anyio.fail_after(2.0): - await process.wait() - except TimeoutError: - # Force kill if it doesn't terminate - process.kill() diff --git a/tests/client/test_stdio.py b/tests/client/test_stdio.py index c66a16ab9..eda7dd3e3 100644 --- a/tests/client/test_stdio.py +++ b/tests/client/test_stdio.py @@ -1,5 +1,9 @@ import shutil +import sys +import textwrap +import time +import anyio import pytest from mcp.client.session import ClientSession @@ -90,3 +94,121 @@ async def test_stdio_client_nonexistent_command(): or "not found" in error_message.lower() or "cannot find the file" in error_message.lower() # Windows error message ) + + +@pytest.mark.anyio +async def test_stdio_client_universal_cleanup(): + """ + Test that stdio_client completes cleanup within reasonable time + even when connected to processes that exit slowly. + """ + + # Use a Python script that simulates a long-running process + # This ensures consistent behavior across platforms + long_running_script = textwrap.dedent( + """ + import time + import sys + + # Simulate a long-running process + for i in range(100): + time.sleep(0.1) + # Flush to ensure output is visible + sys.stdout.flush() + sys.stderr.flush() + """ + ) + + server_params = StdioServerParameters( + command=sys.executable, + args=["-c", long_running_script], + ) + + start_time = time.time() + + with anyio.move_on_after(8.0) as cancel_scope: + async with stdio_client(server_params) as (read_stream, write_stream): + # Immediately exit - this triggers cleanup while process is still running + pass + + end_time = time.time() + elapsed = end_time - start_time + + # On Windows: 2s (stdin wait) + 2s (terminate wait) + overhead = ~5s expected + assert elapsed < 6.0, ( + f"stdio_client cleanup took {elapsed:.1f} seconds, expected < 6.0 seconds. " + f"This suggests the timeout mechanism may not be working properly." + ) + + # Check if we timed out + if cancel_scope.cancelled_caught: + pytest.fail( + "stdio_client cleanup timed out after 8.0 seconds. " + "This indicates the cleanup mechanism is hanging and needs fixing." + ) + + +@pytest.mark.anyio +@pytest.mark.skipif(sys.platform == "win32", reason="Windows signal handling is different") +async def test_stdio_client_sigint_only_process(): + """ + Test cleanup with a process that ignores SIGTERM but responds to SIGINT. + """ + # Create a Python script that ignores SIGTERM but handles SIGINT + script_content = textwrap.dedent( + """ + import signal + import sys + import time + + # Ignore SIGTERM (what process.terminate() sends) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + + # Handle SIGINT (Ctrl+C signal) by exiting cleanly + def sigint_handler(signum, frame): + sys.exit(0) + + signal.signal(signal.SIGINT, sigint_handler) + + # Keep running until SIGINT received + while True: + time.sleep(0.1) + """ + ) + + server_params = StdioServerParameters( + command=sys.executable, + args=["-c", script_content], + ) + + start_time = time.time() + + try: + # Use anyio timeout to prevent test from hanging forever + with anyio.move_on_after(5.0) as cancel_scope: + async with stdio_client(server_params) as (read_stream, write_stream): + # Let the process start and begin ignoring SIGTERM + await anyio.sleep(0.5) + # Exit context triggers cleanup - this should not hang + pass + + if cancel_scope.cancelled_caught: + raise TimeoutError("Test timed out") + + end_time = time.time() + elapsed = end_time - start_time + + # Should complete quickly even with SIGTERM-ignoring process + # This will fail if cleanup only uses process.terminate() without fallback + assert elapsed < 5.0, ( + f"stdio_client cleanup took {elapsed:.1f} seconds with SIGTERM-ignoring process. " + f"Expected < 5.0 seconds. This suggests the cleanup needs SIGINT/SIGKILL fallback." + ) + except (TimeoutError, Exception) as e: + if isinstance(e, TimeoutError) or "timed out" in str(e): + pytest.fail( + "stdio_client cleanup timed out after 5.0 seconds with SIGTERM-ignoring process. " + "This confirms the cleanup needs SIGINT/SIGKILL fallback for processes that ignore SIGTERM." + ) + else: + raise From c9b43bca38ef26f95150e608482ae3ee0061b3fa Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Fri, 4 Jul 2025 18:38:05 +0100 Subject: [PATCH 2/3] Try anyio.open_process first before Windows fallback This re-establishes behavior before #596 in the default case. - Attempt to use anyio's native open_process function on Windows - Fall back to subprocess.Popen only if NotImplementedError is raised - This improves compatibility with event loops that support async subprocesses - Extract fallback logic into separate function for clarity --- src/mcp/client/stdio/win32.py | 50 ++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/src/mcp/client/stdio/win32.py b/src/mcp/client/stdio/win32.py index df9d0f068..d5f1dd0b9 100644 --- a/src/mcp/client/stdio/win32.py +++ b/src/mcp/client/stdio/win32.py @@ -8,7 +8,9 @@ from pathlib import Path from typing import BinaryIO, TextIO, cast +import anyio from anyio import to_thread +from anyio.abc import Process from anyio.streams.file import FileReadStream, FileWriteStream @@ -113,13 +115,14 @@ async def create_windows_process( env: dict[str, str] | None = None, errlog: TextIO | None = sys.stderr, cwd: Path | str | None = None, -) -> FallbackProcess: +) -> Process | FallbackProcess: """ Creates a subprocess in a Windows-compatible way. - On Windows, asyncio.create_subprocess_exec has incomplete support - (NotImplementedError when trying to open subprocesses). - Therefore, we fallback to subprocess.Popen and wrap it for async usage. + Attempt to use anyio's open_process for async subprocess creation. + In some cases this will throw NotImplementedError on Windows, e.g. + when using the SelectorEventLoop which does not support async subprocesses. + In that case, we fall back to using subprocess.Popen. Args: command (str): The executable to run @@ -131,6 +134,45 @@ async def create_windows_process( Returns: FallbackProcess: Async-compatible subprocess with stdin and stdout streams """ + try: + # First try using anyio with Windows-specific flags to hide console window + process = await anyio.open_process( + [command, *args], + env=env, + # Ensure we don't create console windows for each process + creationflags=subprocess.CREATE_NO_WINDOW # type: ignore + if hasattr(subprocess, "CREATE_NO_WINDOW") + else 0, + stderr=errlog, + cwd=cwd, + ) + return process + except NotImplementedError: + # Windows often doesn't support async subprocess creation, use fallback + return await _create_windows_fallback_process(command, args, env, errlog, cwd) + except Exception: + # Try again without creation flags + process = await anyio.open_process( + [command, *args], + env=env, + stderr=errlog, + cwd=cwd, + ) + return process + + +async def _create_windows_fallback_process( + command: str, + args: list[str], + env: dict[str, str] | None = None, + errlog: TextIO | None = sys.stderr, + cwd: Path | str | None = None, +) -> FallbackProcess: + """ + Create a subprocess using subprocess.Popen as a fallback when anyio fails. + + This function wraps the sync subprocess.Popen in an async-compatible interface. + """ try: # Try launching with creationflags to avoid opening a new console window popen_obj = subprocess.Popen( From 5771abc551e0b0e01fa175ecd82689eba0ea39b3 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Tue, 8 Jul 2025 11:46:17 +0100 Subject: [PATCH 3/3] Address @ihrpr review feedback - Add SIGTERM_IGNORING_PROCESS_TIMEOUT constant in tests to document timeout behavior - Add PROCESS_TERMINATION_TIMEOUT constant to replace magic number in stdio client - Restore deprecated terminate_windows_process function with original functionality to maintain backward compatibility for external users The deprecated function is marked using @deprecated decorator following the codebase convention, while preserving its original terminate-wait-kill behavior. --- src/mcp/client/stdio/__init__.py | 5 ++++- src/mcp/client/stdio/win32.py | 26 ++++++++++++++++++++++++++ tests/client/test_stdio.py | 13 ++++++++++--- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/mcp/client/stdio/__init__.py b/src/mcp/client/stdio/__init__.py index f02ee4607..0fd815ac7 100644 --- a/src/mcp/client/stdio/__init__.py +++ b/src/mcp/client/stdio/__init__.py @@ -37,6 +37,9 @@ else ["HOME", "LOGNAME", "PATH", "SHELL", "TERM", "USER"] ) +# Timeout for process termination before falling back to force kill +PROCESS_TERMINATION_TIMEOUT = 2.0 + def get_default_environment() -> dict[str, str]: """ @@ -180,7 +183,7 @@ async def stdin_writer(): # Clean up process to prevent any dangling orphaned processes try: process.terminate() - with anyio.fail_after(2.0): + with anyio.fail_after(PROCESS_TERMINATION_TIMEOUT): await process.wait() except TimeoutError: # If process doesn't terminate in time, force kill it diff --git a/src/mcp/client/stdio/win32.py b/src/mcp/client/stdio/win32.py index d5f1dd0b9..d046084bb 100644 --- a/src/mcp/client/stdio/win32.py +++ b/src/mcp/client/stdio/win32.py @@ -12,6 +12,7 @@ from anyio import to_thread from anyio.abc import Process from anyio.streams.file import FileReadStream, FileWriteStream +from typing_extensions import deprecated def get_windows_executable_command(command: str) -> str: @@ -199,3 +200,28 @@ async def _create_windows_fallback_process( bufsize=0, ) return FallbackProcess(popen_obj) + + +@deprecated( + "terminate_windows_process is deprecated and will be removed in a future version. " + "Process termination is now handled internally by the stdio_client context manager." +) +async def terminate_windows_process(process: Process | FallbackProcess): + """ + Terminate a Windows process. + + Note: On Windows, terminating a process with process.terminate() doesn't + always guarantee immediate process termination. + So we give it 2s to exit, or we call process.kill() + which sends a SIGKILL equivalent signal. + + Args: + process: The process to terminate + """ + try: + process.terminate() + with anyio.fail_after(2.0): + await process.wait() + except TimeoutError: + # Force kill if it doesn't terminate + process.kill() diff --git a/tests/client/test_stdio.py b/tests/client/test_stdio.py index eda7dd3e3..585cb8eda 100644 --- a/tests/client/test_stdio.py +++ b/tests/client/test_stdio.py @@ -15,6 +15,11 @@ from mcp.shared.message import SessionMessage from mcp.types import CONNECTION_CLOSED, JSONRPCMessage, JSONRPCRequest, JSONRPCResponse +# Timeout for cleanup of processes that ignore SIGTERM +# This timeout ensures the test fails quickly if the cleanup logic doesn't have +# proper fallback mechanisms (SIGINT/SIGKILL) for processes that ignore SIGTERM +SIGTERM_IGNORING_PROCESS_TIMEOUT = 5.0 + tee: str = shutil.which("tee") # type: ignore python: str = shutil.which("python") # type: ignore @@ -200,14 +205,16 @@ def sigint_handler(signum, frame): # Should complete quickly even with SIGTERM-ignoring process # This will fail if cleanup only uses process.terminate() without fallback - assert elapsed < 5.0, ( + assert elapsed < SIGTERM_IGNORING_PROCESS_TIMEOUT, ( f"stdio_client cleanup took {elapsed:.1f} seconds with SIGTERM-ignoring process. " - f"Expected < 5.0 seconds. This suggests the cleanup needs SIGINT/SIGKILL fallback." + f"Expected < {SIGTERM_IGNORING_PROCESS_TIMEOUT} seconds. " + "This suggests the cleanup needs SIGINT/SIGKILL fallback." ) except (TimeoutError, Exception) as e: if isinstance(e, TimeoutError) or "timed out" in str(e): pytest.fail( - "stdio_client cleanup timed out after 5.0 seconds with SIGTERM-ignoring process. " + f"stdio_client cleanup timed out after {SIGTERM_IGNORING_PROCESS_TIMEOUT} seconds " + "with SIGTERM-ignoring process. " "This confirms the cleanup needs SIGINT/SIGKILL fallback for processes that ignore SIGTERM." ) else: