From dd4e6d5e929e84dee7e7593a7142acfa1d1606cf Mon Sep 17 00:00:00 2001
From: Manali Gawande <156853162+mkg0908@users.noreply.github.com>
Date: Mon, 1 Jun 2026 16:59:30 -0700
Subject: [PATCH] fix: gracefully stop uvicorn threads during shutdown

---
 src/litserve/server.py        |  7 +++++++
 tests/unit/test_lit_server.py | 26 ++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/src/litserve/server.py b/src/litserve/server.py
index ccefb5c2..0689efbf 100644
--- a/src/litserve/server.py
+++ b/src/litserve/server.py
@@ -1208,6 +1208,12 @@ def _perform_graceful_shutdown(
                 logger.warning(f"{log_prefix}: Already not alive.")
                 continue
             try:
+                if isinstance(uw, threading.Thread):
+                    getattr(uw, "_litserve_server").should_exit = True
+                    uw.join(timeout=self.uvicorn_graceful_timeout)
+                    if uw.is_alive():
+                        logger.warning(f"{log_prefix}: Did not terminate gracefully.")
+                    continue
                 uw.terminate()
                 uw.join(timeout=self.uvicorn_graceful_timeout)
                 if uw.is_alive():
@@ -1577,6 +1583,7 @@ def _start_server(self, port, num_uvicorn_servers, log_level, sockets, uvicorn_w
                 w = threading.Thread(
                     target=server.run, args=(response_queue_id, sockets), name=f"LitServer-{response_queue_id}"
                 )
+                setattr(w, "_litserve_server", server)
             else:
                 raise ValueError("Invalid value for api_server_worker_type. Must be 'process' or 'thread'")
             w.start()
diff --git a/tests/unit/test_lit_server.py b/tests/unit/test_lit_server.py
index d319ae19..4cd51e0d 100644
--- a/tests/unit/test_lit_server.py
+++ b/tests/unit/test_lit_server.py
@@ -16,6 +16,7 @@
 import json
 import os
 import sys
+import threading
 import time
 from time import sleep
 from unittest.mock import MagicMock, patch
@@ -340,6 +341,31 @@ def test_server_terminate():
     server._transport.close.assert_called()
 
 
+def test_graceful_shutdown_stops_uvicorn_thread(simple_litapi):
+    """Graceful shutdown sets ``should_exit`` for a thread worker, stops it, and shuts down the manager."""
+    server = LitServer(simple_litapi)
+    server._transport = MagicMock()
+    server.inference_workers = []
+    uvicorn_server = MagicMock(should_exit=False)
+
+    def wait_for_shutdown():
+        while not uvicorn_server.should_exit:
+            sleep(0.01)
+
+    # Daemon thread: if shutdown ever stops setting should_exit, the assertions below fail
+    # instead of a still-running thread blocking interpreter exit and hanging the test run.
+    worker = threading.Thread(target=wait_for_shutdown, name="LitServer-0", daemon=True)
+    setattr(worker, "_litserve_server", uvicorn_server)
+    worker.start()
+
+    manager = MagicMock()
+    server._perform_graceful_shutdown(manager, {0: worker})
+
+    assert uvicorn_server.should_exit is True
+    assert not worker.is_alive()
+    manager.shutdown.assert_called_once()
+
+
 @pytest.mark.parametrize(("disable_openapi_url", "should_print"), [(False, True), (True, False)])
 @patch("builtins.print")
 @patch("litserve.server.uvicorn")