From 8a3cd9206b61bd42d7d4d6b7e83312c26caa919e Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 18 Mar 2026 14:03:35 -0400 Subject: [PATCH] Terminate when ra_systems_sup exits The goal of this change is to shut down Rabbit when the `ra_systems_sup` supervisor exits. If disk is completely exhausted then the `ra_systems_sup` supervisor can exit from the repeated `enospc` errors. If we do not also crash Rabbit, Rabbit continues on but Khepri is unavailable, so no incoming connections can successfully log in. Plus there are other effects like `rabbit_vhost_process` deleting a vhost because Khepri does not say that it exists. --- deps/rabbit/src/rabbit_ra_systems.erl | 52 +++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/deps/rabbit/src/rabbit_ra_systems.erl b/deps/rabbit/src/rabbit_ra_systems.erl index f76602d4e9e..d32d0cce1fb 100644 --- a/deps/rabbit/src/rabbit_ra_systems.erl +++ b/deps/rabbit/src/rabbit_ra_systems.erl @@ -7,6 +7,8 @@ -module(rabbit_ra_systems). +-behaviour(gen_server). + -include_lib("kernel/include/logger.hrl"). -include_lib("rabbit_common/include/logging.hrl"). @@ -20,6 +22,21 @@ ensure_started/0, ensure_stopped/0]). +-export([start_link/0, + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). +-export([stop/0]). + +-rabbit_boot_step({rabbit_ra_systems_monitor, + [{description, "monitor process to shut down when Ra systems exit"}, + {mfa, {rabbit_sup, start_child, [?MODULE]}}, + {requires, [kernel_ready]}, + {cleanup, {?MODULE, stop, []}}]}). + -type ra_system_name() :: atom(). -define(COORD_WAL_MAX_SIZE_B, 64_000_000). @@ -192,3 +209,38 @@ ensure_ra_system_stopped(RaSystem) -> #{domain => ?RMQLOG_DOMAIN_GLOBAL}), throw(Error) end. + +%% ---------------------------------------------------------------------------- + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). 
+
+%% @doc Stops the monitor process and removes its child spec from
+%% `rabbit_sup'. This is the `cleanup' MFA of the
+%% `rabbit_ra_systems_monitor' boot step. A child that `rabbit_sup' does not
+%% know about is not treated as an error.
+stop() ->
+    case supervisor:terminate_child(rabbit_sup, ?MODULE) of
+        ok -> ok = supervisor:delete_child(rabbit_sup, ?MODULE);
+        {error, not_found} -> ok
+    end.
+
+%% @private
+%% Links this process to `ra_systems_sup' while trapping exits, so that the
+%% supervisor's termination is delivered as an `{'EXIT', Pid, Reason}'
+%% message to handle_info/2 instead of killing this process silently.
+%% The server state is the process registered as `ra_systems_sup'.
+init(_) ->
+    process_flag(trap_exit, true),
+    case whereis(ra_systems_sup) of
+        undefined ->
+            %% link(undefined) would fail with a bare badarg; stop with a
+            %% reason that names what is actually missing.
+            {stop, {not_running, ra_systems_sup}};
+        Pid ->
+            true = link(Pid),
+            %% Nothing to do until the supervisor exits.
+            {ok, Pid, hibernate}
+    end.
+
+%% @private
+%% No synchronous requests are part of this server's protocol. Reply with an
+%% error rather than `noreply': without a reply the caller would stay blocked
+%% until its own call timeout fires.
+handle_call(Request, _From, State) ->
+    {reply, {error, {unknown_call, Request}}, State, hibernate}.
+
+%% @private
+%% Casts are ignored; go back to hibernation as requested in init/1.
+handle_cast(_Msg, State) -> {noreply, State, hibernate}.
+
+%% @private
+%% The first clause only matches an exit of the linked `ra_systems_sup'
+%% process (the pid in the message must equal the state). It logs the reason
+%% and stops this server with the whole 'EXIT' tuple as its exit reason, so
+%% the failure is propagated to this server's own supervisor. Returning a
+%% `stop' tuple lets gen_server run terminate/2 and exit with that reason
+%% itself. Any other message is ignored.
+handle_info({'EXIT', RaSystemsSup, Reason} = E, RaSystemsSup) ->
+    ?LOG_ERROR(
+       ?MODULE_STRING ": Ra system supervisor exited with reason ~tp~n",
+       [Reason],
+       #{domain => ?RMQLOG_DOMAIN_GLOBAL}),
+    {stop, E, RaSystemsSup};
+handle_info(_Info, State) -> {noreply, State, hibernate}.
+
+%% @private
+%% Drops the link created in init/1. unlink/1 returns `true' even when the
+%% other process is already gone, so the match cannot fail on that path.
+terminate(_, RaSystemsSup) ->
+    true = unlink(RaSystemsSup),
+    ok.
+
+%% @private
+%% The state (a single pid) has no versioned layout to migrate.
+code_change(_OldVsn, State, _Extra) -> {ok, State}.