Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/bb_replicator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,5 @@ func main() {

lifecycleState.MarkReadyAndWait(siblingsGroup)
return nil
})
}, program.WithDaemonExit())
}
2 changes: 1 addition & 1 deletion cmd/bb_storage/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func main() {

lifecycleState.MarkReadyAndWait(siblingsGroup)
return nil
})
}, program.WithDaemonExit())
}

func newNonScannableBlobAccess(dependenciesGroup program.Group, configuration *bb_storage.NonScannableBlobAccessConfiguration, creator blobstore_configuration.BlobAccessCreator, grpcClientFactory bb_grpc.ClientFactory) (blobstore_configuration.BlobAccessInfo, blobstore.BlobAccess, []auth.Authorizer, auth.Authorizer, error) {
Expand Down
105 changes: 69 additions & 36 deletions pkg/program/run_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"runtime"
"sync"
"syscall"
"time"
)

// runMainErrorLogger is used by RunMain() to capture errors returned by
Expand All @@ -34,40 +33,66 @@ func (el *runMainErrorLogger) startShutdown(shutdownFunc func()) {
})
}

// terminateWithSignal terminates the current process by sending a
// signal to itself.
func terminateWithSignal(currentPID int, terminationSignal os.Signal) {
// runMainConfig holds the optional settings that influence RunMain.
// It is created internally by RunMain; callers never build one
// themselves and instead adjust it by passing Option values.
type runMainConfig struct {
	// daemon is switched on by WithDaemonExit.
	daemon bool
}

// Option is a functional option that adjusts the configuration used
// by RunMain.
type Option func(*runMainConfig)

// WithDaemonExit returns an Option that makes a graceful shutdown
// triggered by a signal finish with exit code 0 rather than
// 128+signal.
//
// It is meant for long-running daemons (servers, workers, schedulers):
// for those, receiving SIGINT/SIGTERM is the normal way to stop, and an
// orderly wind-down should be reported as success to whatever
// supervises the process (k8s pod phase Succeeded, systemd inactive
// (dead) without failure).
//
// When the option is not supplied, the default applies: an interrupted
// program exits with the POSIX-conventional 128+signal, which lets
// wrapper scripts and init systems tell a run that completed apart
// from one that was cut short. One-shot CLI tools (bb_copy,
// sync_jwks_to_configmap, etc.) should keep that default.
func WithDaemonExit() Option {
	return func(cfg *runMainConfig) {
		cfg.daemon = true
	}
}

// terminateWithSignal completes a shutdown initiated by terminationSignal.
//
// Previously this re-raised the signal back to the process via
// signal.Reset() + process.Signal() so the container/init system would
// observe a signal-style exit (e.g. 128+SIGTERM=143). signal.Reset()
// does not install SIG_DFL though — the runtime's signal trampoline
// still catches the raised signal and dispatches to
// runtime.dieFromSignal(), which falls through to a hard exit(2) when
// its signal-to-self races. Multi-goroutine programs running as PID 1
// in a PID namespace reliably hit that fall-through, surfacing exit 2
// despite a clean shutdown.
//
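// Skip the signal-raise dance and exit with the right code directly.
// 128+signal is the status POSIX shells (bash, zsh) report in $? for a
// child that was killed by a signal, so wrapper scripts see the same
// value without going near Go's signal machinery. Note that a parent
// inspecting the raw wait status now observes a normal exit
// (WIFEXITED) rather than WIFSIGNALED, so supervisors that distinguish
// the two (e.g. systemd) may classify the shutdown differently.
// Daemons opt into exit 0 via WithDaemonExit so a graceful shutdown
// via SIGTERM does not look like a failure to the supervisor.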
//
// Refs:
// - https://github.com/golang/go/issues/19326
// - https://github.com/golang/go/issues/46321
func terminateWithSignal(currentPID int, terminationSignal os.Signal, daemon bool) {
if daemon {
os.Exit(0)
}
if runtime.GOOS == "windows" {
// On Windows, process.Signal() is not supported so
// immediately exit.
// On Windows, process.Signal() is not supported and
// signal numbers do not map to POSIX exit codes; just
// exit non-zero so wrapper scripts see the interruption.
os.Exit(1)
}

// Clear the signal handler and raise the
// original signal once again. That way we shut
// down under the original circumstances.
signal.Reset(terminationSignal)
process, err := os.FindProcess(currentPID)
if err != nil {
panic(err)
}
if err := process.Signal(terminationSignal); err != nil {
panic(err)
if sig, ok := terminationSignal.(syscall.Signal); ok {
os.Exit(128 + int(sig))
}

// This code should not be reached, if it weren't for the fact
// that process.Signal() does not guarantee that the signal is
// delivered to the same thread.
//
// Furthermore, signal.Reset() does not reset signals that are
// delivered via the process group, but ignored by the process
// itself. Fall back to calling os.Exit() if we don't get
// terminated via signal delivery.
//
// More details:
// https://github.com/golang/go/issues/19326
// https://github.com/golang/go/issues/46321
time.Sleep(5)
os.Exit(1)
}

Expand All @@ -86,16 +111,24 @@ var terminationSignals = []os.Signal{
// - One of the routines fails with a non-nil error. In that case the
// program terminates with exit code 1.
//
// - The program receives SIGINT or SIGTERM. In that case the program
// will terminate with that signal.
// - The program receives SIGINT or SIGTERM. By default the program
// terminates with exit code 128+signal (the POSIX convention),
// which is appropriate for one-shot tools. Pass WithDaemonExit to
// exit 0 instead — appropriate for long-running daemons where a
// signal-triggered shutdown is the normal lifecycle.
//
// In case termination occurs, all remaining routines are canceled,
// respecting dependencies between these routines. This can for example
// be used to ensure an outgoing database connection is terminated after
// an integrated RPC server is shut down.
func RunMain(routine Routine) {
func RunMain(routine Routine, opts ...Option) {
var cfg runMainConfig
for _, opt := range opts {
opt(&cfg)
}

currentPID := os.Getpid()
relaunchIfPID1(currentPID)
relaunchIfPID1(currentPID, cfg.daemon)

ctx, cancel := context.WithCancel(context.Background())
errorLogger := &runMainErrorLogger{
Expand All @@ -109,7 +142,7 @@ func RunMain(routine Routine) {
receivedSignal := <-signalChan
log.Printf("Received %#v signal. Initiating graceful shutdown.", receivedSignal.String())
errorLogger.startShutdown(func() {
terminateWithSignal(currentPID, receivedSignal)
terminateWithSignal(currentPID, receivedSignal, cfg.daemon)
})
}()

Expand Down
4 changes: 2 additions & 2 deletions pkg/program/run_main_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
// to terminate, we need to run multiple processes.
//
// More details: https://github.com/golang/go/pull/61261
func relaunchIfPID1(currentPID int) {
func relaunchIfPID1(currentPID int, daemon bool) {
if currentPID == 1 {
executable, err := os.Executable()
if err != nil {
Expand Down Expand Up @@ -62,7 +62,7 @@ func relaunchIfPID1(currentPID int) {

if waitedPID == childPID {
if status.Signaled() {
terminateWithSignal(currentPID, status.Signal())
terminateWithSignal(currentPID, status.Signal(), daemon)
}
os.Exit(status.ExitStatus())
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/program/run_main_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

package program

func relaunchIfPID1(currentPID int) {}
// relaunchIfPID1 is the do-nothing counterpart of the implementation in
// run_main_linux.go. Both arguments are accepted only so the signature
// matches that implementation; neither is used here.
//
// NOTE(review): presumably this file is built for every platform other
// than Linux; the build constraint is not visible in this view.
func relaunchIfPID1(currentPID int, daemon bool) {
}
Loading