Skip to content
20 changes: 18 additions & 2 deletions consensus/bor/bor.go
Original file line number Diff line number Diff line change
Expand Up @@ -1409,6 +1409,13 @@
// Seal implements consensus.Engine, attempting to create a sealed block using
// the local signing credentials.
func (c *Bor) Seal(chain consensus.ChainHeaderReader, block *types.Block, witness *stateless.Witness, results chan<- *consensus.NewSealedBlockEvent, stop <-chan struct{}) error {
return c.SealWithStopHook(chain, block, witness, results, stop, nil)
}

// SealWithStopHook is identical to Seal but invokes onStopExit (if non-nil)
// from the sealing goroutine on stop-branch exits only. The hook is NOT
// called on the successful-delivery path.
func (c *Bor) SealWithStopHook(chain consensus.ChainHeaderReader, block *types.Block, witness *stateless.Witness, results chan<- *consensus.NewSealedBlockEvent, stop <-chan struct{}, onStopExit func()) error {

Check failure on line 1418 in consensus/bor/bor.go

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this method to reduce its Cognitive Complexity from 19 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=0xPolygon_bor&issues=AZ4ii-XZd_NUUJjVb8ah&open=AZ4ii-XZd_NUUJjVb8ah&pullRequest=2220
header := block.Header()
// Sealing the genesis block is not supported
number := header.Number.Uint64()
Expand Down Expand Up @@ -1469,6 +1476,9 @@
select {
case <-stop:
log.Debug("Discarding sealing operation for block", "number", number)
if onStopExit != nil {
onStopExit()
}
return
case <-time.After(delay):
if wiggle > 0 {
Expand All @@ -1489,10 +1499,16 @@
"headerDifficulty", header.Difficulty,
)
}
// Block on send (or exit on stop). A default branch here would
// drop the result silently when results is full, leaking the
// miner's pendingTasks entry.
select {
case results <- &consensus.NewSealedBlockEvent{Block: block.WithSeal(header), Witness: witness}:
default:
log.Warn("Sealing result was not read by miner", "number", number, "sealhash", SealHash(header, c.config))
case <-stop:
log.Info("Seal interrupted before result delivery", "number", number, "sealhash", SealHash(header, c.config))
if onStopExit != nil {
onStopExit()
}
}
}()

Expand Down
217 changes: 217 additions & 0 deletions consensus/bor/bor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"math"
"math/big"
"sync/atomic"
"testing"
"time"

Expand Down Expand Up @@ -2652,6 +2653,222 @@ func TestSeal_AuthorizedSigner(t *testing.T) {
}
}

// TestSeal_BlocksOnFullResultChannelInsteadOfSilentDrop is a regression test
// for the silent-drop in Bor.Seal's result-delivery goroutine.
//
// Bug: the goroutine's second select used `default` as the not-sent path:
//
// select {
// case results <- &consensus.NewSealedBlockEvent{...}:
// default:
// log.Warn("Sealing result was not read by miner", ...)
// }
//
// When the result channel had no ready receiver (e.g., resultLoop blocked on
// a slow chain.WriteBlockAndSetHead under elephant-contract load), `default`
// fired immediately and the result was discarded without posting. The
// worker's pendingTasks entry for this sealhash would then leak: resultLoop
// never received it, never deleted it, and the producer's veblop fallback
// would short-circuit on hasPendingTasks > 0 every tick afterwards. This is
// post-mortem candidate 2 for the 2026-05-07 Amoy val4 stall.
//
// Fix: replace `default` with `case <-stop` so the goroutine either delivers
// or exits cleanly on interrupt. The taskLoop's interrupt() (with its
// pendingTasks-delete companion fix) is then the single place that cleans
// up on the stop path.
//
// Test scheme: drive Seal with a zero-buffer results channel and NO reader
// initially. With the bug, the goroutine immediately drops via `default`
// and a subsequent receive times out. With the fix, the goroutine blocks
// on send until our receive arrives, and we get the result back.
func TestSeal_BlocksOnFullResultChannelInsteadOfSilentDrop(t *testing.T) {
t.Parallel()
setup := newSignedChainSetup(t)
b := setup.bor
b.fakeDiff = true

b.Authorize(setup.signerAddr, func(account accounts.Account, mimeType string, data []byte) ([]byte, error) {
return crypto.Sign(crypto.Keccak256(data), setup.privKey)
})

h := setup.makeSignedHeader(t, 1, setup.genesis)
// Use a Time slightly in the future so the goroutine exercises a real
// time.After(delay) wait. We need a non-trivial delay so we can ensure
// the goroutine reaches the second select *before* our receive — that
// is the precise condition under which the buggy `default` branch
// would fire (no receiver ready, no buffer slot, no fallback).
const delaySec = 1
h.Time = uint64(time.Now().Unix()) + delaySec

body := &types.Body{Transactions: types.Transactions{types.NewTx(&types.LegacyTx{})}}
block := types.NewBlock(h, body, nil, trie.NewStackTrie(nil))

// Zero-buffer channel + no receiver-ready at the moment the goroutine
// reaches the second select. Pre-fix: `default` fires → silent drop.
// Post-fix: send blocks until either a receiver pairs with it or stop
// closes — neither happens here, so the goroutine remains parked on
// send and our delayed receive pairs with it.
results := make(chan *consensus.NewSealedBlockEvent)
stop := make(chan struct{})

err := b.Seal(setup.chain.HeaderChain(), block, nil, results, stop)
require.NoError(t, err, "Seal should return nil and spawn the sealing goroutine")

// Wait long enough for the goroutine's time.After(delay) to fire AND
// for it to advance into the second select. This is the critical
// ordering: with the bug, by the time we start receiving below the
// goroutine has already taken the silent `default` path. With the fix
// the goroutine is parked on the send waiting for any receiver.
time.Sleep(time.Duration(delaySec)*time.Second + 500*time.Millisecond)

select {
case ev := <-results:
require.NotNil(t, ev, "expected a sealed block event")
require.NotNil(t, ev.Block, "expected ev.Block to be non-nil")
require.Equal(t, block.NumberU64(), ev.Block.NumberU64(),
"sealed block number should match the input block")
case <-time.After(2 * time.Second):
t.Fatal("Bor.Seal silently dropped the result via the second-select default branch; " +
"expected the goroutine to remain blocked on send (or exit on <-stop) instead. " +
"This was the leak path for val4-style \"elected but silent\" stalls under load.")
}
}

// TestSealWithStopHook_FirstSelectStopBranch verifies onStopExit is invoked
// when stop fires before the delay timer.
func TestSealWithStopHook_FirstSelectStopBranch(t *testing.T) {
t.Parallel()
setup := newSignedChainSetup(t)
b := setup.bor
b.fakeDiff = true

b.Authorize(setup.signerAddr, func(account accounts.Account, mimeType string, data []byte) ([]byte, error) {
return crypto.Sign(crypto.Keccak256(data), setup.privKey)
})

h := setup.makeSignedHeader(t, 1, setup.genesis)
h.Time = uint64(time.Now().Unix()) + 30

body := &types.Body{Transactions: types.Transactions{types.NewTx(&types.LegacyTx{})}}
block := types.NewBlock(h, body, nil, trie.NewStackTrie(nil))

results := make(chan *consensus.NewSealedBlockEvent, 1)
stop := make(chan struct{})

var hookCalls atomic.Int32
hookDone := make(chan struct{})
onStopExit := func() {
hookCalls.Add(1)
close(hookDone)
}

err := b.SealWithStopHook(setup.chain.HeaderChain(), block, nil, results, stop, onStopExit)
require.NoError(t, err)

// Give the goroutine a moment to enter the first select.
time.Sleep(100 * time.Millisecond)
close(stop)

select {
case <-hookDone:
case <-time.After(2 * time.Second):
t.Fatal("onStopExit was not invoked on first-select stop-branch exit")
}
require.Equal(t, int32(1), hookCalls.Load(), "hook must be called exactly once")

select {
case ev := <-results:
t.Fatalf("unexpected result on stop-branch exit: %+v", ev)
case <-time.After(100 * time.Millisecond):
}
}

// TestSealWithStopHook_SecondSelectStopBranch verifies onStopExit is invoked
// when stop fires after the delay timer but before the result send completes.
func TestSealWithStopHook_SecondSelectStopBranch(t *testing.T) {
t.Parallel()
setup := newSignedChainSetup(t)
b := setup.bor
b.fakeDiff = true

b.Authorize(setup.signerAddr, func(account accounts.Account, mimeType string, data []byte) ([]byte, error) {
return crypto.Sign(crypto.Keccak256(data), setup.privKey)
})

h := setup.makeSignedHeader(t, 1, setup.genesis)
h.Time = uint64(time.Now().Unix()) + 1

body := &types.Body{Transactions: types.Transactions{types.NewTx(&types.LegacyTx{})}}
block := types.NewBlock(h, body, nil, trie.NewStackTrie(nil))

// Zero-buffer + no reader: goroutine parks on send in the second select.
results := make(chan *consensus.NewSealedBlockEvent)
stop := make(chan struct{})

var hookCalls atomic.Int32
hookDone := make(chan struct{})
onStopExit := func() {
hookCalls.Add(1)
close(hookDone)
}

err := b.SealWithStopHook(setup.chain.HeaderChain(), block, nil, results, stop, onStopExit)
require.NoError(t, err)

time.Sleep(1500 * time.Millisecond)
close(stop)

select {
case <-hookDone:
case <-time.After(2 * time.Second):
t.Fatal("onStopExit was not invoked on second-select stop-branch exit")
}
require.Equal(t, int32(1), hookCalls.Load(), "hook must be called exactly once")
}

// TestSealWithStopHook_NotCalledOnSuccess verifies onStopExit is NOT invoked
// when the goroutine delivers the result successfully — otherwise the
// success path would race with cleanup and drop valid blocks.
func TestSealWithStopHook_NotCalledOnSuccess(t *testing.T) {
t.Parallel()
setup := newSignedChainSetup(t)
b := setup.bor
b.fakeDiff = true

b.Authorize(setup.signerAddr, func(account accounts.Account, mimeType string, data []byte) ([]byte, error) {
return crypto.Sign(crypto.Keccak256(data), setup.privKey)
})

h := setup.makeSignedHeader(t, 1, setup.genesis)
h.Time = uint64(time.Now().Unix()) + 1

body := &types.Body{Transactions: types.Transactions{types.NewTx(&types.LegacyTx{})}}
block := types.NewBlock(h, body, nil, trie.NewStackTrie(nil))

results := make(chan *consensus.NewSealedBlockEvent, 1)
stop := make(chan struct{})

var hookCalls atomic.Int32
onStopExit := func() {
hookCalls.Add(1)
}

err := b.SealWithStopHook(setup.chain.HeaderChain(), block, nil, results, stop, onStopExit)
require.NoError(t, err)

select {
case ev := <-results:
require.NotNil(t, ev)
require.NotNil(t, ev.Block)
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for sealed block on success path")
}

time.Sleep(200 * time.Millisecond)
require.Equal(t, int32(0), hookCalls.Load(),
"onStopExit must NOT be called on success-branch exit")
}

func TestSeal_UnauthorizedSigner(t *testing.T) {
t.Parallel()

Expand Down
6 changes: 4 additions & 2 deletions miner/fake_miner.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,11 @@ func (m *mockBackendBor) BlockChain() *core.BlockChain {
return m.bc
}

// PeerCount implements Backend.
// PeerCount implements Backend. Returns 1 so the worker's mainLoop sees the
// node as peered — tests using mockBackendBor are not exercising the
// PeerCount==0 gate.
func (*mockBackendBor) PeerCount() int {
panic("unimplemented")
return 1
}

func (m *mockBackendBor) TxPool() *txpool.TxPool {
Expand Down
5 changes: 3 additions & 2 deletions miner/miner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ func (m *mockBackend) BlockChain() *core.BlockChain {
return m.bc
}

// PeerCount implements Backend.
// PeerCount implements Backend. Returns 1 so the worker's mainLoop sees the
// node as peered — miner_test.go is not exercising the PeerCount==0 gate.
func (*mockBackend) PeerCount() int {
panic("unimplemented")
return 1
}

func (m *mockBackend) TxPool() *txpool.TxPool {
Expand Down
Loading
Loading