Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions consensus/src/counters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,34 @@ pub static WAIT_FOR_FULL_BLOCKS_TRIGGERED: Lazy<Histogram> = Lazy::new(|| {
)
});

/// Time, in seconds, taken by the payload pull loop as a whole, i.e. the
/// outer loop together with all of its retries.
///
/// Explicit buckets spanning 0–1s are supplied because the default bucket
/// layout has no boundary between 250ms and 500ms, which would hide the
/// 300ms poll ceiling.
pub static PULL_LOOP_DURATION: Lazy<Histogram> = Lazy::new(|| {
    let bucket_bounds = vec![
        // Fast path: 1ms through 10ms.
        0.001, 0.002, 0.005, 0.01,
        // 30ms increments (matching NO_TXN_DELAY) through 330ms.
        0.03, 0.06, 0.09, 0.12, 0.15, 0.18, 0.21, 0.24, 0.27, 0.30, 0.33,
        // Coarser tail up to 1s, for non-default configs.
        0.5, 0.75, 1.0,
    ];
    register_histogram!(
        "aptos_consensus_pull_loop_duration_seconds",
        "Duration of the full payload pull loop including retries",
        bucket_bounds,
    )
    .unwrap()
});

/// How many times the pull loop came back empty and retried before it
/// finally yielded its payload.
///
/// The largest bucket boundary is 35, which leaves room for the roughly 34
/// retries expected under a non-default 1000ms poll time.
pub static PULL_LOOP_EMPTY_RETRIES: Lazy<Histogram> = Lazy::new(|| {
    let retry_buckets = vec![
        // One boundary per retry count from 0 through 10.
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
        // Steps of five from 15 through 35.
        15.0, 20.0, 25.0, 30.0, 35.0,
    ];
    register_histogram!(
        "aptos_consensus_pull_loop_empty_retries",
        "Number of empty retries in the pull loop",
        retry_buckets,
    )
    .unwrap()
});

/// Counts when pipeline backpressure is triggered
pub static PIPELINE_BACKPRESSURE_ON_PROPOSAL_TRIGGERED: Lazy<Histogram> = Lazy::new(|| {
register_avg_counter(
Expand Down
12 changes: 10 additions & 2 deletions consensus/src/payload_client/user/quorum_store_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// Licensed pursuant to the Innovation-Enabling Source Code License, available at https://github.com/aptos-labs/aptos-core/blob/main/LICENSE

use crate::{
counters::WAIT_FOR_FULL_BLOCKS_TRIGGERED, error::QuorumStoreError, monitor,
counters::{PULL_LOOP_DURATION, PULL_LOOP_EMPTY_RETRIES, WAIT_FOR_FULL_BLOCKS_TRIGGERED},
error::QuorumStoreError,
monitor,
payload_client::user::UserPayloadClient,
};
use aptos_consensus_types::{
Expand Down Expand Up @@ -105,6 +107,7 @@ impl UserPayloadClient for QuorumStoreClient {
});
// keep polling QuorumStore until there's payloads available or there's still pending payloads
let start_time = Instant::now();
let mut empty_retries: u64 = 0;

let payload = loop {
// Make sure we don't wait more than expected, due to thread scheduling delays/processing time consumed
Expand All @@ -122,15 +125,20 @@ impl UserPayloadClient for QuorumStoreClient {
)
.await?;
if payload.is_empty() && !return_empty && !done {
empty_retries += 1;
sleep(Duration::from_millis(NO_TXN_DELAY)).await;
continue;
}
break payload;
};
let pull_duration = start_time.elapsed();
PULL_LOOP_DURATION.observe(pull_duration.as_secs_f64());
PULL_LOOP_EMPTY_RETRIES.observe(empty_retries as f64);
debug!(
pull_params = ?params,
duration_ms = start_time.elapsed().as_millis() as u64,
duration_ms = pull_duration.as_millis() as u64,
payload_len = payload.len(),
empty_retries = empty_retries,
return_empty = return_empty,
return_non_full = return_non_full,
"Pull payloads from QuorumStore: proposal"
Expand Down
Loading