From 20117abc7cfc04cd2cc4e37e7f8b2b0eaef0df51 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Thu, 7 May 2026 11:21:14 -0700 Subject: [PATCH 1/4] Don't manipulate time in oximeter database usage tests - The database usage tests paused and advanced time to ensure we trigger another query against the DB to compute usage. This interacts poorly with a timeout in the connection type for talking to ClickHouse. Tokio's test-utils allow pausing time, but when there is literally no other work to do, the runtime "auto-advances" time to the next pending timer. That happens to be our connection timeout, and so the tests fail. Instead, just lower the polling interval for this one test and live with it taking a few seconds. - Fix #10398 --- clickhouse-admin/src/context.rs | 67 ++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs index c55bd25dea0..02d82a242aa 100644 --- a/clickhouse-admin/src/context.rs +++ b/clickhouse-admin/src/context.rs @@ -715,7 +715,15 @@ async fn get_retention_policy( // // This is a WAG, but the computations we do today to report table usage are // pretty inexpensive. +// +// NOTE: We explicitly use a smaller value during testing. This is to avoid +// manipulating time with Tokio's test utils. That cannot work, because we run +// every query to ClickHouse with a timeout, and that causes the tokio timers to +// "auto-advance" to the end of that timeout when we pause in a test. +#[cfg(not(test))] const USAGE_UPDATE_INTERVAL: Duration = Duration::from_mins(2); +#[cfg(test)] +const USAGE_UPDATE_INTERVAL: Duration = Duration::from_secs(1); async fn long_running_usage_task( tx: watch::Sender, @@ -953,14 +961,26 @@ mod tests { .is_empty() ); - // Jump forward until we actually do compute the usage again. - tokio::time::pause(); - let now = tokio::time::Instant::now(); - while now.elapsed() < 2 * USAGE_UPDATE_INTERVAL { - tokio::time::advance(std::time::Duration::from_millis(10)).await; - } - tokio::time::resume(); - let usage = context.database_usage(); + // Wait until we actually do compute the usage again. + let usage = dev::poll::wait_for_condition( + || async { + let usage = context.database_usage(); + match &usage.last_success { + Some(success) => { + if success.tables.is_empty() { + Err(dev::poll::CondCheckError::<()>::NotYet) + } else { + Ok(usage) + } + } + None => Err(dev::poll::CondCheckError::<()>::NotYet), + } + }, + &std::time::Duration::from_millis(100), + &(2 * USAGE_UPDATE_INTERVAL), + ) + .await + .unwrap(); println!("{usage:#?}"); let tables = &usage .last_success @@ -968,19 +988,26 @@ mod tests { .expect("Should have computed something") .tables; tables.contains_key(&String::from("oximeter.measurements_u64")); - tables.contains_key(&String::from("oximeter.measurements_u64")); + tables.contains_key(&String::from("oximeter.measurements_f64")); let version = tables.get(&String::from("oximeter.version")).unwrap(); assert_eq!(version.n_rows, 1); - // Kill the database, and force another collection. + // Kill the database, and wait for another collection. This one should + // fail. clickhouse.cleanup().await.unwrap(); - tokio::time::pause(); - let now = tokio::time::Instant::now(); - while now.elapsed() < 2 * USAGE_UPDATE_INTERVAL { - tokio::time::advance(std::time::Duration::from_millis(10)).await; - } - tokio::time::resume(); - let usage = context.database_usage(); + let usage = dev::poll::wait_for_condition( + || async { + let usage = context.database_usage(); + match &usage.last_error { + Some(_) => Ok(usage), + None => Err(dev::poll::CondCheckError::<()>::NotYet), + } + }, + &std::time::Duration::from_millis(100), + &(2 * USAGE_UPDATE_INTERVAL), + ) + .await + .unwrap(); println!("{usage:#?}"); assert!( usage.last_success.is_some(), @@ -989,7 +1016,11 @@ mod tests { let Some(err) = usage.last_error.as_ref() else { panic!("expected an error to have occurred, but found None"); }; - assert!(err.error.starts_with("Failed to check out")); + let is_network_err = |msg: &str| -> bool { + msg.starts_with("Failed to check out") + || msg.contains("TCP connection to server") + }; + assert!(is_network_err(&err.error), "Expected a network error error"); logctx.cleanup_successful(); } From 79b5f3f74834442e3f7936d805da3f2755a957e2 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 12 May 2026 10:35:41 -0700 Subject: [PATCH 2/4] Review feedback --- clickhouse-admin/src/context.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs index 02d82a242aa..552ad5164a8 100644 --- a/clickhouse-admin/src/context.rs +++ b/clickhouse-admin/src/context.rs @@ -723,7 +723,7 @@ async fn get_retention_policy( #[cfg(not(test))] const USAGE_UPDATE_INTERVAL: Duration = Duration::from_mins(2); #[cfg(test)] -const USAGE_UPDATE_INTERVAL: Duration = Duration::from_secs(1); +const USAGE_UPDATE_INTERVAL: Duration = Duration::from_millis(250); async fn long_running_usage_task( tx: watch::Sender, @@ -976,7 +976,7 @@ mod tests { None => Err(dev::poll::CondCheckError::<()>::NotYet), } }, - &std::time::Duration::from_millis(100), + &std::time::Duration::from_millis(50), &(2 * USAGE_UPDATE_INTERVAL), ) .await @@ -1018,7 +1018,7 @@ mod tests { }; let is_network_err = |msg: &str| -> bool { msg.starts_with("Failed to check out") - || msg.contains("TCP connection to server") + || msg.starts_with("Native protocol error") }; assert!(is_network_err(&err.error), "Expected a network error error"); From c341dfd50a2fb7fae7150b62e81b65d743f76e21 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Wed, 13 May 2026 11:47:18 -0700 Subject: [PATCH 3/4] Tables can be populated here --- clickhouse-admin/src/context.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs index 552ad5164a8..ea895627e8e 100644 --- a/clickhouse-admin/src/context.rs +++ b/clickhouse-admin/src/context.rs @@ -953,13 +953,6 @@ mod tests { let usage = context.database_usage(); println!("{usage:#?}"); assert!(usage.last_success.is_some()); - assert!( - usage - .last_success - .expect("Should have successfully computed something") - .tables - .is_empty() - ); // Wait until we actually do compute the usage again. let usage = dev::poll::wait_for_condition( From 1f8b86ae6bbe2e4936e665e687741a8044496bce Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Thu, 14 May 2026 15:42:15 -0700 Subject: [PATCH 4/4] Not like that either --- clickhouse-admin/src/context.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs index ea895627e8e..cfcf8d15e90 100644 --- a/clickhouse-admin/src/context.rs +++ b/clickhouse-admin/src/context.rs @@ -980,10 +980,13 @@ mod tests { .as_ref() .expect("Should have computed something") .tables; - tables.contains_key(&String::from("oximeter.measurements_u64")); - tables.contains_key(&String::from("oximeter.measurements_f64")); - let version = tables.get(&String::from("oximeter.version")).unwrap(); - assert_eq!(version.n_rows, 1); + assert!( + tables.contains_key(&String::from("oximeter.measurements_u64")) + ); + assert!( + tables.contains_key(&String::from("oximeter.measurements_f64")) + ); + assert!(tables.contains_key(&String::from("oximeter.version"))); // Kill the database, and wait for another collection. This one should // fail.