Skip to content

Commit 3d6d74b

Browse files
committed
fix: align JoinStatisticsProvider with config gate, fall back to direct column stats when no ExpressionAnalyzer
1 parent 0831066 commit 3d6d74b

1 file changed

Lines changed: 21 additions & 10 deletions

File tree

  • datafusion/physical-plan/src/operator_statistics

datafusion/physical-plan/src/operator_statistics/mod.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,9 @@ use std::sync::Arc;
9393

9494
use datafusion_common::stats::Precision;
9595
use datafusion_common::{Result, Statistics};
96+
use datafusion_physical_expr::PhysicalExpr;
9697
use datafusion_physical_expr::expression_analyzer::ExpressionAnalyzerRegistry;
98+
use datafusion_physical_expr::expressions::Column;
9799

98100
use crate::ExecutionPlan;
99101

@@ -779,14 +781,23 @@ impl StatisticsProvider for JoinStatisticsProvider {
779781
return Ok(StatisticsResult::Delegate);
780782
};
781783

782-
let default_analyzer;
783-
let analyzer = match plan.expression_analyzer_registry() {
784-
Some(r) => r,
785-
None => {
786-
default_analyzer = ExpressionAnalyzerRegistry::new();
787-
&default_analyzer
784+
let analyzer = plan.expression_analyzer_registry();
785+
786+
/// Resolve the number of distinct values (NDV) for a join key expression.
787+
/// With an ExpressionAnalyzer registry, delegates to its `get_distinct_count` (handles arbitrary expressions);
788+
/// without one, only a bare `Column` key is resolved, via its index into `stats.column_statistics`, else `None`.
789+
fn resolve_key_ndv(
790+
key: &Arc<dyn PhysicalExpr>,
791+
stats: &Statistics,
792+
analyzer: Option<&ExpressionAnalyzerRegistry>,
793+
) -> Option<usize> {
794+
if let Some(a) = analyzer {
795+
return a.get_distinct_count(key, stats);
788796
}
789-
};
797+
key.downcast_ref::<Column>()
798+
.and_then(|c| stats.column_statistics.get(c.index()))
799+
.and_then(|s| s.distinct_count.get_value().copied())
800+
}
790801

791802
/// Estimate equi-join output using NDV of join key expressions:
792803
/// left_rows * right_rows / product(max(left_ndv_i, right_ndv_i))
@@ -797,15 +808,15 @@ impl StatisticsProvider for JoinStatisticsProvider {
797808
right: &Statistics,
798809
left_rows: usize,
799810
right_rows: usize,
800-
analyzer: &ExpressionAnalyzerRegistry,
811+
analyzer: Option<&ExpressionAnalyzerRegistry>,
801812
) -> usize {
802813
if on.is_empty() {
803814
return left_rows.saturating_mul(right_rows);
804815
}
805816
let mut ndv_divisor: usize = 1;
806817
for (left_key, right_key) in on {
807-
let left_ndv = analyzer.get_distinct_count(left_key, left);
808-
let right_ndv = analyzer.get_distinct_count(right_key, right);
818+
let left_ndv = resolve_key_ndv(left_key, left, analyzer);
819+
let right_ndv = resolve_key_ndv(right_key, right, analyzer);
809820
match (left_ndv, right_ndv) {
810821
(Some(l), Some(r)) if l > 0 && r > 0 => {
811822
ndv_divisor = ndv_divisor.saturating_mul(l.max(r));

0 commit comments

Comments
 (0)