@@ -93,7 +93,9 @@ use std::sync::Arc;
9393
9494use datafusion_common:: stats:: Precision ;
9595use datafusion_common:: { Result , Statistics } ;
96+ use datafusion_physical_expr:: PhysicalExpr ;
9697use datafusion_physical_expr:: expression_analyzer:: ExpressionAnalyzerRegistry ;
98+ use datafusion_physical_expr:: expressions:: Column ;
9799
98100use crate :: ExecutionPlan ;
99101
@@ -779,14 +781,23 @@ impl StatisticsProvider for JoinStatisticsProvider {
779781 return Ok ( StatisticsResult :: Delegate ) ;
780782 } ;
781783
782- let default_analyzer;
783- let analyzer = match plan. expression_analyzer_registry ( ) {
784- Some ( r) => r,
785- None => {
786- default_analyzer = ExpressionAnalyzerRegistry :: new ( ) ;
787- & default_analyzer
784+ let analyzer = plan. expression_analyzer_registry ( ) ;
785+
786+ /// Resolve the NDV (number of distinct values) of one join key expression `key` against `stats`.
787+ /// With `Some(analyzer)`, the registry's `get_distinct_count(key, stats)` result is returned as-is, `None` included (no column fallback).
788+ /// With `None`, only a bare `Column` key resolves: its `index()` selects an entry of `stats.column_statistics`, and that entry's `distinct_count` is returned when `get_value()` yields one.
789+ fn resolve_key_ndv (
790+ key : & Arc < dyn PhysicalExpr > ,
791+ stats : & Statistics ,
792+ analyzer : Option < & ExpressionAnalyzerRegistry > ,
793+ ) -> Option < usize > {
794+ if let Some ( a) = analyzer {
795+ return a. get_distinct_count ( key, stats) ; // analyzer present: its answer is final, even when it is None
788796 }
789- } ;
797+ key. downcast_ref :: < Column > ( ) // no analyzer: any key that is not a plain Column yields None
798+ . and_then ( |c| stats. column_statistics . get ( c. index ( ) ) ) // checked lookup: an out-of-range column index yields None
799+ . and_then ( |s| s. distinct_count . get_value ( ) . copied ( ) ) // None when get_value() has no value to report
800+ }
790801
791802 /// Estimate equi-join output using NDV of join key expressions:
792803 /// left_rows * right_rows / product(max(left_ndv_i, right_ndv_i))
@@ -797,15 +808,15 @@ impl StatisticsProvider for JoinStatisticsProvider {
797808 right : & Statistics ,
798809 left_rows : usize ,
799810 right_rows : usize ,
800- analyzer : & ExpressionAnalyzerRegistry ,
811+ analyzer : Option < & ExpressionAnalyzerRegistry > ,
801812 ) -> usize {
802813 if on. is_empty ( ) {
803814 return left_rows. saturating_mul ( right_rows) ;
804815 }
805816 let mut ndv_divisor: usize = 1 ;
806817 for ( left_key, right_key) in on {
807- let left_ndv = analyzer . get_distinct_count ( left_key, left) ;
808- let right_ndv = analyzer . get_distinct_count ( right_key, right) ;
818+ let left_ndv = resolve_key_ndv ( left_key, left, analyzer ) ;
819+ let right_ndv = resolve_key_ndv ( right_key, right, analyzer ) ;
809820 match ( left_ndv, right_ndv) {
810821 ( Some ( l) , Some ( r) ) if l > 0 && r > 0 => {
811822 ndv_divisor = ndv_divisor. saturating_mul ( l. max ( r) ) ;
0 commit comments