From 8202e38c6a065b181b9d54dc8bc378543cf8fca9 Mon Sep 17 00:00:00 2001 From: Surendra Singh Lilhore Date: Thu, 14 May 2026 12:16:43 +0000 Subject: [PATCH 1/2] [GLUTEN-12094][VL]Strip default comparator from array_sort for Velox offloading Detect Spark's default null-handling comparator in ArraySort and strip the lambda so Velox uses its 1-arg array_sort (ascending, nulls-last) which has identical semantics. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../velox/VeloxSparkPlanExecApi.scala | 29 ++++++++++++++++- .../gluten/execution/MiscOperatorSuite.scala | 32 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index b7a1e172b2c8..7bedeecd2224 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -210,7 +210,34 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with Logging { argument: ExpressionTransformer, function: ExpressionTransformer, expr: ArraySort): ExpressionTransformer = { - GenericExpressionTransformer(substraitExprName, Seq(argument, function), expr) + // Spark's default array_sort comparator includes null-handling logic that + // Velox's rewriteArraySortCall cannot parse. Detect it and strip the lambda + // so Velox uses its 1-arg array_sort (ascending, nulls-last) which has + // identical semantics. + if (isDefaultArraySortComparator(expr)) { + logInfo("Stripping default comparator from array_sort for Velox offloading") + GenericExpressionTransformer(substraitExprName, Seq(argument), expr) + } else { + GenericExpressionTransformer(substraitExprName, Seq(argument, function), expr) + } + } + + /** + * Checks whether the ArraySort expression uses Spark's default comparator. The default comparator + * wraps the simple ascending comparison in null-handling if-else logic that Velox's + * SimpleComparisonMatcher cannot parse. Since Velox's 1-arg array_sort already provides ascending + * sort with nulls-last (matching Spark's default semantics), we can safely strip the comparator + * lambda. + */ + private def isDefaultArraySortComparator(expr: ArraySort): Boolean = { + expr.function match { + case LambdaFunction(body, args, _) if args.size == 2 => + val left = args(0) + val right = args(1) + val defaultWithNulls = ArraySort.comparator(left, right) + body.semanticEquals(defaultWithNulls) + case _ => false + } } /** Transform array exists to Substrait */ diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala index fec55eff09e8..15ad96b34803 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala @@ -1244,6 +1244,38 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa } } + test("array_sort - default comparator offloading") { + withTable("t_arr_sort_default") { + sql("create table t_arr_sort_default (a array) using parquet") + sql( + "insert into t_arr_sort_default values " + + "(array(3, 1, 2, 5, 4)), (array(10, 20, 30)), (array(null, 2, 1)), (array())") + + // Default ascending sort (no lambda) — Spark generates a null-handling + // comparator that Velox cannot parse without the stripping logic. + runQueryAndCompare("select a, array_sort(a) from t_arr_sort_default") { + checkGlutenPlan[ProjectExecTransformer] + } + + // sort_array also uses default comparator under the hood + runQueryAndCompare("select a, sort_array(a) from t_arr_sort_default") { + checkGlutenPlan[ProjectExecTransformer] + } + + // Descending sort_array + runQueryAndCompare("select a, sort_array(a, false) from t_arr_sort_default") { + checkGlutenPlan[ProjectExecTransformer] + } + + // Custom comparator (descending) — must still be offloaded with 2-arg form + runQueryAndCompare( + "select a, array_sort(a, (l, r) -> " + + "IF(l > r, -1, IF(l < r, 1, 0))) from t_arr_sort_default") { + checkGlutenPlan[ProjectExecTransformer] + } + } + } + test("Support bool type filter in scan") { withTable("t") { sql("create table t (id int, b boolean) using parquet") From f4b931547c90c99f4c39df6c0a4659a7f80cb006 Mon Sep 17 00:00:00 2001 From: Surendra Singh Lilhore Date: Thu, 14 May 2026 17:06:33 +0000 Subject: [PATCH 2/2] Fix scalastyle non-ASCII characters in MiscOperatorSuite Replace em dash characters with regular hyphens to pass scalastyle nonascii check. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../scala/org/apache/gluten/execution/MiscOperatorSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala index 15ad96b34803..a796399b672b 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala @@ -1251,7 +1251,7 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa "insert into t_arr_sort_default values " + "(array(3, 1, 2, 5, 4)), (array(10, 20, 30)), (array(null, 2, 1)), (array())") - // Default ascending sort (no lambda) — Spark generates a null-handling + // Default ascending sort (no lambda) - Spark generates a null-handling // comparator that Velox cannot parse without the stripping logic. runQueryAndCompare("select a, array_sort(a) from t_arr_sort_default") { checkGlutenPlan[ProjectExecTransformer] @@ -1267,7 +1267,7 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa checkGlutenPlan[ProjectExecTransformer] } - // Custom comparator (descending) — must still be offloaded with 2-arg form + // Custom comparator (descending) - must still be offloaded with 2-arg form runQueryAndCompare( "select a, array_sort(a, (l, r) -> " + "IF(l > r, -1, IF(l < r, 1, 0))) from t_arr_sort_default") {