diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index c2ce4a740ec..7e68a33a697 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -579,6 +579,18 @@ public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext conte (arguments.isEmpty() || arguments.size() == 1) ? Collections.emptyList() : arguments.subList(1, arguments.size()); + // No-arg ranking functions (row_number, rank, dense_rank) bypass + // aggregate signature validation since they have no field arguments. + if (field == null && args.isEmpty()) { + return PlanUtils.makeOver( + context, + functionName, + null, + args, + partitions, + List.of(), + node.getWindowFrame()); + } List nodes = PPLFuncImpTable.INSTANCE.validateAggFunctionSignature( functionName, field, args, context.rexBuilder); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 39f3a6f2d05..003cb5acc21 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -211,6 +211,22 @@ static RexNode makeOver( true, lowerBound, upperBound); + case RANK: + return withOver( + context.relBuilder.aggregateCall(SqlStdOperatorTable.RANK), + partitions, + orderKeys, + true, + lowerBound, + upperBound); + case DENSE_RANK: + return withOver( + context.relBuilder.aggregateCall(SqlStdOperatorTable.DENSE_RANK), + partitions, + orderKeys, + true, + lowerBound, + upperBound); case NTH_VALUE: return withOver( context.relBuilder.aggregateCall(SqlStdOperatorTable.NTH_VALUE, field, argList.get(0)), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java 
b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 14f058a75d0..7c6545c5d66 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -425,6 +425,9 @@ public enum BuiltinFunctionName { .put("dc", BuiltinFunctionName.DISTINCT_COUNT_APPROX) .put("distinct_count", BuiltinFunctionName.DISTINCT_COUNT_APPROX) .put("pattern", BuiltinFunctionName.INTERNAL_PATTERN) + .put("row_number", BuiltinFunctionName.ROW_NUMBER) + .put("rank", BuiltinFunctionName.RANK) + .put("dense_rank", BuiltinFunctionName.DENSE_RANK) .build(); public static Optional of(String str) { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteEventstatsRankingIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteEventstatsRankingIT.java new file mode 100644 index 00000000000..44dc46a9519 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteEventstatsRankingIT.java @@ -0,0 +1,136 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.MatcherUtils.*; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteEventstatsRankingIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.STATE_COUNTRY); + } + + @Test + public void testEventstatsRowNumber() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eventstats row_number() by country", TEST_INDEX_STATE_COUNTRY)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + 
schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("row_number()", "bigint")); + verifyNumOfRows(actual, 4); + } + + @Test + public void testEventstatsRank() throws IOException { + JSONObject actual = + executeQuery( + String.format("source=%s | eventstats rank() by country", TEST_INDEX_STATE_COUNTRY)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("rank()", "bigint")); + verifyNumOfRows(actual, 4); + } + + @Test + public void testEventstatsDenseRank() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eventstats dense_rank() by country", TEST_INDEX_STATE_COUNTRY)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("dense_rank()", "bigint")); + verifyNumOfRows(actual, 4); + } + + @Test + public void testStreamstatsRowNumber() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats row_number() by country", TEST_INDEX_STATE_COUNTRY)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("row_number()", "bigint")); + verifyNumOfRows(actual, 4); + } + + @Test + public void testStreamstatsRank() throws IOException { + JSONObject actual = + executeQuery( + String.format("source=%s | streamstats rank() by country", TEST_INDEX_STATE_COUNTRY)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("rank()", 
"bigint")); + verifyNumOfRows(actual, 4); + } + + @Test + public void testStreamstatsDenseRank() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats dense_rank() by country", TEST_INDEX_STATE_COUNTRY)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("dense_rank()", "bigint")); + verifyNumOfRows(actual, 4); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java index 1a2f5337998..25e7c12ffff 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java @@ -145,6 +145,47 @@ public void testNumericFieldFromString() throws Exception { client().performRequest(deleteRequest); } + @Test + public void testBooleanFieldFromNumberAcrossWildcardIndices() throws Exception { + // Reproduce issue #5269: querying across indices where same field has conflicting types + // (boolean vs text) and the text-typed index stores a numeric value like 0. 
+ String indexBool = "repro_bool_test_bb"; + String indexText = "repro_bool_test_aa"; + + try { + // Create index with boolean mapping + Request createBool = new Request("PUT", "/" + indexBool); + createBool.setJsonEntity( + "{\"mappings\":{\"properties\":{\"flag\":{\"type\":\"boolean\"}," + + "\"startTime\":{\"type\":\"date_nanos\"}}}}"); + client().performRequest(createBool); + + // Create index with text mapping + Request createText = new Request("PUT", "/" + indexText); + createText.setJsonEntity( + "{\"mappings\":{\"properties\":{\"flag\":{\"type\":\"text\"}," + + "\"startTime\":{\"type\":\"date_nanos\"}}}}"); + client().performRequest(createText); + + // Insert boolean value into boolean-typed index + Request insertBool = new Request("PUT", "/" + indexBool + "/_doc/1?refresh=true"); + insertBool.setJsonEntity("{\"startTime\":\"2026-03-25T20:25:00.000Z\",\"flag\":false}"); + client().performRequest(insertBool); + + // Insert numeric value into text-typed index + Request insertText = new Request("PUT", "/" + indexText + "/_doc/1?refresh=true"); + insertText.setJsonEntity("{\"startTime\":\"2026-03-24T20:25:00.000Z\",\"flag\":0}"); + client().performRequest(insertText); + + // Query across both indices with wildcard — should not throw an error + JSONObject result = executeQuery("source=repro_bool_test_* | fields flag"); + assertEquals(2, result.getJSONArray("datarows").length()); + } finally { + client().performRequest(new Request("DELETE", "/" + indexBool)); + client().performRequest(new Request("DELETE", "/" + indexText)); + } + } + @Test public void testBooleanFieldFromString() throws Exception { final int docId = 2; diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5168.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5168.yml new file mode 100644 index 00000000000..aa5a47fed0f --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5168.yml @@ -0,0 +1,104 @@ +setup: + - do: + 
indices.create: + index: test_issue_5168 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + int_field: + type: integer + str_field: + type: keyword + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test_issue_5168", "_id": "1"}}' + - '{"int_field": 42, "str_field": "alpha"}' + - '{"index": {"_index": "test_issue_5168", "_id": "2"}}' + - '{"int_field": -1, "str_field": "alpha"}' + - '{"index": {"_index": "test_issue_5168", "_id": "3"}}' + - '{"int_field": 0, "str_field": "beta"}' + + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + + - do: + indices.delete: + index: test_issue_5168 + ignore_unavailable: true + +--- +"Issue 5168: eventstats row_number by partition should succeed": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_issue_5168 | eventstats row_number() by str_field + + - match: { total: 3 } + - length: { datarows: 3 } + +--- +"Issue 5168: eventstats rank by partition should succeed": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_issue_5168 | eventstats rank() by str_field + + - match: { total: 3 } + - length: { datarows: 3 } + +--- +"Issue 5168: eventstats dense_rank by partition should succeed": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_issue_5168 | eventstats dense_rank() by str_field + + - match: { total: 3 } + - length: { datarows: 3 } + +--- +"Issue 5168: streamstats rank by partition should succeed": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_issue_5168 | streamstats rank() by str_field + + - match: { total: 3 } + - length: { datarows: 3 } diff --git 
a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml new file mode 100644 index 00000000000..8c49825e6e2 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml @@ -0,0 +1,63 @@ +setup: + - do: + indices.create: + index: issue5269_bool + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + flag: + type: boolean + startTime: + type: date_nanos + + - do: + indices.create: + index: issue5269_text + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + flag: + type: text + startTime: + type: date_nanos + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5269_bool", "_id": "1"}}' + - '{"startTime": "2026-03-25T20:25:00.000Z", "flag": false}' + - '{"index": {"_index": "issue5269_text", "_id": "1"}}' + - '{"startTime": "2026-03-24T20:25:00.000Z", "flag": 0}' + +--- +teardown: + - do: + indices.delete: + index: issue5269_bool + ignore_unavailable: true + - do: + indices.delete: + index: issue5269_text + ignore_unavailable: true + +--- +"Issue 5269: PPL wildcard query across indices with boolean/text mapping conflict should not error": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5269_* | fields flag + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java index 2944aae77f1..4f6b393cc24 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java @@ -212,6 +212,8 @@ private boolean parseBooleanValue(JsonNode node) { return 
node.booleanValue(); } else if (node.isTextual()) { return Boolean.parseBoolean(node.textValue()); + } else if (node.isNumber()) { + return node.intValue() != 0; } else { if (LOG.isDebugEnabled()) { LOG.debug("node '{}' must be a boolean", node); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 0734613e522..031b9243f38 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -234,6 +234,9 @@ public void constructIp() { public void constructBoolean() { assertAll( () -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":true}").get("boolV")), + () -> assertEquals(booleanValue(false), tupleValue("{\"boolV\":false}").get("boolV")), + () -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":1}").get("boolV")), + () -> assertEquals(booleanValue(false), tupleValue("{\"boolV\":0}").get("boolV")), () -> assertEquals(booleanValue(true), constructFromObject("boolV", true)), () -> assertEquals(booleanValue(true), constructFromObject("boolV", "true")), () -> assertEquals(booleanValue(true), constructFromObject("boolV", 1)), diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsRankingTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsRankingTest.java new file mode 100644 index 00000000000..24160c0d7fa --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsRankingTest.java @@ -0,0 +1,136 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public 
class CalcitePPLEventstatsRankingTest extends CalcitePPLAbstractTest { + + public CalcitePPLEventstatsRankingTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testEventstatsRowNumber() { + String ppl = "source=EMP | eventstats row_number() by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], row_number()=[ROW_NUMBER() OVER (PARTITION BY $7)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " ROW_NUMBER() OVER (PARTITION BY `DEPTNO`) `row_number()`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEventstatsRank() { + String ppl = "source=EMP | eventstats rank() by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], rank()=[RANK() OVER (PARTITION BY $7)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " RANK() OVER (PARTITION BY `DEPTNO`) `rank()`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEventstatsDenseRank() { + String ppl = "source=EMP | eventstats dense_rank() by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], dense_rank()=[DENSE_RANK() OVER (PARTITION BY $7)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + 
"SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " DENSE_RANK() OVER (PARTITION BY `DEPTNO`) `dense_rank()`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEventstatsRowNumberNoPartition() { + String ppl = "source=EMP | eventstats row_number()"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], row_number()=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " ROW_NUMBER() OVER () `row_number()`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testStreamstatsRowNumber() { + String ppl = "source=EMP | streamstats row_number() by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], row_number()=[$9])\n" + + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], row_number()=[ROW_NUMBER()" + + " OVER (PARTITION BY $7)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testStreamstatsRank() { + String ppl = "source=EMP | streamstats rank() by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], 
rank()=[$9])\n" + + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], rank()=[RANK()" + + " OVER (PARTITION BY $7)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testStreamstatsDenseRank() { + String ppl = "source=EMP | streamstats dense_rank() by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], dense_rank()=[$9])\n" + + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], dense_rank()=[DENSE_RANK()" + + " OVER (PARTITION BY $7)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } +}