diff --git a/.claude/commands/ppl-bugfix.md b/.claude/commands/ppl-bugfix.md new file mode 100644 index 00000000000..f2ac6d6807d --- /dev/null +++ b/.claude/commands/ppl-bugfix.md @@ -0,0 +1,162 @@ +--- +allowed-tools: Agent, Read, Bash(gh:*), Bash(git:*) +description: Run the PPL bugfix harness for a GitHub issue or follow up on an existing PR +--- + +Fix a PPL bug or follow up on an existing PR using the harness in `.claude/harness/ppl-bugfix-harness.md`. + +## Input + +Accepts one or more issue/PR references. Multiple references are processed in parallel (each gets its own subagent + worktree). + +- `/ppl-bugfix #1234` — single issue +- `/ppl-bugfix PR#5678` — single PR +- `/ppl-bugfix #1234 #5678 PR#9012` — multiple in parallel +- `/ppl-bugfix https://github.com/opensearch-project/sql/issues/1234` — URL + +Optional mode flag (append to any of the above): +- `--safe` — `acceptEdits` mode. Auto-approve file edits only, Bash commands require manual approval. (Most conservative) +- `--yolo` — `bypassPermissions` mode. Fully trusted, no prompts. Subagent runs in an isolated worktree so this is safe. (Default) + +> **Note**: `bypassPermissions` skips the interactive prompt but still respects the allow-list in `~/.claude/settings.json`. Ensure git/gh write commands are in the global allow-list. + +Examples: +- `/ppl-bugfix #1234` — single issue, defaults to yolo +- `/ppl-bugfix #1234 #5678 --yolo` — two issues in parallel +- `/ppl-bugfix PR#5293 PR#5300` — two PRs in parallel +- `/ppl-bugfix #1234 PR#5678 --safe` — mix of issue and PR + +If no argument given, ask for an issue or PR number. + +## Step 0: Resolve Permission Mode + +Parse the mode flag from the input arguments: + +| Flag | Mode | +|------|------| +| `--safe` | `acceptEdits` | +| `--yolo` | `bypassPermissions` | +| _(no flag)_ | `bypassPermissions` (default) | + +Use the resolved mode as the `mode` parameter when dispatching the subagent in Step 2A/2B. 
+ +## Step 1: Resolve Each Reference + +For each issue/PR reference in the input, resolve its state. Run these lookups in parallel when there are multiple references. + +```bash +# Issue → PR (check multiple closing keyword variants) +gh pr list --search "Resolves #" --json number,url,state --limit 5 +gh pr list --search "Fixes #" --json number,url,state --limit 5 +gh pr list --search "Closes #" --json number,url,state --limit 5 + +# PR → Issue +gh pr view --json body | jq -r '.body' | grep -oiE '(resolves|fixes|closes) #[0-9]+' | grep -oE '[0-9]+' +``` + +| State | Action | +|-------|--------| +| Issue exists, no PR | **Initial Fix** (Step 2A) | +| Issue exists, open PR found | **Follow-up** (Step 2B) | +| PR provided directly | **Follow-up** (Step 2B) | + +## Step 2: Dispatch Subagents + +Dispatch one subagent per reference. When there are multiple references, dispatch all subagents in a single message (parallel execution). + +### 2A: Initial Fix + +``` +Agent( + mode: "", + isolation: "worktree", + name: "bugfix-", + description: "PPL bugfix #", + prompt: "Read .claude/harness/ppl-bugfix-harness.md and follow it to fix GitHub issue #. + Follow Phase 0 through Phase 3 in order. + Phase 0.3 defines TDD execution flow. Do NOT skip any phase. + CRITICAL: If Phase 0.1 determines the bug is already fixed on main, HARD STOP. + Do NOT write tests, do NOT create a PR — just comment/close the issue and report back. + If the bug IS reproducible, post the Decision Log (Phase 3.4) before completing." +) +``` + +### 2B: Follow-up + +Before dispatching, check if an existing worktree already has the PR branch checked out: + +```bash +# List worktrees and find one on the PR branch +for wt in .claude/worktrees/agent-*/; do + branch=$(git -C "$wt" branch --show-current 2>/dev/null) + if [ "$branch" = "" ]; then + echo "REUSE: $wt (branch: $branch)" + fi +done +``` + +**If existing worktree found**: Do NOT use `isolation: "worktree"`. 
Pass the worktree path in the prompt so the subagent works there directly. + +``` +Agent( + mode: "", + name: "bugfix-", + description: "PPL bugfix # followup", + prompt: "cd first, then read .claude/harness/ppl-bugfix-followup.md and follow it. + PR: (), Issue: # + Working directory: " +) +``` + +**If no existing worktree**: Create a new one. + +``` +Agent( + mode: "", + isolation: "worktree", + name: "bugfix-", + description: "PPL bugfix # followup", + prompt: "Read .claude/harness/ppl-bugfix-followup.md and follow it. + PR: (), Issue: #" +) +``` + +## Step 3: Report Back + +After all subagents complete, report a summary for each: +- Classification, fix summary, PR URL, worktree path and branch, items needing human attention (2A) +- What was addressed, current PR state, whether another round is needed (2B) + +**Always include the worktree→PR mapping** from the subagent's output, e.g.: + +``` +Worktree: /path/to/.claude/worktrees/agent-xxxx +Branch: bugfix-1234 +PR: #5678 +``` + +**Important**: After reporting, the main agent must remember this mapping. When the user later asks to make changes to the PR (e.g., "commit this to PR #5678"), operate in the worktree directory — not the main session directory. + +## Subagent Lifecycle + +Subagents are task-scoped. They complete and release context — they cannot poll for events. 
+ +``` +Agent A (Phase 0-3) → creates PR → completes + (CI runs, reviewers comment, conflicts arise) +Agent B (Phase 3.5) → handles feedback → completes + (repeat as needed) +Agent N (Phase 3.5) → gh pr ready → done +``` + +Context is preserved across agents via: +- **Decision Log** (PR comment) — single source of truth for rejected alternatives, pitfalls, design rationale +- **GitHub state** (PR diff, review comments, CI logs) — reconstructed by each follow-up agent + +## Rules + +- Subagent reads `.claude/harness/ppl-bugfix-harness.md` and fetches issue/PR details itself — do NOT inline content into the prompt +- If bug is not reproducible (Phase 0.1), stop and report — do not proceed +- Issue ↔ PR auto-resolution means the user never needs to track PR numbers manually +- **Do NOT use `mode: "auto"` for subagents** — `auto` mode does not work for subagents; Bash commands still require manual approval. Only `bypassPermissions` reliably skips permission checks. +- **Always dispatch subagent** — even for trivial follow-ups (remove co-author, force push). Do NOT run commands directly in the main session; subagents with `bypassPermissions` skip permission prompts, the main session does not. diff --git a/.claude/harness/ppl-bugfix-followup.md b/.claude/harness/ppl-bugfix-followup.md new file mode 100644 index 00000000000..4b31ab6029e --- /dev/null +++ b/.claude/harness/ppl-bugfix-followup.md @@ -0,0 +1,125 @@ +# PPL Bugfix Follow-up + +## Rules + +- Do NOT add `Co-Authored-By` lines in commits — only DCO `Signed-off-by` + +--- + +## Report Working Directory + +```bash +echo "Worktree: $(pwd)" +echo "Branch: $(git branch --show-current)" +``` + +Include this in your output so the caller knows where changes are happening. 
+ +## Reconstruct Context + +First checkout the PR branch, then load state: + +```bash +# Checkout the PR branch in this worktree +gh pr checkout + +# Resolve fork remote — the worktree may only have origin (upstream) +git remote -v +# If no fork remote exists, add it: +git remote add fork https://github.com//sql.git + +# Load PR state — reviews, CI, mergeability +gh pr view --json title,body,state,reviews,statusCheckRollup,mergeable +gh pr checks + +# Load ALL comments — includes bot comments (Code-Diff-Analyzer, PR Reviewer Guide, Code Suggestions) and human comments +gh pr view --json comments --jq '.comments[] | {author: .author.login, body: .body}' +``` + +Categorize ALL signals — not just CI and human reviews: + +| Signal | Type | +|--------|------| +| `statusCheckRollup` has failures | CI failure | +| `reviews` has CHANGES_REQUESTED | Review feedback | +| `mergeable` is CONFLICTING | Merge conflict | +| Bot comments with actionable suggestions | Review feedback (treat like human review) | +| All pass + approved | Ready — run `gh pr ready` | + +## Handle Review Feedback + +For each comment (human OR bot), **cross-check against the Decision Log first**: + +| Type | Action | +|------|--------| +| Code change | If already rejected in Decision Log, reply with reasoning. Otherwise make the change, new commit, push | +| Question | Reply with explanation — Decision Log often has the answer | +| Nit | Fix if trivial | +| Disagreement | Reply with Decision Log reasoning; if reviewer insists, escalate to user | + +```bash +git add && git commit -s -m "Address review feedback: " +git push -u fork +``` + +## Clean Up Commit History + +When you need to amend a commit (e.g. remove Co-Authored-By, reword message) and the branch has a merge commit on top, don't try `git reset --soft origin/main` — it will include unrelated changes if main has moved. 
Instead cherry-pick the fix onto latest main: + +```bash +git checkout -B clean-branch origin/main +git cherry-pick +git commit --amend -s -m "" +git push fork clean-branch: --force-with-lease +``` + +## Handle CI Failures + +```bash +gh pr checks # Identify failures +gh run view --log-failed # Read logs +# Test failure → fix locally, push new commit +# Spotless → ./gradlew spotlessApply, push +# Flaky → gh run rerun --failed +``` + +## Handle Merge Conflicts + +```bash +git fetch origin && git merge origin/main # Resolve conflicts +./gradlew spotlessApply && ./gradlew test && ./gradlew :integ-test:integTest # Re-verify +git commit -s -m "Resolve merge conflicts with main" +git push -u fork +``` + +## Mark Ready + +```bash +gh pr ready +``` + +## Retrospective + +After handling follow-up, reflect on the feedback received and check if it reveals gaps in the harness or command: + +For each comment addressed (bot or human): +- **Does the feedback point to a pattern the harness should have prevented?** → Add guidance to the relevant Phase in `ppl-bugfix-harness.md` +- **Was this a repeated mistake across PRs?** → Add to Quick Reference or Case Index +- **Did the harness template produce the problematic code?** → Fix the template directly +- **Was a permission or tool missing?** → Add to `.claude/settings.json` +- **Did the follow-up workflow itself miss this signal?** → Update this file + +If any improvement is needed, make the edit and include it in the same commit. + +## Completion Gate + +Before reporting "done": + +1. Run `git status --porcelain` — if any uncommitted changes remain, commit and push them. This includes harness edits from Retrospective. +2. 
Report in your final output: + +``` +Worktree: +Branch: +PR: +``` diff --git a/.claude/harness/ppl-bugfix-harness.md b/.claude/harness/ppl-bugfix-harness.md new file mode 100644 index 00000000000..54c2103fc9b --- /dev/null +++ b/.claude/harness/ppl-bugfix-harness.md @@ -0,0 +1,169 @@ +# PPL Bugfix Harness + +## Phase 0: Triage + +### 0.0 Report Working Directory + +```bash +echo "Worktree: $(pwd)" +echo "Branch: $(git branch --show-current)" +``` + +Include this in your output so the caller knows where changes are happening. + +### 0.1 Load & Reproduce + +```bash +gh issue view --repo opensearch-project/sql +``` + +Write a failing test or run an existing one to reproduce the bug on `main`. + +If the bug **does not reproduce** (correct results, not infra failure): + +| Finding | Action | +|---------|--------| +| Already fixed | `gh issue comment` + `gh issue close` | +| Older version only | `gh issue comment` + `gh issue close` | +| Intermittent | Label `flaky` or `needs-info`, do NOT close | +| Can't reproduce | Comment asking for repro steps, label `needs-info` | + +**HARD STOP** — do not proceed. Report back. + +### 0.2 Classify + +Identify the bug layer (Grammar, AST/Functions, Type System, Optimizer, Execution, DI/Resource) and record it. Consult `.claude/harness/ppl-bugfix-reference.md` for fix-path-specific guidance if needed. + +### 0.3 Guardrails + +Stop and report back if: +- Root cause unclear after reading 15+ source files +- Fix breaks 5+ unrelated tests +- Same build error 3 times in a row + +### 0.4 Execution Flow + +``` +Triage → Write FAILING test → Fix → Remaining tests → Verify → Commit → PR → Decision Log → Completion Gate +``` + +--- + +## Phase 1: Fix + +Find and fix the root cause. Consult `.claude/harness/ppl-bugfix-reference.md` for path-specific patterns and examples. + +--- + +## Phase 2: Tests + +Consult `.claude/harness/ppl-bugfix-reference.md` for test templates. 
+ +Required deliverables: +- Failing test reproducing the bug (written BEFORE the fix) +- Unit tests covering happy path and edge cases +- Integration test — add to an existing `*IT.java` when possible; if creating a new one, add it to `CalciteNoPushdownIT` +- YAML REST test at `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/.yml` + +--- + +## Phase 3: Verify & Submit + +### 3.1 Verify + +```bash +./gradlew spotlessApply +./gradlew ::test --tests "" +./gradlew test +./gradlew :integ-test:integTest -Dtests.class="*" +``` + +Run `./gradlew :integ-test:yamlRestTest` if YAML tests were added. Run `./gradlew generateGrammarSource && ./gradlew :ppl:test` if grammar was modified. + +### 3.2 Commit & PR + +```bash +git add +git commit -s -m "[BugFix] Fix (#)" +git fetch origin && git merge origin/main +./gradlew test && ./gradlew :integ-test:integTest -Dtests.class="*" + +# Resolve fork remote (check git remote -v; add if missing) +git remote add fork https://github.com//sql.git +git push -u fork +``` + +Do NOT add Co-Authored-By lines. Use the git user name to infer the fork owner, or fall back to "qianheng-aws". + +```bash +gh pr create --draft --repo opensearch-project/sql \ + --title "[BugFix] Fix (#)" \ + --body "$(cat <<'EOF' +### Description + + +### Related Issues +Resolves # + +### Check List +- [x] New functionality includes testing +- [x] Commits signed per DCO (`-s`) +- [x] `spotlessCheck` passed +- [x] Unit tests passed +- [x] Integration tests passed +EOF +)" +``` + +### 3.3 Decision Log + +Post as a PR comment: + +```bash +gh pr comment --body "$(cat <<'EOF' +## Decision Log +**Root Cause**: +**Approach**: +**Alternatives Rejected**: +**Pitfalls**: +**Things to Watch**: +EOF +)" +``` + +--- + +## Completion Gate + +Run `git status --porcelain` — if any uncommitted changes remain, commit and push them before proceeding. + +Do NOT report "done" until every item below is checked. 
List each in your final report: + +- [ ] **Unit tests**: New test class or methods +- [ ] **Integration test**: New `*IT.java` test +- [ ] **YAML REST test**: `issues/.yml` +- [ ] **spotlessApply**: Ran successfully +- [ ] **Tests pass**: Affected modules +- [ ] **Commit**: DCO sign-off, `[BugFix]` prefix, no Co-Authored-By +- [ ] **Draft PR**: `--draft`, body contains `Resolves #` +- [ ] **Decision Log**: PR comment posted + +If any item is blocked, report which and why. + +--- + +## Phase 4: Retrospective + +- [ ] Symptom in Quick Reference? Add if missing. +- [ ] Classification correct? Fix routing if misleading. +- [ ] Test template worked as-is? Fix if broken. +- [ ] New pattern? Add to Case Index. + +Include harness improvements in the same PR. + +Report in your final output: +``` +Worktree: +Branch: +PR: +``` diff --git a/.claude/harness/ppl-bugfix-reference.md b/.claude/harness/ppl-bugfix-reference.md new file mode 100644 index 00000000000..20697c83ecb --- /dev/null +++ b/.claude/harness/ppl-bugfix-reference.md @@ -0,0 +1,157 @@ +# PPL Bugfix Reference + +Consult this file when you need fix-path-specific guidance or test templates. + +--- + +## Fix Path Reference + +### Path A — Grammar / Parser + +1. Update grammar files (must stay in sync): + - `language-grammar/src/main/antlr4/OpenSearchPPLParser.g4` (primary) + - `ppl/src/main/antlr/OpenSearchPPLParser.g4` + - `async-query-core/src/main/antlr/OpenSearchPPLParser.g4` (if applicable) +2. Regenerate: `./gradlew generateGrammarSource` +3. Update AstBuilder: `ppl/.../parser/AstBuilder.java` +4. Test: `AstBuilderTest` + +### Path B — AST / Function Implementation + +1. AST nodes in `core/.../ast/tree/`, functions in `core/.../expression/function/` or `PPLBuiltinOperators` +2. Watch Visitor pattern — sync `AbstractNodeVisitor`, `Analyzer`, `CalciteRelNodeVisitor`, `PPLQueryDataAnonymizer` +3. Test: `verifyLogical()`, `verifyPPLToSparkSQL()`, `verifyResult()` +4. 
**Before writing a new function-name → Calcite-op switch, try to reuse the existing visitor** + (`aggVisitor` / `rexVisitor` / `CalciteAggCallVisitor` / `CalciteRexNodeVisitor`). If the issue + is that a shared visitor resolves field references against the wrong row (e.g., wrong side of a + join), rewrite the AST field references to reference the correct names and delegate instead of + duplicating the AVG/SUM/MIN/MAX/STDDEV/... mapping by hand. + +### Path C — Type System / Semantic Analysis + +1. `OpenSearchTypeFactory.java`, `Analyzer.java`, `ExpressionAnalyzer.java` +2. Preserve nullable semantics; protect UDT from `leastRestrictive()` downgrade +3. Test: type preservation, nullable propagation, mixed types + +### Path D — Optimizer / Predicate Pushdown + +1. `PredicateAnalyzer.java`, `LogicalPlanOptimizer`, `QueryService.java` +2. Watch `nullAs` semantics; for plan bloat consider `FilterMergeRule` +3. Verify: `EXPLAIN` output + integration test correctness + +### Path E — Execution / Resource Management + +1. `OpenSearchExecutionEngine.java`, `SQLPlugin.java`, `OpenSearchPluginModule.java` +2. 
Common patterns: cache key collision, memory leak, unbounded growth, non-singleton, DI not injected + +--- + +## Test Templates + +**Unit test** (extend `CalcitePPLAbstractTest`): +```java +public class CalcitePPLYourFixTest extends CalcitePPLAbstractTest { + public CalcitePPLYourFixTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Before + public void init() { + doReturn(true).when(settings) + .getSettingValue(Settings.Key.CALCITE_ENGINE_ENABLED); + } + + @Test + public void testBugScenario() { + verifyLogical("source=EMP | where SAL > 1000", + "LogicalFilter(condition=[>($5, 1000)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"); + } +} +``` + +**Integration test** (extend `CalcitePPLIT`): +```java +public class CalcitePPLYourFixIT extends CalcitePPLIT { + @Override + public void init() throws IOException { + super.init(); + enableCalcite(); + } + + @Test + public void testBugFixEndToEnd() throws IOException { + JSONObject result = executeQuery("source= | "); + verifySchema(result, schema("field", "alias", "type")); + verifyDataRows(result, rows("expected_value_1"), rows("expected_value_2")); + } +} +``` + +**YAML REST test** — place at `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/.yml`: +```yaml +setup: + - do: + indices.create: + index: test_issue_ + body: + settings: { number_of_shards: 1, number_of_replicas: 0 } + mappings: { properties: { : { type: } } } + - do: + query.settings: + body: { transient: { plugins.calcite.enabled: true } } +--- +teardown: + - do: + query.settings: + body: { transient: { plugins.calcite.enabled: false } } +--- +"": + - skip: { features: [headers, allowed_warnings] } + - do: + bulk: { index: test_issue_, refresh: true, body: ['{"index": {}}', '{"": ""}'] } + - do: + headers: { Content-Type: 'application/json' } + ppl: { body: { query: "source=test_issue_ | " } } + - match: { total: } + - match: { datarows: [ [ , ], [ , ] ] } +``` + +> **Always include `datarows` assertions** — 
verifying only `total` and `schema` will miss +> wrong values. Count the expected output groups carefully (e.g., for `chart ... by `, +> count distinct (row_split, col_split) groups after null filtering, not the number of input rows). + +--- + +## Symptom → Fix Path + +``` +SyntaxCheckException / unrecognized syntax → Path A +SemanticCheckException / type mismatch → Path C +Field type wrong (timestamp→string) → Path C +EXPLAIN shows predicate not pushed down → Path D +Multi-condition query: missing/extra rows → Path D +OOM / memory growth over time → Path E +NPE in Transport layer → Path E +"node must be boolean/number, found XXX" → Path B +Regex/function extraction offset → Path B +``` + +--- + +## Case Index + +| Commit | Bug | Layer | Tests | +|--------|-----|-------|-------| +| `ada2e34` | UNION loses UDT type | Type System | 8 UT + 4 IT | +| `26674f9` | rex capture group index shift | AST/Functions | Multiple UTs | +| `b4df010` | isnotnull not pushed down with != | Optimizer | 2 UT + IT | +| `e045d15` | Multiple filters OOM | Optimizer | 26 output updates | +| `f024b4f` | High-cardinality GROUP BY OOM | Execution | Benchmark | +| `97d5d26` | OrdinalMap cache collision + leak | Execution | — | +| `90393bf` | Non-singleton ExecutionEngine leak | Resource | — | +| `f6be830` | Transport extensions not injected | DI | — | +| `734394d` | Grammar rule typo | Grammar | — | +| `246ed0d` | Float precision flaky test | Test Infra | — | +| `d56b8fa` | Wildcard index type conflict | Value Parsing | 3 UT + 1 IT + 1 YAML | +| `5a78b78` | Boolean coercion from numeric in wildcard queries | Value Parsing | 3 UT + 1 IT + 1 YAML | diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000000..eae8ab7e33d --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,28 @@ +{ + "permissions": { + "allow": [ + "Bash(./gradlew *)", + "Bash(gh issue:*)", + "Bash(gh pr:*)", + "Bash(gh api:*)", + "Bash(gh search:*)", + "Bash(gh run:*)", + "Bash(git add:*)", + 
"Bash(git commit:*)", + "Bash(git stash:*)", + "Bash(git show:*)", + "Bash(git diff:*)", + "Bash(git status:*)", + "Bash(git log:*)", + "Bash(git branch:*)", + "Bash(git remote:*)", + "Bash(git fetch:*)", + "Bash(git checkout:*)", + "Bash(git push -u:*)", + "Bash(git push --force-with-lease:*)", + "Bash(git merge:*)", + "Bash(git cherry-pick:*)", + "Bash(git reset --soft:*)" + ] + } +} diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7feb3fe0b46..144a9e3589c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,2 @@ # This should match the owning team set up in https://github.com/orgs/opensearch-project/teams -* @ps48 @kavithacm @derek-ho @joshuali925 @dai-chen @YANG-DB @mengweieric @vamsimanohar @swiddis @penghuo @seankao-az @MaxKsyunz @Yury-Fridlyand @anirudha @forestmvey @acarbonetto @GumpacG @ykmr1224 @LantaoJin @noCharger @qianheng-aws @yuancu @RyanL1997 @ahkcs +* @ps48 @joshuali925 @dai-chen @mengweieric @vamsimanohar @swiddis @penghuo @anirudha @acarbonetto @ykmr1224 @LantaoJin @noCharger @qianheng-aws @yuancu @RyanL1997 @ahkcs @songkant-aws diff --git a/.github/workflows/analytics-engine-compat.yml b/.github/workflows/analytics-engine-compat.yml new file mode 100644 index 00000000000..9c3bd9c9f99 --- /dev/null +++ b/.github/workflows/analytics-engine-compat.yml @@ -0,0 +1,44 @@ +name: Analytics Engine Compatibility + +on: + pull_request: + push: + branches-ignore: + - 'backport/**' + - 'dependabot/**' + paths: + - '**/*.java' + - '**gradle*' + - 'integ-test/**' + - '.github/workflows/analytics-engine-compat.yml' + merge_group: + +jobs: + Get-CI-Image-Tag: + uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main + with: + product: opensearch + + analytics-engine-compat: + needs: Get-CI-Image-Tag + runs-on: ubuntu-latest + container: + image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} + + steps: + - name: Run start 
commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} + + - uses: actions/checkout@v4 + + - name: Set up JDK 25 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 25 + + - name: Run analytics-engine compatibility smoke test + run: | + chown -R 1000:1000 `pwd` + su `id -un 1000` -c "./gradlew :integ-test:analyticsEngineCompatIT" diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000000..249bbc766b6 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,15 @@ +version: 2 +updates: + - package-ecosystem: "gradle" + directory: "/" + schedule: + interval: "weekly" + day: "monday" + time: "08:00" + timezone: "America/Los_Angeles" + labels: + - "skip-changelog" + group: + all-dependencies: + patterns: + - "*" diff --git a/.github/workflows/issue-dedupe.yml b/.github/workflows/issue-dedupe.yml new file mode 100644 index 00000000000..b31dd85b6cc --- /dev/null +++ b/.github/workflows/issue-dedupe.yml @@ -0,0 +1,43 @@ +--- +name: Issue Dedupe Main +on: + issues: + types: [opened] + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + inputs: + job: + description: 'Job to run' + required: true + type: choice + options: + - detect + - auto-close + default: detect + issue_number: + description: 'Issue number to check for duplicates (detect only)' + required: false + type: string + +jobs: + detect: + if: (github.event_name == 'issues' && github.event.issue.user.type != 'Bot') || (github.event_name == 'workflow_dispatch' && inputs.job == 'detect') + uses: opensearch-project/opensearch-build/.github/workflows/issue-dedupe-detect.yml@main + permissions: + contents: read + issues: write + id-token: write + secrets: + BEDROCK_ACCESS_ROLE_ISSUE_DEDUPE: ${{ secrets.BEDROCK_ACCESS_ROLE_ISSUE_DEDUPE }} + with: + issue_number: ${{ inputs.issue_number || '' }} + grace_days: ${{ vars.DUPLICATE_GRACE_DAYS || '7' }} + + auto-close: + if: github.event_name == 'schedule'
|| (github.event_name == 'workflow_dispatch' && inputs.job == 'auto-close') + uses: opensearch-project/opensearch-build/.github/workflows/issue-dedupe-autoclose.yml@main + permissions: + issues: write + with: + grace_days: ${{ vars.DUPLICATE_GRACE_DAYS || '7' }} diff --git a/.github/workflows/sql-cli-integration-test.yml b/.github/workflows/sql-cli-integration-test.yml index 63f3e91d334..0a0695bbd76 100644 --- a/.github/workflows/sql-cli-integration-test.yml +++ b/.github/workflows/sql-cli-integration-test.yml @@ -69,6 +69,7 @@ jobs: echo "Building SQL modules from current branch..." ./gradlew publishToMavenLocal -x test -x integTest echo "SQL modules published to Maven Local" + ./gradlew clean - name: Run SQL CLI tests with local SQL modules working-directory: sql-cli diff --git a/.github/workflows/stalled.yml b/.github/workflows/stalled.yml index 62b85cecd7b..57513a24653 100644 --- a/.github/workflows/stalled.yml +++ b/.github/workflows/stalled.yml @@ -27,3 +27,4 @@ jobs: days-before-pr-close: -1 days-before-issue-close: -1 exempt-draft-pr: true + exempt-pr-labels: 'no-stall' diff --git a/.gitignore b/.gitignore index 329348a7c12..bf9002f999d 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,10 @@ http-client.env.json .factorypath # Coding agent files (could be symlinks) -.claude +.claude/* +!.claude/settings.json +!.claude/commands/ +!.claude/harness/ +.claude/settings.local.json .clinerules memory-bank \ No newline at end of file diff --git a/.whitesource b/.whitesource index db4b0fec82c..9765a1d58b9 100644 --- a/.whitesource +++ b/.whitesource @@ -11,5 +11,11 @@ }, "issueSettings": { "minSeverityLevel": "LOW" + }, + "remediateSettings": { + "addLabels": ["skip-changelog"], + "workflowRules": { + "enabled": true + } } -} \ No newline at end of file +} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000000..03f88a60042 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,133 @@ +# CLAUDE.md + +This file provides guidance to Claude Code 
(claude.ai/code) when working with code in this repository. + +## Project Overview + +OpenSearch SQL plugin — enables SQL and PPL (Piped Processing Language) queries against OpenSearch. This is a multi-module Gradle project (Java 21) that functions as an OpenSearch plugin. + +## Build Commands + +```bash +./gradlew build # Full build (compiles, tests, checks) +./gradlew build -x integTest # Fast build (skip integration tests) +./gradlew :core:build # Build specific module +./gradlew test # Unit tests only +./gradlew :core:test --tests "*.AnalyzerTest" # Single test class +./gradlew :integ-test:integTest # Integration tests +./gradlew :integ-test:integTest -Dtests.class="*QueryIT" # Single IT +./gradlew spotlessCheck # Check formatting +./gradlew spotlessApply # Auto-fix formatting +./gradlew generateGrammarSource # Regenerate ANTLR parsers +``` + +## Code Style + +- **Google Java Format** enforced via Spotless (2-space indent, 100 char line limit) +- **Lombok** is used throughout — `@Getter`, `@Builder`, `@RequiredArgsConstructor`, etc. +- **License header** required on all Java files (Apache 2.0). Missing headers fail the build. +- Pre-commit hooks run `spotlessApply` automatically +- All commits must include a DCO sign-off: `Signed-off-by: Name ` (use `git commit -s`). 
+ +## Architecture + +### Query Pipeline + +``` +User Query (SQL/PPL) + → Parsing (ANTLR) — produces parse tree + → AST Construction (AstBuilder visitor) — produces UnresolvedPlan + → Semantic Analysis (Analyzer) — resolves symbols/types → LogicalPlan + → Planning (Planner + LogicalPlanOptimizer) — produces PhysicalPlan + → Execution (ExecutionEngine) — streams ExprValue results + → Response Formatting (ResponseFormatter — JSON/CSV/JDBC) +``` + +### Module Dependency Graph + +``` +plugin (OpenSearch plugin entry point, Guice DI wiring) + ├── sql — SQL parsing (ANTLR → AST via SQLSyntaxParser/AstBuilder) + ├── ppl — PPL parsing (ANTLR → AST via PPLSyntaxParser/AstBuilder) + ├── core — Central module: Analyzer, Planner, ExecutionEngine interfaces, + │ AST/LogicalPlan/PhysicalPlan node types, expression system, type system + ├── opensearch — OpenSearch storage engine, execution engine, client + ├── protocol — Response formatters (JSON, CSV, JDBC, YAML) + ├── common — Shared settings and utilities + ├── legacy — V1 SQL engine (backward compatibility fallback) + ├── datasources — Multi-datasource support (Glue, Security Lake, Prometheus) + ├── async-query / async-query-core — Spark-based async query execution + ├── direct-query / direct-query-core — Direct external datasource queries + └── language-grammar — Centralized ANTLR .g4 grammar files +``` + +`core` has no dependency on other modules. `sql` and `ppl` depend on `core` and `language-grammar`. `opensearch` implements `core` interfaces. 
+ +### Key Source Locations + +| Area | Key Files | +|------|-----------| +| Plugin entry | `plugin/.../SQLPlugin.java`, `plugin/.../OpenSearchPluginModule.java` | +| SQL parsing | `sql/.../sql/parser/AstBuilder.java`, `sql/.../SQLService.java` | +| PPL parsing | `ppl/.../ppl/parser/AstBuilder.java`, `ppl/.../PPLService.java` | +| ANTLR grammars | `language-grammar/src/main/antlr4/` (OpenSearchSQLParser.g4, OpenSearchPPLParser.g4) | +| Analysis | `core/.../analysis/Analyzer.java`, `core/.../analysis/ExpressionAnalyzer.java` | +| Planning | `core/.../planner/Planner.java`, `core/.../planner/logical/LogicalPlan.java` | +| Execution | `core/.../executor/ExecutionEngine.java`, `opensearch/.../OpenSearchExecutionEngine.java` | +| Storage | `opensearch/.../storage/OpenSearchStorageEngine.java` | +| Query orchestration | `core/.../executor/QueryService.java`, `core/.../executor/QueryPlanFactory.java` | + +### Core Abstractions + +- **`Node`** — Base AST node with visitor pattern support +- **`UnresolvedPlan`** / **`LogicalPlan`** / **`PhysicalPlan`** — Query plan hierarchy (unresolved → logical → physical) +- **`Expression`** — Resolved expression with `valueOf()` and `type()` +- **`ExprValue`** — Runtime value types (ExprIntegerValue, ExprStringValue, etc.) +- **`ExprType`** — Type system (DATE, TIMESTAMP, DOUBLE, STRUCT, etc.) +- **`StorageEngine`** / **`Table`** — Pluggable storage abstraction +- **`ExecutionEngine`** — Executes physical plans, returns QueryResponse + +### Design Patterns + +- **Visitor pattern** used pervasively: `AbstractNodeVisitor`, `LogicalPlanNodeVisitor`, `PhysicalPlanNodeVisitor`, `ExpressionNodeVisitor` +- **PhysicalPlan** implements `Iterator` for streaming execution +- **Guice** dependency injection in `OpenSearchPluginModule` + +## Fixing PPL Bugs + +Use `/ppl-bugfix #` to fix PPL bugs. It dispatches a subagent in an isolated worktree with a structured harness covering triage, fix, tests, and PR creation. 
+ +## Adding New PPL Commands + +Follow the checklist in `docs/dev/ppl-commands.md`: +1. Update lexer/parser grammars (OpenSearchPPLLexer.g4, OpenSearchPPLParser.g4) +2. Add AST node under `org.opensearch.sql.ast.tree` +3. Add `visit*` method in `AbstractNodeVisitor`, override in `Analyzer`, `CalciteRelNodeVisitor`, `PPLQueryDataAnonymizer` +4. Unit tests extending `CalcitePPLAbstractTest` (include `verifyLogical()` and `verifyPPLToSparkSQL()`) +5. Integration tests extending `PPLIntegTestCase` +6. Add user docs under `docs/user/ppl/cmd/` + +## Adding New PPL Functions + +Follow `docs/dev/ppl-functions.md`. Three approaches: +1. Reuse existing Calcite operators from `SqlStdOperatorTable`/`SqlLibraryOperators` +2. Adapt static Java methods via `UserDefinedFunctionUtils.adapt*ToUDF` +3. Implement `ImplementorUDF` interface from scratch, register in `PPLBuiltinOperators` + +## Calcite Engine + +The execution engine is Apache Calcite-based, toggled via `plugins.calcite.enabled` (default: off in production, toggled per-test in integration tests). + +- In integration tests, call `enableCalcite()` in `init()` to activate the Calcite path +- Some features require pushdown optimization — use `enabledOnlyWhenPushdownIsEnabled()` to skip tests in `CalciteNoPushdownIT` +- `CalciteNoPushdownIT` re-runs Calcite test classes with pushdown disabled; add new test classes to its `@Suite.SuiteClasses` list + +## Integration Tests + +Located in `integ-test/src/test/java/`. Organized by area: `sql/`, `ppl/`, `calcite/`, `legacy/`, `jdbc/`, `datasource/`, `asyncquery/`, `security/`. Uses OpenSearch test framework (in-memory cluster per test class). YAML REST tests in `integ-test/src/yamlRestTest/resources/rest-api-spec/test/`. 
+ +Key base classes: +- `PPLIntegTestCase` — base for PPL integration tests (v2 engine) +- `CalcitePPLIT` — base for Calcite PPL integration tests (calls `enableCalcite()`) +- `CalcitePPLAbstractTest` — base for Calcite PPL unit tests (`verifyLogical()`, `verifyPPLToSparkSQL()`) +- `CalciteExplainIT` — explain plan tests using YAML expected output files in `integ-test/src/test/resources/expectedOutput/calcite/` diff --git a/CLAUDE_GUIDE.md b/CLAUDE_GUIDE.md new file mode 100644 index 00000000000..034b403f5c6 --- /dev/null +++ b/CLAUDE_GUIDE.md @@ -0,0 +1,32 @@ +# Claude Commands + +Slash commands for Claude Code in this repository. Use them in any Claude Code session. + +## `/ppl-bugfix` + +Fix a PPL bug end-to-end or follow up on an existing PR. + +**Usage:** + +``` +/ppl-bugfix #1234 # Single issue +/ppl-bugfix PR#5678 # Single PR follow-up +/ppl-bugfix #1234 #5678 PR#9012 # Multiple in parallel +/ppl-bugfix # By URL +``` + +**Permission mode flags** (optional, append to any input): + +| Flag | Mode | Description | +|------|------|-------------| +| `--safe` | `acceptEdits` | File edits auto-approved, Bash commands need manual approval | +| `--yolo` | `bypassPermissions` | No prompts at all — subagent runs in isolated worktree (default) | + +**What it does:** + +1. Resolves issue/PR linkage automatically +2. For new issues: dispatches a subagent in an isolated git worktree that follows the full bugfix harness (triage → fix → test → PR) +3. For existing PRs: handles CI failures, review feedback, merge conflicts, or marks as ready + +**Related files:** [`.claude/harness/ppl-bugfix-harness.md`](.claude/harness/ppl-bugfix-harness.md) + diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 027fbc73b53..737bb091040 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -9,14 +9,10 @@ This document contains a list of maintainers in this repo. 
See [opensearch-proje | Eric Wei | [mengweieric](https://github.com/mengweieric) | Amazon | | Joshua Li | [joshuali925](https://github.com/joshuali925) | Amazon | | Shenoy Pratik | [ps48](https://github.com/ps48) | Amazon | -| Kavitha Mohan | [kavithacm](https://github.com/kavithacm) | Amazon | -| Derek Ho | [derek-ho](https://github.com/derek-ho) | Amazon | -| Lior Perry | [YANG-DB](https://github.com/YANG-DB) | Amazon | | Simeon Widdis | [swiddis](https://github.com/swiddis) | Amazon | | Chen Dai | [dai-chen](https://github.com/dai-chen) | Amazon | | Vamsi Manohar | [vamsimanohar](https://github.com/vamsimanohar) | Amazon | | Peng Huo | [penghuo](https://github.com/penghuo) | Amazon | -| Sean Kao | [seankao-az](https://github.com/seankao-az) | Amazon | | Anirudha Jadhav | [anirudha](https://github.com/anirudha) | Amazon | | Tomoyuki Morita | [ykmr1224](https://github.com/ykmr1224) | Amazon | | Lantao Jin | [LantaoJin](https://github.com/LantaoJin) | Amazon | @@ -25,22 +21,26 @@ This document contains a list of maintainers in this repo. 
See [opensearch-proje | Yuanchun Shen | [yuancu](https://github.com/yuancu) | Amazon | | Ryan Liang | [RyanL1997](https://github.com/RyanL1997) | Amazon | | Kai Huang | [ahkcs](https://github.com/ahkcs) | Amazon | -| Max Ksyunz | [MaxKsyunz](https://github.com/MaxKsyunz) | Improving | -| Yury Fridlyand | [Yury-Fridlyand](https://github.com/Yury-Fridlyand) | Improving | +| Songkan Tang | [songkant-aws](https://github.com/songkant-aws) | Amazon | | Andrew Carbonetto | [acarbonetto](https://github.com/acarbonetto) | Improving | -| Forest Vey | [forestmvey](https://github.com/forestmvey) | Improving | -| Guian Gumpac | [GumpacG](https://github.com/GumpacG) | Improving | ## Emeritus Maintainers -| Maintainer | GitHub ID | Affiliation | -| ----------------- | ------------------------------------------------------- | ----------- | -| Charlotte Henkle | [CEHENKLE](https://github.com/CEHENKLE) | Amazon | -| Nick Knize | [nknize](https://github.com/nknize) | Amazon | -| David Cui | [davidcui1225](https://github.com/davidcui1225) | Amazon | -| Eugene Lee | [eugenesk24](https://github.com/eugenesk24) | Amazon | -| Zhongnan Su | [zhongnansu](https://github.com/zhongnansu) | Amazon | -| Chloe Zhang | [chloe-zh](https://github.com/chloe-zh) | Amazon | -| Peter Fitzgibbons | [pjfitzgibbons](https://github.com/pjfitzgibbons) | Amazon | -| Rupal Mahajan | [rupal-bq](https://github.com/rupal-bq) | Amazon | - +| Maintainer | GitHub ID | +| ----------------- | ------------------------------------------------------- | +| Charlotte Henkle | [CEHENKLE](https://github.com/CEHENKLE) | +| Nick Knize | [nknize](https://github.com/nknize) | +| David Cui | [davidcui1225](https://github.com/davidcui1225) | +| Eugene Lee | [eugenesk24](https://github.com/eugenesk24) | +| Zhongnan Su | [zhongnansu](https://github.com/zhongnansu) | +| Chloe Zhang | [chloe-zh](https://github.com/chloe-zh) | +| Peter Fitzgibbons | [pjfitzgibbons](https://github.com/pjfitzgibbons) | +| Rupal Mahajan | 
[rupal-bq](https://github.com/rupal-bq) | +| Kavitha Mohan | [kavithacm](https://github.com/kavithacm) | +| Derek Ho | [derek-ho](https://github.com/derek-ho) | +| Lior Perry | [YANG-DB](https://github.com/YANG-DB) | +| Sean Kao | [seankao-az](https://github.com/seankao-az) | +| Max Ksyunz | [MaxKsyunz](https://github.com/MaxKsyunz) | +| Yury Fridlyand | [Yury-Fridlyand](https://github.com/Yury-Fridlyand) | +| Forest Vey | [forestmvey](https://github.com/forestmvey) | +| Guian Gumpac | [GumpacG](https://github.com/GumpacG) | diff --git a/api/README.md b/api/README.md index 91651aa3153..ee45a8c2dc0 100644 --- a/api/README.md +++ b/api/README.md @@ -8,7 +8,8 @@ This module provides components organized into two main areas aligned with the [ ### Unified Language Specification -- **`UnifiedQueryPlanner`**: Accepts PPL (Piped Processing Language) queries and returns Calcite `RelNode` logical plans as intermediate representation. +- **`UnifiedQueryParser`**: Parses PPL (Piped Processing Language) or SQL queries and returns the native parse result (`UnresolvedPlan` for PPL, `SqlNode` for Calcite SQL). +- **`UnifiedQueryPlanner`**: Accepts PPL or SQL queries and returns Calcite `RelNode` logical plans as intermediate representation. - **`UnifiedQueryTranspiler`**: Converts Calcite logical plans (`RelNode`) into SQL strings for various target databases using different SQL dialects. ### Unified Execution Runtime @@ -17,7 +18,7 @@ This module provides components organized into two main areas aligned with the [ - **`UnifiedFunction`**: Engine-agnostic function interface that enables functions to be evaluated across different execution engines without engine-specific code duplication. - **`UnifiedFunctionRepository`**: Repository for discovering and loading functions as `UnifiedFunction` instances, providing a bridge between function definitions and external execution engines. 
-Together, these components enable complete workflows: parse PPL queries into logical plans, transpile those plans into target database SQL, compile and execute queries directly, or export PPL functions for use in external execution engines. +Together, these components enable complete workflows: parse PPL or SQL queries into logical plans, transpile those plans into target database SQL, compile and execute queries directly, or export PPL functions for use in external execution engines. ### Experimental API Design @@ -33,7 +34,7 @@ Create a context with catalog configuration, query type, and optional settings: ```java UnifiedQueryContext context = UnifiedQueryContext.builder() - .language(QueryType.PPL) + .language(QueryType.PPL) // or QueryType.SQL for SQL .catalog("opensearch", opensearchSchema) .catalog("spark_catalog", sparkSchema) .defaultNamespace("opensearch") @@ -42,9 +43,23 @@ UnifiedQueryContext context = UnifiedQueryContext.builder() .build(); ``` +### UnifiedQueryParser + +Use `UnifiedQueryParser` to parse queries into their native parse tree. The parser is owned by `UnifiedQueryContext` and returns the native parse result for each language. + +```java +// PPL parsing +UnresolvedPlan ast = (UnresolvedPlan) context.getParser().parse("source = logs | where status = 200"); + +// SQL parsing (with QueryType.SQL context) +SqlNode sqlNode = (SqlNode) sqlContext.getParser().parse("SELECT * FROM logs WHERE status = 200"); +``` + +Callers can then use each language's native visitor infrastructure (`AbstractNodeVisitor` for PPL, `SqlBasicVisitor` for Calcite SQL) on the typed result for further analysis. + ### UnifiedQueryPlanner -Use `UnifiedQueryPlanner` to parse and analyze PPL queries into Calcite logical plans. The planner accepts a `UnifiedQueryContext` and can be reused for multiple queries. +Use `UnifiedQueryPlanner` to parse and analyze PPL or SQL queries into Calcite logical plans. 
The planner accepts a `UnifiedQueryContext` and can be reused for multiple queries. ```java // Create planner with context @@ -53,6 +68,9 @@ UnifiedQueryPlanner planner = new UnifiedQueryPlanner(context); // Plan multiple queries (context is reused) RelNode plan1 = planner.plan("source = logs | where status = 200"); RelNode plan2 = planner.plan("source = metrics | stats avg(cpu)"); + +// SQL queries are also supported (with QueryType.SQL context) +RelNode plan3 = planner.plan("SELECT * FROM logs WHERE status = 200"); ``` ### UnifiedQueryTranspiler @@ -176,6 +194,59 @@ try (UnifiedQueryContext context = UnifiedQueryContext.builder() } ``` +## Profiling + +The unified query API supports the same [profiling capability](../docs/user/ppl/interfaces/endpoint.md#profile-experimental) as the PPL REST endpoint. When enabled, each unified query component automatically collects per-phase timing metrics. For code outside unified query components (e.g., `PreparedStatement.executeQuery()` or response formatting), `context.measure()` records custom phases into the same profile. 
+ +```java +try (UnifiedQueryContext context = UnifiedQueryContext.builder() + .language(QueryType.PPL) + .catalog("catalog", schema) + .defaultNamespace("catalog") + .profiling(true) + .build()) { + + // Auto-profiled: ANALYZE + RelNode plan = new UnifiedQueryPlanner(context).plan(query); + + // Auto-profiled: OPTIMIZE + PreparedStatement stmt = new UnifiedQueryCompiler(context).compile(plan); + + // User-profiled via measure() + ResultSet rs = context.measure(MetricName.EXECUTE, stmt::executeQuery); + String json = context.measure(MetricName.FORMAT, () -> formatter.format(result)); + + // Retrieve profile snapshot + QueryProfile profile = context.getProfile(); +} +``` + +The returned `QueryProfile` follows the same JSON structure as the REST API: + +```json +{ + "summary": { + "total_time_ms": 33.34 + }, + "phases": { + "analyze": { "time_ms": 8.68 }, + "optimize": { "time_ms": 18.2 }, + "execute": { "time_ms": 4.87 }, + "format": { "time_ms": 0.05 } + }, + "plan": { + "node": "EnumerableCalc", + "time_ms": 4.82, + "rows": 2, + "children": [ + { "node": "CalciteEnumerableIndexScan", "time_ms": 4.12, "rows": 2 } + ] + } +} +``` + +When profiling is disabled (the default), all components execute with zero overhead. + ## Development & Testing A set of unit tests is provided to validate planner behavior. @@ -226,5 +297,4 @@ public class MySchema extends AbstractSchema { ## Future Work -- Expand support to SQL language. - Extend planner to generate optimized physical plans using Calcite's optimization frameworks. 
diff --git a/api/build.gradle b/api/build.gradle index fb4cafe79d8..570efc6bb0e 100644 --- a/api/build.gradle +++ b/api/build.gradle @@ -13,6 +13,7 @@ plugins { dependencies { api project(':ppl') + api group: 'org.apache.calcite', name: 'calcite-babel', version: '1.41.0' testImplementation testFixtures(project(':api')) testImplementation group: 'junit', name: 'junit', version: '4.13.2' diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 3e0a1f972bd..8df9c519f50 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -5,7 +5,9 @@ package org.opensearch.sql.api; +import static org.opensearch.sql.common.setting.Settings.Key.CALCITE_ENGINE_ENABLED; import static org.opensearch.sql.common.setting.Settings.Key.PPL_JOIN_SUBSEARCH_MAXOUT; +import static org.opensearch.sql.common.setting.Settings.Key.PPL_REX_MAX_MATCH_LIMIT; import static org.opensearch.sql.common.setting.Settings.Key.PPL_SUBSEARCH_MAXOUT; import static org.opensearch.sql.common.setting.Settings.Key.QUERY_SIZE_LIMIT; @@ -13,34 +15,82 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import lombok.Value; +import java.util.Optional; +import java.util.concurrent.Callable; +import lombok.AllArgsConstructor; +import lombok.Getter; import org.apache.calcite.jdbc.CalciteSchema; import org.apache.calcite.plan.RelTraitDef; import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; import org.apache.calcite.schema.Schema; import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Programs; +import org.opensearch.sql.api.parser.CalciteSqlQueryParser; +import org.opensearch.sql.api.parser.PPLQueryParser; +import 
org.opensearch.sql.api.parser.UnifiedQueryParser; +import org.opensearch.sql.api.spec.LanguageSpec; +import org.opensearch.sql.api.spec.UnifiedPplSpec; +import org.opensearch.sql.api.spec.UnifiedSqlSpec; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.SysLimit; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.executor.QueryType; +import org.opensearch.sql.monitor.profile.MetricName; +import org.opensearch.sql.monitor.profile.ProfileMetric; +import org.opensearch.sql.monitor.profile.QueryProfile; +import org.opensearch.sql.monitor.profile.QueryProfiling; /** * A reusable abstraction shared across unified query components (planner, compiler, etc.). This * centralizes configuration for catalog schemas, query type, execution limits, and other settings, * enabling consistent behavior across all unified query operations. */ -@Value +@AllArgsConstructor +@Getter public class UnifiedQueryContext implements AutoCloseable { /** CalcitePlanContext containing Calcite framework configuration and query type. */ - CalcitePlanContext planContext; + private final CalcitePlanContext planContext; /** Settings containing execution limits and feature flags used by parsers and planners. */ - Settings settings; + private final Settings settings; + + /** Query parser created eagerly from this context's configuration. */ + private final UnifiedQueryParser parser; + + /** Language spec for the query's frontend (SQL or PPL). */ + private final LanguageSpec langSpec; + + /** + * Returns the profiling result. Call after query execution to retrieve collected metrics. Returns + * empty if profiling was not enabled. + */ + public Optional getProfile() { + return Optional.ofNullable(QueryProfiling.current().finish()); + } + + /** + * Measures the execution time of the given action and records it as a profiling metric. When + * profiling is disabled, the action executes with no overhead. 
Use this for phases outside + * unified query components (e.g., execution, formatting). + * + * @param the return type of the action + * @param metricName the metric to record + * @param action the action to measure + * @return the result of the action + * @throws Exception if the action throws + */ + public T measure(MetricName metricName, Callable action) throws Exception { + ProfileMetric metric = QueryProfiling.current().getOrCreateMetric(metricName); + long start = System.nanoTime(); + try { + return action.call(); + } finally { + metric.set(System.nanoTime() - start); + } + } /** * Closes the underlying resource managed by this context. @@ -49,6 +99,7 @@ public class UnifiedQueryContext implements AutoCloseable { */ @Override public void close() throws Exception { + QueryProfiling.clear(); if (planContext != null && planContext.connection != null) { planContext.connection.close(); } @@ -65,17 +116,36 @@ public static class Builder { private final Map catalogs = new HashMap<>(); private String defaultNamespace; private boolean cacheMetadata = false; + private boolean profiling = false; /** * Setting values with defaults from SysLimit.DEFAULT. Only includes planning-required settings * to avoid coupling with OpenSearchSettings. + * + *
<p>
{@link Settings.Key#CALCITE_ENGINE_ENABLED} defaults to {@code true} here because the + * unified query path is by definition Calcite-based — every query reaching this context flows + * through Calcite's planner, never the v2 engine. The PPL {@link + * org.opensearch.sql.api.parser.PPLQueryParser} reuses the v2 {@code AstBuilder}, which gates + * Calcite-only commands (e.g. {@code visitTableCommand}) on this setting; without the default, + * those commands fail at parse time even when the cluster setting is true. + * + *
<p>
{@link Settings.Key#PPL_REX_MAX_MATCH_LIMIT} defaults to {@code 10} here because {@code + * AstBuilder.visitRexCommand} reads it unconditionally and unboxes to {@code int} — a {@code + * null} return from {@code getSettingValue} NPEs the planner before any operator-level + * capability check runs. The value mirrors the cluster-side default of {@code 10} registered by + * {@code OpenSearchSettings.PPL_REX_MAX_MATCH_LIMIT_SETTING}. Cluster-side overrides reach this + * map via {@link #setting(String, Object)} — the REST handler reads the live value from {@code + * OpenSearchSettings} and routes it through that existing API, keeping {@link + * UnifiedQueryContext} decoupled from any specific {@link Settings} implementation. */ private final Map settings = new HashMap( Map.of( QUERY_SIZE_LIMIT, SysLimit.DEFAULT.querySizeLimit(), PPL_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.subsearchLimit(), - PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.joinSubsearchLimit())); + PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.joinSubsearchLimit(), + CALCITE_ENGINE_ENABLED, true, + PPL_REX_MAX_MATCH_LIMIT, 10)); /** * Sets the query language frontend to be used. @@ -124,6 +194,18 @@ public Builder cacheMetadata(boolean cache) { return this; } + /** + * Enables or disables query profiling. When enabled, profiling metrics are collected during + * query planning and execution, retrievable via {@link UnifiedQueryContext#getProfile()}. + * + * @param enabled whether to enable profiling + * @return this builder instance + */ + public Builder profiling(boolean enabled) { + this.profiling = enabled; + return this; + } + /** * Sets a specific setting value by name. 
* @@ -147,11 +229,26 @@ public Builder setting(String name, Object value) { public UnifiedQueryContext build() { Objects.requireNonNull(queryType, "Must specify language before build"); + LanguageSpec langSpec = + switch (queryType) { + case SQL -> UnifiedSqlSpec.extended(); + case PPL -> UnifiedPplSpec.create(); + }; + Settings settings = buildSettings(); CalcitePlanContext planContext = CalcitePlanContext.create( - buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); - return new UnifiedQueryContext(planContext, settings); + buildFrameworkConfig(langSpec), SysLimit.fromSettings(settings), queryType); + QueryProfiling.activate(profiling); + return new UnifiedQueryContext( + planContext, settings, createParser(planContext, settings), langSpec); + } + + private UnifiedQueryParser createParser(CalcitePlanContext planContext, Settings settings) { + return switch (queryType) { + case PPL -> new PPLQueryParser(settings); + case SQL -> new CalciteSqlQueryParser(planContext); + }; } private Settings buildSettings() { @@ -170,16 +267,21 @@ public List getSettings() { } @SuppressWarnings({"rawtypes"}) - private FrameworkConfig buildFrameworkConfig() { + private FrameworkConfig buildFrameworkConfig(LanguageSpec langSpec) { SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus(); catalogs.forEach(rootSchema::add); SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace); - return Frameworks.newConfigBuilder() - .parserConfig(SqlParser.Config.DEFAULT) - .defaultSchema(defaultSchema) - .traitDefs((List) null) - .programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE)) + Frameworks.ConfigBuilder builder = + Frameworks.newConfigBuilder() + .defaultSchema(defaultSchema) + .traitDefs((List) null) + .programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE)); + + return builder + .parserConfig(langSpec.parserConfig()) + .sqlValidatorConfig(langSpec.validatorConfig()) + .operatorTable(langSpec.operatorTable()) 
.build(); } diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java index 91e35335e20..54a429e4cfb 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java @@ -5,22 +5,25 @@ package org.opensearch.sql.api; -import org.antlr.v4.runtime.tree.ParseTree; +import static org.opensearch.sql.monitor.profile.MetricName.ANALYZE; + +import lombok.RequiredArgsConstructor; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.logical.LogicalSort; -import org.opensearch.sql.ast.statement.Query; -import org.opensearch.sql.ast.statement.Statement; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.util.SqlVisitor; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Planner; +import org.opensearch.sql.api.parser.UnifiedQueryParser; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.calcite.CalciteRelNodeVisitor; -import org.opensearch.sql.common.antlr.Parser; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.executor.QueryType; -import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; -import org.opensearch.sql.ppl.parser.AstBuilder; -import org.opensearch.sql.ppl.parser.AstStatementBuilder; /** * {@code UnifiedQueryPlanner} provides a high-level API for parsing and analyzing queries using the @@ -28,15 +31,12 @@ * such as Spark or command-line tools, abstracting away Calcite internals. */ public class UnifiedQueryPlanner { - /** The parser instance responsible for converting query text into a parse tree. 
*/ - private final Parser parser; - /** Unified query context containing CalcitePlanContext with all configuration. */ - private final UnifiedQueryContext context; + /** Planning strategy selected at construction time based on query type. */ + private final PlanningStrategy strategy; - /** AST-to-RelNode visitor that builds logical plans from the parsed AST. */ - private final CalciteRelNodeVisitor relNodeVisitor = - new CalciteRelNodeVisitor(new EmptyDataSourceService()); + /** Unified query context for profiling support. */ + private final UnifiedQueryContext context; /** * Constructs a UnifiedQueryPlanner with a unified query context. @@ -44,60 +44,96 @@ public class UnifiedQueryPlanner { * @param context the unified query context containing CalcitePlanContext */ public UnifiedQueryPlanner(UnifiedQueryContext context) { - this.parser = buildQueryParser(context.getPlanContext().queryType); this.context = context; + this.strategy = + context.getPlanContext().queryType == QueryType.SQL + ? new CalciteNativeStrategy(context) + : new CustomVisitorStrategy(context); } /** * Parses and analyzes a query string into a Calcite logical plan (RelNode). TODO: Generate * optimal physical plan to fully unify query execution and leverage Calcite's optimizer. 
* - * @param query the raw query string in PPL or other supported syntax + * @param query the raw query string in PPL or SQL syntax * @return a logical plan representing the query */ public RelNode plan(String query) { try { - return preserveCollation(analyze(parse(query))); - } catch (SyntaxCheckException e) { - // Re-throw syntax error without wrapping + return context.measure( + ANALYZE, + () -> { + RelNode plan = strategy.plan(query); + for (var shuttle : context.getLangSpec().postAnalysisRules()) { + plan = plan.accept(shuttle); + } + return plan; + }); + } catch (SyntaxCheckException | UnsupportedOperationException e) { throw e; } catch (Exception e) { throw new IllegalStateException("Failed to plan query", e); } } - private Parser buildQueryParser(QueryType queryType) { - if (queryType == QueryType.PPL) { - return new PPLSyntaxParser(); - } - throw new IllegalArgumentException("Unsupported query type: " + queryType); + /** Strategy interface for language-specific planning logic. */ + private interface PlanningStrategy { + RelNode plan(String query) throws Exception; } - private UnresolvedPlan parse(String query) { - ParseTree cst = parser.parse(query); - AstStatementBuilder astStmtBuilder = - new AstStatementBuilder( - new AstBuilder(query, context.getSettings()), - AstStatementBuilder.StatementBuilderContext.builder().build()); - Statement statement = cst.accept(astStmtBuilder); + /** ANSI SQL planning using Calcite's native SqlParser → SqlValidator → SqlToRelConverter. */ + @RequiredArgsConstructor + private static class CalciteNativeStrategy implements PlanningStrategy { + private final UnifiedQueryContext context; + + @Override + public RelNode plan(String query) throws Exception { + try (Planner planner = Frameworks.getPlanner(context.getPlanContext().config)) { + SqlNode parsed = planner.parse(query); + if (!parsed.isA(SqlKind.QUERY)) { + throw new UnsupportedOperationException( + "Only query statements are supported. 
Got: " + parsed.getKind()); + } + + // TODO: move post-parse rewriting into CalciteSqlQueryParser + SqlNode rewritten = parsed; + for (SqlVisitor visitor : context.getLangSpec().postParseRules()) { + rewritten = rewritten.accept(visitor); + } - if (statement instanceof Query) { - return ((Query) statement).getPlan(); + SqlNode validated = planner.validate(rewritten); + RelRoot relRoot = planner.rel(validated); + return relRoot.project(); + } } - throw new UnsupportedOperationException( - "Only query statements are supported but got " + statement.getClass().getSimpleName()); } - private RelNode analyze(UnresolvedPlan ast) { - return relNodeVisitor.analyze(ast, context.getPlanContext()); - } + /** AST-based planning via context-owned parser → UnresolvedPlan → CalciteRelNodeVisitor. */ + private static class CustomVisitorStrategy implements PlanningStrategy { + private final UnifiedQueryContext context; + private final UnifiedQueryParser parser; + private final CalciteRelNodeVisitor relNodeVisitor = + new CalciteRelNodeVisitor(new EmptyDataSourceService()); + + @SuppressWarnings("unchecked") + CustomVisitorStrategy(UnifiedQueryContext context) { + this.context = context; + this.parser = (UnifiedQueryParser) context.getParser(); + } + + @Override + public RelNode plan(String query) { + UnresolvedPlan ast = parser.parse(query); + RelNode logical = relNodeVisitor.analyze(ast, context.getPlanContext()); + return preserveCollation(logical); + } - private RelNode preserveCollation(RelNode logical) { - RelNode calcitePlan = logical; - RelCollation collation = logical.getTraitSet().getCollation(); - if (!(logical instanceof Sort) && collation != RelCollations.EMPTY) { - calcitePlan = LogicalSort.create(logical, collation, null, null); + private RelNode preserveCollation(RelNode logical) { + RelCollation collation = logical.getTraitSet().getCollation(); + if (!(logical instanceof Sort) && collation != RelCollations.EMPTY) { + return LogicalSort.create(logical, collation, null, 
null); + } + return logical; } - return calcitePlan; } } diff --git a/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java b/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java index 20cf04e3f5c..4554b3d060d 100644 --- a/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java +++ b/api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java @@ -5,6 +5,8 @@ package org.opensearch.sql.api.compiler; +import static org.opensearch.sql.monitor.profile.MetricName.OPTIMIZE; + import java.sql.Connection; import java.sql.PreparedStatement; import lombok.NonNull; @@ -46,26 +48,34 @@ public UnifiedQueryCompiler(UnifiedQueryContext context) { */ public PreparedStatement compile(@NonNull RelNode plan) { try { - // Apply shuttle to convert LogicalTableScan to BindableTableScan - final RelHomogeneousShuttle shuttle = - new RelHomogeneousShuttle() { - @Override - public RelNode visit(TableScan scan) { - final RelOptTable table = scan.getTable(); - if (scan instanceof LogicalTableScan - && Bindables.BindableTableScan.canHandle(table)) { - return Bindables.BindableTableScan.create(scan.getCluster(), table); - } - return super.visit(scan); - } - }; - RelNode transformedPlan = plan.accept(shuttle); - - Connection connection = context.getPlanContext().connection; - final RelRunner runner = connection.unwrap(RelRunner.class); - return runner.prepareStatement(transformedPlan); + return context.measure(OPTIMIZE, () -> doCompile(plan)); } catch (Exception e) { throw new IllegalStateException("Failed to compile logical plan", e); } } + + private PreparedStatement doCompile(RelNode plan) throws Exception { + // Apply pre-compilation rules (e.g., late-binding function impl) + for (var rule : context.getLangSpec().preCompilationRules()) { + plan = plan.accept(rule); + } + + // Apply shuttle to convert LogicalTableScan to BindableTableScan + final RelHomogeneousShuttle shuttle = + new RelHomogeneousShuttle() { + 
@Override + public RelNode visit(TableScan scan) { + final RelOptTable table = scan.getTable(); + if (scan instanceof LogicalTableScan && Bindables.BindableTableScan.canHandle(table)) { + return Bindables.BindableTableScan.create(scan.getCluster(), table); + } + return super.visit(scan); + } + }; + RelNode transformedPlan = plan.accept(shuttle); + + Connection connection = context.getPlanContext().connection; + final RelRunner runner = connection.unwrap(RelRunner.class); + return runner.prepareStatement(transformedPlan); + } } diff --git a/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java new file mode 100644 index 00000000000..b92e75bf342 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/CalciteSqlQueryParser.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import lombok.RequiredArgsConstructor; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParseException; +import org.apache.calcite.sql.parser.SqlParser; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.common.antlr.SyntaxCheckException; + +/** Calcite SQL query parser that produces {@link SqlNode} as the native parse result. */ +@RequiredArgsConstructor +public class CalciteSqlQueryParser implements UnifiedQueryParser { + + /** Calcite plan context providing parser configuration (e.g., case sensitivity, conformance). 
*/ + private final CalcitePlanContext planContext; + + @Override + public SqlNode parse(String query) { + try { + SqlParser parser = SqlParser.create(query, planContext.config.getParserConfig()); + return parser.parseQuery(); + } catch (SqlParseException e) { + throw new SyntaxCheckException("Failed to parse SQL query: " + e.getMessage()); + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java new file mode 100644 index 00000000000..af094404d70 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/PPLQueryParser.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import lombok.RequiredArgsConstructor; +import org.antlr.v4.runtime.tree.ParseTree; +import org.opensearch.sql.ast.statement.Query; +import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; +import org.opensearch.sql.ppl.parser.AstBuilder; +import org.opensearch.sql.ppl.parser.AstStatementBuilder; + +/** PPL query parser that produces {@link UnresolvedPlan} as the native parse result. */ +@RequiredArgsConstructor +public class PPLQueryParser implements UnifiedQueryParser { + + /** Settings containing execution limits and feature flags used by AST builders. */ + private final Settings settings; + + /** Reusable ANTLR-based PPL syntax parser. Stateless and thread-safe. 
*/ + private final PPLSyntaxParser syntaxParser = new PPLSyntaxParser(); + + @Override + public UnresolvedPlan parse(String query) { + ParseTree cst = syntaxParser.parse(query); + AstStatementBuilder astStmtBuilder = + new AstStatementBuilder( + new AstBuilder(query, settings), + AstStatementBuilder.StatementBuilderContext.builder().build()); + Statement statement = cst.accept(astStmtBuilder); + + if (statement instanceof Query) { + return ((Query) statement).getPlan(); + } + throw new UnsupportedOperationException( + "Only query statements are supported but got " + statement.getClass().getSimpleName()); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java b/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java new file mode 100644 index 00000000000..76918f93eae --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/UnifiedQueryParser.java @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +/** + * Language-neutral query parser interface. Returns the native parse result for the language (e.g., + * {@code UnresolvedPlan} for PPL, {@code SqlNode} for Calcite SQL). + * + * @param the native parse result type for this language + */ +public interface UnifiedQueryParser { + + /** + * Parses the query and returns the native parse result. 
+ * + * @param query the raw query string + * @return the native parse result + */ + T parse(String query); +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java b/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java new file mode 100644 index 00000000000..e1916d33f5b --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/FunctionSpecBuilder.java @@ -0,0 +1,186 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.List; +import java.util.Objects; +import java.util.function.BiFunction; +import javax.annotation.Nullable; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlOperandMetadata; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; + +/** Fluent DSL for building {@link UnifiedFunctionSpec} instances. */ +@RequiredArgsConstructor +class FunctionSpecBuilder { + /** Function name to register. 
*/ + private final String name; + + /** + * Wraps an existing Calcite operator, preserving its native type system and RexImpTable + * implementation for in-memory execution. + * + * @param op the Calcite operator to delegate to + * @return a builder that produces the spec on {@code build()} + */ + DelegateFunctionBuilder delegateTo(SqlOperator op) { + return new DelegateFunctionBuilder(name, op); + } + + /** + * Builds a pushdown-only UDF with relaxed type checking. The resulting function has no local + * implementation and delegates execution to the data source via pushdown. + * + * @param paramNames required parameter names for signature display + * @return a builder that produces the spec on {@code build()} + */ + CatalogFunctionBuilder vararg(String... paramNames) { + return new CatalogFunctionBuilder(name, List.of(paramNames)); + } + + /** + * Builds a typed SqlFunction with strict operand type checking. Optionally accepts a late-binding + * {@code impl} that rewrites the function into executable Calcite expressions at compilation + * time. + * + * @param families operand type families for validation + * @return a builder that produces the spec on {@code build()} + */ + DefaultFunctionBuilder operands(SqlTypeFamily... families) { + return new DefaultFunctionBuilder(name, families); + } + + @RequiredArgsConstructor + static class DefaultFunctionBuilder { + private final String name; + private final SqlTypeFamily[] operandFamilies; + private SqlReturnTypeInference returnType; + private SqlFunctionCategory category = SqlFunctionCategory.USER_DEFINED_FUNCTION; + private @Nullable BiFunction impl; + + DefaultFunctionBuilder returns(SqlReturnTypeInference type) { + this.returnType = type; + return this; + } + + DefaultFunctionBuilder category(SqlFunctionCategory cat) { + this.category = cat; + return this; + } + + /** + * Defines how this function executes by rewriting to existing Calcite operators. 
Applied only + * at compilation time (late binding) — the logical plan preserves the original function call. + * + * @param impl rewrite function that converts this call into executable RexNodes + * @return this builder + */ + DefaultFunctionBuilder impl(BiFunction impl) { + this.impl = impl; + return this; + } + + UnifiedFunctionSpec build() { + Objects.requireNonNull(returnType, "returns() is required"); + SqlFunction op = + new SqlFunction( + name.toUpperCase(), + SqlKind.OTHER_FUNCTION, + returnType, + null, + OperandTypes.family(operandFamilies), + category); + return new UnifiedFunctionSpec(name.toLowerCase(), op, impl); + } + } + + @RequiredArgsConstructor + static class DelegateFunctionBuilder { + private final String name; + private final SqlOperator operator; + + UnifiedFunctionSpec build() { + return new UnifiedFunctionSpec(name.toLowerCase(), operator, null); + } + } + + @RequiredArgsConstructor + static class CatalogFunctionBuilder { + private final String name; + private final List paramNames; + private SqlReturnTypeInference returnType; + + CatalogFunctionBuilder returnType(SqlReturnTypeInference type) { + this.returnType = type; + return this; + } + + UnifiedFunctionSpec build() { + Objects.requireNonNull(returnType, "returnType is required"); + return new UnifiedFunctionSpec( + name, + new SqlUserDefinedFunction( + new SqlIdentifier(name, SqlParserPos.ZERO), + SqlKind.OTHER_FUNCTION, + returnType, + InferTypes.ANY_NULLABLE, + new VariadicOperandMetadata(paramNames), + List::of), // Pushdown-only: no local implementation + null); + } + } + + /** + * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code + * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts + * any operand types and delegates validation to pushdown. 
+ */ + record VariadicOperandMetadata(List paramNames) implements SqlOperandMetadata { + + @Override + public List paramNames() { + return paramNames; + } + + @Override + public List paramTypes(RelDataTypeFactory tf) { + return List.of(); + } + + @Override + public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { + return true; + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(paramNames.size()); + } + + @Override + public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java new file mode 100644 index 00000000000..4009ee13bc0 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java @@ -0,0 +1,124 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.ArrayList; +import java.util.List; +import org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.util.SqlOperatorTables; +import org.apache.calcite.sql.util.SqlVisitor; +import org.apache.calcite.sql.validate.SqlValidator; + +/** + * Language specification defining the dialect the engine accepts. Provides parser configuration, + * validator configuration, and composable {@link LanguageExtension}s that contribute operators, + * post-parse rewrite rules, and post-analysis rewrite rules. + * + *

Implementations define a complete language surface — for example, {@link UnifiedSqlSpec} + * provides ANSI and extended SQL modes. A future PPL spec would implement this same interface once + * PPL converges on the Calcite pipeline. + */ +public interface LanguageSpec { + + /** + * A composable language extension that contributes operators, post-parse rewrite rules, and + * post-analysis rewrite rules. All methods have defaults so extensions only override what they + * need. + */ + interface LanguageExtension { + + /** + * Operators (functions, aggregates) this extension adds. Chained with the standard operator + * table during validation. + */ + default SqlOperatorTable operators() { + return SqlOperatorTables.of(); + } + + /** + * AST rewrite rules applied after parsing and before validation. Each visitor transforms the + * parse tree (e.g., rewriting named arguments into MAP literals). + */ + default List> postParseRules() { + return List.of(); + } + + /** + * RelNode rewrite rules applied after analysis and before execution. Each rule transforms the + * logical plan tree. Rules within a single extension are applied in list order. + */ + default List postAnalysisRules() { + return List.of(); + } + + /** + * Pre-compilation rules applied only before in-memory execution. Each rule transforms the + * logical plan (e.g., binding function implementations). Not applied when the plan is consumed + * by external engines. + */ + default List preCompilationRules() { + return List.of(); + } + } + + /** + * Parser configuration controlling how SQL text is tokenized and parsed into a parse tree, + * including parser factory, lexical rules, and conformance. + */ + SqlParser.Config parserConfig(); + + /** + * Validator configuration controlling what SQL semantics the validator accepts, such as GROUP BY + * behavior, LIMIT syntax, and type coercion. + */ + SqlValidator.Config validatorConfig(); + + /** + * Language extensions registered with this spec. 
Each extension contributes operators, post-parse + * rewrite rules, and post-analysis rewrite rules composed by {@link #operatorTable()}, {@link + * #postParseRules()}, and {@link #postAnalysisRules()}. + */ + List extensions(); + + /** + * Chained operator table combining the standard Calcite operators with all operators contributed + * by registered extensions. + */ + default SqlOperatorTable operatorTable() { + List tables = new ArrayList<>(); + tables.add(SqlStdOperatorTable.instance()); + extensions().forEach(ext -> tables.add(ext.operators())); + return SqlOperatorTables.chain(tables); + } + + /** + * All post-parse rewrite rules from registered extensions, flattened in registration order. + * Applied to the parse tree after parsing and before validation. + */ + default List> postParseRules() { + return extensions().stream().flatMap(ext -> ext.postParseRules().stream()).toList(); + } + + /** + * All post-analysis RelNode rewrite rules from registered extensions, flattened in registration + * order. Applied to the logical plan after analysis and before execution. + */ + default List postAnalysisRules() { + return extensions().stream().flatMap(ext -> ext.postAnalysisRules().stream()).toList(); + } + + /** + * All pre-compilation rules from registered extensions, flattened in registration order. Applied + * only before in-memory execution. 
+ */ + default List preCompilationRules() { + return extensions().stream().flatMap(ext -> ext.preCompilationRules().stream()).toList(); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java new file mode 100644 index 00000000000..72392b7c520 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java @@ -0,0 +1,135 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import static org.apache.calcite.sql.SqlFunctionCategory.TIMEDATE; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.LENGTH; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3; +import static org.apache.calcite.sql.fun.SqlStdOperatorTable.FLOOR; +import static org.apache.calcite.sql.type.ReturnTypes.ARG1_NULLABLE; +import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN; +import static org.apache.calcite.sql.type.SqlTypeFamily.CHARACTER; +import static org.apache.calcite.sql.type.SqlTypeFamily.DATETIME; + +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.BiFunction; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nullable; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.apache.calcite.avatica.util.TimeUnitRange; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.type.SqlOperandMetadata; +import org.apache.calcite.sql.util.SqlOperatorTables; + +/** + * Declarative registry of language-level functions 
for the unified query engine. Functions defined + * here are part of the language spec — always resolvable regardless of the underlying data source. + * They are grouped into {@link Category categories} that callers chain into Calcite's operator + * table. Data-source capability is enforced at optimization time by pushdown rules. + */ +@Getter +@ToString(of = "funcName") +@EqualsAndHashCode(of = "funcName") +@AllArgsConstructor(access = AccessLevel.PACKAGE) +public final class UnifiedFunctionSpec { + + /** Function name as registered in the operator table (e.g., "match", "multi_match"). */ + private final String funcName; + + /** Calcite operator for chaining into the framework config's operator table. */ + private final SqlOperator operator; + + /** Optional late-binding implementation applied only at compilation time. */ + private final @Nullable BiFunction impl; + + /** Common scalar functions beyond standard. */ + public static final Category SCALAR = + new Category( + List.of( + function("length").delegateTo(LENGTH).build(), + function("regexp_replace").delegateTo(REGEXP_REPLACE_3).build(), + function("date_trunc") + .operands(CHARACTER, DATETIME) + .returns(ARG1_NULLABLE) + .category(TIMEDATE) + .impl( + (rexBuilder, call) -> { + RexLiteral unitLiteral = (RexLiteral) call.operands.get(0); + String unit = unitLiteral.getValueAs(String.class); + RexNode datetime = call.operands.get(1); + return rexBuilder.makeCall( + FLOOR, + datetime, + rexBuilder.makeFlag(TimeUnitRange.valueOf(unit.toUpperCase()))); + }) + .build())); + + /** Full-text search functions. 
*/ + public static final Category RELEVANCE = + new Category( + List.of( + function("match").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(), + function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(), + function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(), + function("query_string").vararg("fields", "query").returnType(BOOLEAN).build())); + + /** All registered function specs, keyed by function name. */ + public static final Map ALL_SPECS = + Stream.of(SCALAR, RELEVANCE) + .flatMap(c -> c.specs().stream()) + .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s)); + + /** + * Looks up a function spec by name across all categories. + * + * @param name function name (case-insensitive) + * @return the spec, or empty if not found + */ + public static Optional of(String name) { + return Optional.ofNullable(ALL_SPECS.get(name.toLowerCase())); + } + + /** + * @return required param names from {@link SqlOperandMetadata}, or empty if not available. + */ + public List getParamNames() { + return operator.getOperandTypeChecker() instanceof SqlOperandMetadata metadata + ? metadata.paramNames() + : List.of(); + } + + /** A group of function specs that can be chained into Calcite's operator table. */ + public record Category(List specs) { + public SqlOperatorTable operatorTable() { + return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList()); + } + + /** Returns true if this category contains the given spec. */ + public boolean contains(UnifiedFunctionSpec spec) { + return specs.contains(spec); + } + } + + /** Entry point for the function spec builder DSL. 
*/ + private static FunctionSpecBuilder function(String name) { + return new FunctionSpecBuilder(name); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java new file mode 100644 index 00000000000..a34b9b98806 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.List; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.validate.SqlValidator; +import org.opensearch.sql.api.spec.datetime.DatetimeExtension; + +/** + * PPL language specification. + * + *

Note: PPL currently has its own parsing and analyzing pipeline, so only configuration and + * extensions applied after RelNode construction are in use. The parser and validator configs + * returned here are inert for the PPL path. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class UnifiedPplSpec implements LanguageSpec { + + public static UnifiedPplSpec create() { + return new UnifiedPplSpec(); + } + + @Override + public SqlParser.Config parserConfig() { + return SqlParser.config(); + } + + @Override + public SqlValidator.Config validatorConfig() { + return SqlValidator.Config.DEFAULT; + } + + @Override + public List extensions() { + return List.of(new DatetimeExtension()); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java new file mode 100644 index 00000000000..28eeaa89abf --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedSqlSpec.java @@ -0,0 +1,69 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.List; +import lombok.AccessLevel; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.experimental.Accessors; +import org.apache.calcite.config.Lex; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.parser.SqlParserImplFactory; +import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl; +import org.apache.calcite.sql.validate.SqlConformanceEnum; +import org.apache.calcite.sql.validate.SqlValidator; +import org.opensearch.sql.api.spec.core.CoreExtension; +import org.opensearch.sql.api.spec.search.SearchExtension; + +/** + * SQL language specification. Configures Calcite's parser, validator, and composable extensions for + * OpenSearch SQL compatibility. + * + *

Use {@link #extended()} for the default configuration with lenient syntax, hyphenated + * identifiers, and search functions. + */ +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) +@Accessors(fluent = true) +public class UnifiedSqlSpec implements LanguageSpec { + + /** Lexical rules: identifier quoting, character escaping, and special identifier support. */ + private final Lex lex; + + /** Parser implementation: controls keyword reservation and grammar extensions. */ + private final SqlParserImplFactory parserFactory; + + /** Validation rules: what SQL semantics the validator accepts (GROUP BY, LIMIT, coercion). */ + private final SqlConformanceEnum conformance; + + /** Composable extensions contributing operators and post-parse rewrite rules. */ + @Getter private final List extensions; + + /** + * Extended SQL spec: Babel parser, BIG_QUERY lex (hyphenated identifiers, backtick quoting), + * BABEL conformance (lenient GROUP BY, LIMIT, optional FROM), and search functions. + */ + public static UnifiedSqlSpec extended() { + return new UnifiedSqlSpec( + Lex.BIG_QUERY, + SqlBabelParserImpl.FACTORY, + SqlConformanceEnum.BABEL, + List.of(new CoreExtension(), new SearchExtension())); + } + + @Override + public SqlParser.Config parserConfig() { + return SqlParser.config() + .withParserFactory(parserFactory) + .withLex(lex) + .withConformance(conformance); + } + + @Override + public SqlValidator.Config validatorConfig() { + return SqlValidator.Config.DEFAULT.withConformance(conformance); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java new file mode 100644 index 00000000000..17aa8a20bee --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/core/CoreExtension.java @@ -0,0 +1,28 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.core; + +import java.util.List; +import 
org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.sql.SqlOperatorTable; +import org.opensearch.sql.api.spec.LanguageSpec; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Core extension that extends the default language spec with additional functions and capabilities. + */ +public class CoreExtension implements LanguageSpec.LanguageExtension { + + @Override + public SqlOperatorTable operators() { + return UnifiedFunctionSpec.SCALAR.operatorTable(); + } + + @Override + public List preCompilationRules() { + return List.of(new LateBindingFunctionRule()); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java b/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java new file mode 100644 index 00000000000..3294d21a241 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/core/LateBindingFunctionRule.java @@ -0,0 +1,49 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.core; + +import java.util.Map; +import java.util.Optional; +import java.util.function.BiFunction; +import java.util.stream.Collectors; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.SqlOperator; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Binds custom function implementations at compilation time by rewriting to executable Calcite + * expressions. + */ +class LateBindingFunctionRule extends RelHomogeneousShuttle { + + /** Operator-to-impl mappings collected from all function specs. 
*/ + private final Map> bindings = + UnifiedFunctionSpec.ALL_SPECS.values().stream() + .filter(spec -> spec.getImpl() != null) + .collect( + Collectors.toMap(UnifiedFunctionSpec::getOperator, UnifiedFunctionSpec::getImpl)); + + @Override + public RelNode visit(RelNode node) { + RelNode visited = super.visit(node); + RexBuilder rexBuilder = node.getCluster().getRexBuilder(); + return visited.accept( + new RexShuttle() { + @Override + public RexNode visitCall(RexCall call) { + RexCall visited = (RexCall) super.visitCall(call); + return Optional.ofNullable(bindings.get(visited.getOperator())) + .map(impl -> impl.apply(rexBuilder, visited)) + .orElse(visited); + } + }); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeExtension.java new file mode 100644 index 00000000000..944ac4a4bf1 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeExtension.java @@ -0,0 +1,53 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.api.spec.LanguageSpec.LanguageExtension; +import org.opensearch.sql.calcite.type.AbstractExprRelDataType; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; + +/** Datetime language extension that normalizes UDT types and casts output for wire-format. 
*/ +public class DatetimeExtension implements LanguageExtension { + + @Override + public List postAnalysisRules() { + return List.of(DatetimeUdtNormalizeRule.INSTANCE, DatetimeOutputCastRule.INSTANCE); + } + + /** Maps datetime UDT types to their standard Calcite equivalents. */ + @Getter + @RequiredArgsConstructor + enum UdtMapping { + DATE(ExprUDT.EXPR_DATE, SqlTypeName.DATE), + TIME(ExprUDT.EXPR_TIME, SqlTypeName.TIME), + TIMESTAMP(ExprUDT.EXPR_TIMESTAMP, SqlTypeName.TIMESTAMP); + + private final ExprUDT udtType; + private final SqlTypeName stdType; + + /** Matches a UDT RelDataType to its mapping, or empty if not a datetime UDT. */ + static Optional fromUdtType(RelDataType type) { + if (!(type instanceof AbstractExprRelDataType e)) { + return Optional.empty(); + } + ExprUDT udt = e.getUdt(); + return Arrays.stream(values()).filter(u -> u.udtType == udt).findFirst(); + } + + /** Returns true if the given SqlTypeName is a standard datetime type. */ + static boolean isDatetimeType(SqlTypeName typeName) { + return Arrays.stream(values()).anyMatch(u -> u.stdType == typeName); + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeOutputCastRule.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeOutputCastRule.java new file mode 100644 index 00000000000..9a7ae25e003 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeOutputCastRule.java @@ -0,0 +1,62 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import static org.opensearch.sql.api.spec.datetime.DatetimeExtension.UdtMapping.isDatetimeType; + +import java.util.ArrayList; +import java.util.List; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.logical.LogicalProject; +import 
org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.SqlTypeName; + +/** Wraps the root output with CAST(datetime → VARCHAR) for PPL wire-format compatibility. */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +class DatetimeOutputCastRule extends RelHomogeneousShuttle { + + static final DatetimeOutputCastRule INSTANCE = new DatetimeOutputCastRule(); + + @Override + public RelNode visit(RelNode other) { + List fields = other.getRowType().getFieldList(); + if (fields.stream().noneMatch(f -> isDatetimeType(f.getType().getSqlTypeName()))) { + return other; + } + + RexBuilder rexBuilder = other.getCluster().getRexBuilder(); + List projects = new ArrayList<>(fields.size()); + List names = new ArrayList<>(fields.size()); + + // Cast datetime fields to VARCHAR for output; pass through others unchanged + for (RelDataTypeField field : fields) { + RexNode newField = rexBuilder.makeInputRef(other, field.getIndex()); + RelDataType fieldType = field.getType(); + if (isDatetimeType(fieldType.getSqlTypeName())) { + projects.add(castToVarchar(rexBuilder, newField, fieldType)); + } else { + projects.add(newField); + } + names.add(field.getName()); + } + return LogicalProject.create(other, List.of(), projects, names); + } + + private static RexNode castToVarchar(RexBuilder rexBuilder, RexNode expr, RelDataType fieldType) { + RelDataTypeFactory typeFactory = rexBuilder.getTypeFactory(); + RelDataType varcharType = + typeFactory.createTypeWithNullability( + typeFactory.createSqlType(SqlTypeName.VARCHAR), fieldType.isNullable()); + return rexBuilder.makeCast(varcharType, expr); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtNormalizeRule.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtNormalizeRule.java new 
file mode 100644 index 00000000000..b15d830d412 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtNormalizeRule.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import java.util.Optional; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.api.spec.datetime.DatetimeExtension.UdtMapping; + +/** + * Temporary patch that rewrites datetime UDT return types on RexCall nodes to standard Calcite + * types. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +class DatetimeUdtNormalizeRule extends RelHomogeneousShuttle { + + static final DatetimeUdtNormalizeRule INSTANCE = new DatetimeUdtNormalizeRule(); + + @Override + public RelNode visit(RelNode other) { + RelNode visited = super.visit(other); + RexBuilder rexBuilder = visited.getCluster().getRexBuilder(); + RelDataTypeFactory typeFactory = rexBuilder.getTypeFactory(); + return visited.accept( + new RexShuttle() { + @Override + public RexNode visitCall(RexCall call) { + call = (RexCall) super.visitCall(call); + Optional mapping = UdtMapping.fromUdtType(call.getType()); + if (mapping.isEmpty()) { + return call; + } + + // Normalize UDT return type to standard Calcite DATE/TIME/TIMESTAMP + UdtMapping m = mapping.get(); + SqlTypeName stdTypeName = m.getStdType(); + RelDataType baseType = + stdTypeName.allowsPrec() + ? 
typeFactory.createSqlType( + stdTypeName, typeFactory.getTypeSystem().getMaxPrecision(stdTypeName)) + : typeFactory.createSqlType(stdTypeName); + RelDataType stdType = + typeFactory.createTypeWithNullability(baseType, call.getType().isNullable()); + return call.clone(stdType, call.getOperands()); + } + }); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/search/NamedArgRewriter.java b/api/src/main/java/org/opensearch/sql/api/spec/search/NamedArgRewriter.java new file mode 100644 index 00000000000..8627a76f2cf --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/search/NamedArgRewriter.java @@ -0,0 +1,75 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.search; + +import java.util.List; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.util.SqlShuttle; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Pre-validation rewriter for backward compatibility with non-standard named-argument syntax (e.g., + * {@code operator='AND'} instead of {@code operator => 'AND'}). Normalizes relevance function calls + * into MAP-based form so SQL and PPL paths produce identical query plans for pushdown rules. + * + *

This rewriter is subject to removal if we adopt standard SQL named-argument syntax. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public final class NamedArgRewriter extends SqlShuttle { + + public static final NamedArgRewriter INSTANCE = new NamedArgRewriter(); + + @Override + public @Nullable SqlNode visit(SqlCall call) { + SqlCall visited = (SqlCall) super.visit(call); + return UnifiedFunctionSpec.of(visited.getOperator().getName()) + .filter(UnifiedFunctionSpec.RELEVANCE::contains) + .map(spec -> (SqlNode) rewriteToMaps(visited, spec.getParamNames())) + .orElse(visited); + } + + /** + * Rewrites each argument into a MAP entry. For match(name, 'John', operator='AND'): + *

  • Positional arg: name → MAP('field', name) + *
  • Named arg: operator='AND' → MAP('operator', 'AND') + */ + private static SqlCall rewriteToMaps(SqlCall call, List paramNames) { + List operands = call.getOperandList(); + SqlNode[] maps = new SqlNode[operands.size()]; + for (int i = 0; i < operands.size(); i++) { + SqlNode op = operands.get(i); + if (op instanceof SqlCall eq && op.getKind() == SqlKind.EQUALS) { + SqlNode key = eq.operand(0); + String name = + key instanceof SqlIdentifier ident + ? ident.getSimple() + : key.toString(); // avoid backtick-decorated keys for reserved words + maps[i] = toMap(name, eq.operand(1)); + } else { + if (i >= paramNames.size()) { + throw new IllegalArgumentException( + String.format("Invalid arguments for function '%s'", call.getOperator().getName())); + } + maps[i] = toMap(paramNames.get(i), op); + } + } + return call.getOperator().createCall(call.getParserPosition(), maps); + } + + private static SqlNode toMap(String key, SqlNode value) { + return SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR.createCall( + SqlParserPos.ZERO, SqlLiteral.createCharString(key, SqlParserPos.ZERO), value); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/search/SearchExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/search/SearchExtension.java new file mode 100644 index 00000000000..159560067c5 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/search/SearchExtension.java @@ -0,0 +1,27 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.search; + +import java.util.List; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.util.SqlVisitor; +import org.opensearch.sql.api.spec.LanguageSpec; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** Search Extension: relevance functions and named argument rewriting. 
*/ +public class SearchExtension implements LanguageSpec.LanguageExtension { + + @Override + public SqlOperatorTable operators() { + return UnifiedFunctionSpec.RELEVANCE.operatorTable(); + } + + @Override + public List> postParseRules() { + return List.of(NamedArgRewriter.INSTANCE); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java new file mode 100644 index 00000000000..a16fa116b42 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedFunctionSpecTest.java @@ -0,0 +1,79 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Timestamp; +import org.apache.calcite.rel.RelNode; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.sql.api.compiler.UnifiedQueryCompiler; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; +import org.opensearch.sql.executor.QueryType; + +/** + * Tests for scalar functions registered in {@link UnifiedFunctionSpec#SCALAR}. Verifies planning + * (function resolves correctly) and execution (produces correct results in-memory). 
+ */ +public class UnifiedFunctionSpecTest extends UnifiedQueryTestBase { + + private UnifiedQueryCompiler compiler; + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Before + public void setUp() { + super.setUp(); + compiler = new UnifiedQueryCompiler(context); + } + + @Test + public void testLength() throws Exception { + assertEquals(5, eval("LENGTH('hello')")); + assertEquals(0, eval("LENGTH('')")); + } + + @Test + public void testRegexpReplace() throws Exception { + assertEquals("XbcXbc", eval("REGEXP_REPLACE('abcabc', 'a', 'X')")); + assertEquals("hello", eval("REGEXP_REPLACE('hello', 'xyz', 'X')")); + } + + @Test + public void testDateTrunc() throws Exception { + // Plan preserves DATE_TRUNC (late binding — not rewritten until compilation) + givenQuery( + "SELECT DATE_TRUNC('minute', TIMESTAMP '2023-01-01 12:34:56') FROM catalog.employees") + .assertPlanContains("DATE_TRUNC('minute', 2023-01-01 12:34:56)"); + + // Execution rewrites to FLOOR and produces truncated timestamp + Object result = eval("DATE_TRUNC('hour', TIMESTAMP '2023-07-15 14:30:45')"); + assertEquals(Timestamp.valueOf("2023-07-15 14:00:00"), result); + } + + @Test + public void testFunctionSpecLookup() { + assertTrue(UnifiedFunctionSpec.of("length").isPresent()); + assertTrue(UnifiedFunctionSpec.of("regexp_replace").isPresent()); + assertTrue(UnifiedFunctionSpec.of("date_trunc").isPresent()); + } + + private Object eval(String expr) throws Exception { + RelNode plan = planner.plan("SELECT " + expr + " AS v FROM (VALUES (0)) AS t(dummy)"); + try (PreparedStatement stmt = compiler.compile(plan); + ResultSet rs = stmt.executeQuery()) { + assertTrue(rs.next()); + return rs.getObject(1); + } + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java index a3ad73f700a..f0111d06363 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java +++ 
b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java @@ -33,6 +33,10 @@ public void testContextCreationWithDefaults() { "Settings should have default system limits", SysLimit.DEFAULT, SysLimit.fromSettings(context.getSettings())); + assertEquals( + "PPL_REX_MAX_MATCH_LIMIT default should be 10", + Integer.valueOf(10), + context.getSettings().getSettingValue(PPL_REX_MAX_MATCH_LIMIT)); } @Test @@ -43,10 +47,15 @@ public void testContextCreationWithCustomConfig() { .catalog("opensearch", testSchema) .cacheMetadata(true) .setting("plugins.query.size_limit", 200) + .setting("plugins.ppl.rex.max_match.limit", 5) .build(); Integer querySizeLimit = context.getSettings().getSettingValue(QUERY_SIZE_LIMIT); assertEquals("Custom setting should be applied", Integer.valueOf(200), querySizeLimit); + assertEquals( + "Cluster-side override for PPL_REX_MAX_MATCH_LIMIT should reach the unified path", + Integer.valueOf(5), + context.getSettings().getSettingValue(PPL_REX_MAX_MATCH_LIMIT)); } @Test(expected = IllegalArgumentException.class) @@ -63,14 +72,15 @@ public void testMissingQueryType() { UnifiedQueryContext.builder().catalog("opensearch", testSchema).build(); } - @Test(expected = IllegalArgumentException.class) - public void testUnsupportedQueryType() { + @Test + public void testSqlQueryType() { UnifiedQueryContext context = UnifiedQueryContext.builder() - .language(QueryType.SQL) // only PPL is supported for now + .language(QueryType.SQL) .catalog("opensearch", testSchema) .build(); - new UnifiedQueryPlanner(context); + UnifiedQueryPlanner planner = new UnifiedQueryPlanner(context); + assertNotNull("SQL planner should be created", planner); } @Test(expected = IllegalArgumentException.class) diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java new file mode 100644 index 00000000000..855d3d2788d --- /dev/null +++ 
b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java @@ -0,0 +1,260 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; + +import java.util.List; +import java.util.Map; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.impl.AbstractSchema; +import org.junit.Test; +import org.opensearch.sql.executor.QueryType; + +public class UnifiedQueryPlannerSqlTest extends UnifiedQueryTestBase { + + private final AbstractSchema testDeepSchema = + new AbstractSchema() { + @Override + protected Map getSubSchemaMap() { + return Map.of("opensearch", testSchema); + } + }; + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Test + public void testSqlQueryPlanning() { + givenQuery( + """ + SELECT * + FROM catalog.employees\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testSqlSelectSpecificColumns() { + givenQuery( + """ + SELECT id, name + FROM catalog.employees\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testSqlFilterQueryPlanning() { + givenQuery( + """ + SELECT name + FROM catalog.employees + WHERE age > 30\ + """) + .assertPlan( + """ + LogicalProject(name=[$1]) + LogicalFilter(condition=[>($2, 30)]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testSqlAggregateQueryPlanning() { + givenQuery( + """ + SELECT department, count(*) AS cnt + FROM catalog.employees + GROUP BY department\ + """) + .assertPlan( + """ + LogicalAggregate(group=[{0}], cnt=[COUNT()]) + LogicalProject(department=[$3]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public 
void testSqlJoinQueryPlanning() { + givenQuery( + """ + SELECT a.id, b.name + FROM catalog.employees a + JOIN catalog.employees b ON a.id = b.age\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$5]) + LogicalJoin(condition=[=($0, $6)], joinType=[inner]) + LogicalTableScan(table=[[catalog, employees]]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testSqlOrderByQueryPlanning() { + givenQuery( + """ + SELECT name + FROM catalog.employees + ORDER BY age DESC\ + """) + .assertPlan( + """ + LogicalProject(name=[$0]) + LogicalSort(sort0=[$1], dir0=[DESC]) + LogicalProject(name=[$1], age=[$2]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testSqlSubqueryPlanning() { + // Calcite represents scalar subqueries as $SCALAR_QUERY{...} with embedded plan text whose + // formatting (whitespace, line breaks) may vary across versions. Assert output fields only. + givenQuery( + """ + SELECT name + FROM catalog.employees + WHERE age > (SELECT avg(age) FROM catalog.employees)\ + """) + .assertFields("name"); + } + + @Test + public void testSqlCteQueryPlanning() { + // CTE is inlined by Calcite — same plan as a direct filter query + givenQuery( + """ + WITH seniors AS ( + SELECT name, age FROM catalog.employees WHERE age > 30 + ) + SELECT name + FROM seniors\ + """) + .assertPlan( + """ + LogicalProject(name=[$1]) + LogicalFilter(condition=[>($2, 30)]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testSqlQueryPlanningWithDefaultNamespace() { + UnifiedQueryContext sqlContext = + UnifiedQueryContext.builder() + .language(QueryType.SQL) + .catalog("opensearch", testSchema) + .defaultNamespace("opensearch") + .build(); + UnifiedQueryPlanner sqlPlanner = new UnifiedQueryPlanner(sqlContext); + + assertNotNull("Plan should be created", sqlPlanner.plan("SELECT * FROM opensearch.employees")); + assertNotNull("Plan should be created", sqlPlanner.plan("SELECT * FROM 
employees")); + } + + @Test + public void testSqlQueryPlanningWithDefaultNamespaceMultiLevel() { + UnifiedQueryContext sqlContext = + UnifiedQueryContext.builder() + .language(QueryType.SQL) + .catalog("catalog", testDeepSchema) + .defaultNamespace("catalog.opensearch") + .build(); + UnifiedQueryPlanner sqlPlanner = new UnifiedQueryPlanner(sqlContext); + + assertNotNull( + "Plan should be created", sqlPlanner.plan("SELECT * FROM catalog.opensearch.employees")); + assertNotNull("Plan should be created", sqlPlanner.plan("SELECT * FROM employees")); + + assertThrows( + IllegalStateException.class, () -> sqlPlanner.plan("SELECT * FROM opensearch.employees")); + } + + @Test + public void testSqlQueryPlanningWithMultipleCatalogs() { + UnifiedQueryContext sqlContext = + UnifiedQueryContext.builder() + .language(QueryType.SQL) + .catalog("catalog1", testSchema) + .catalog("catalog2", testSchema) + .build(); + UnifiedQueryPlanner sqlPlanner = new UnifiedQueryPlanner(sqlContext); + + assertNotNull( + "Plan should be created", + sqlPlanner.plan( + """ + SELECT a.id + FROM catalog1.employees a + JOIN catalog2.employees b ON a.id = b.id\ + """)); + } + + @Test + public void testInvalidSqlThrowsException() { + assertThrows(IllegalStateException.class, () -> planner.plan("SELECT FROM")); + } + + @Test + public void testNonQueryStatementsBlockedByWhitelist() { + List.of( + """ + INSERT INTO catalog.employees (id, name, age, department) + VALUES (99, 'injected', 0, 'hacked')\ + """, + """ + DELETE FROM catalog.employees + WHERE age > 30\ + """, + """ + UPDATE catalog.employees + SET department = 'Fired' + WHERE age > 50\ + """, + """ + EXPLAIN PLAN FOR + SELECT * FROM catalog.employees\ + """, + """ + MERGE INTO catalog.employees AS t + USING (SELECT 99 AS id) AS s ON t.id = s.id + WHEN MATCHED THEN UPDATE SET name = 'hacked'\ + """, + """ + SHOW TABLES\ + """) + .forEach( + sql -> + givenInvalidQuery(sql).assertErrorMessage("Only query statements are supported")); + } + + @Test + 
public void testNonQueryStatementsBlockedByParser() { + givenInvalidQuery( + """ + CREATE MATERIALIZED VIEW mv AS + SELECT department, count(*) + FROM catalog.employees + GROUP BY department\ + """) + .assertErrorMessage("Encountered"); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java index 9ad7aa42155..41ed12670f8 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java @@ -106,7 +106,7 @@ public void testPPLQueryPlanningWithMultipleCatalogsAndDefaultNamespace() { assertNotNull("Plan should be created with multiple catalogs", plan); } - @Test(expected = IllegalStateException.class) + @Test(expected = UnsupportedOperationException.class) public void testUnsupportedStatementType() { planner.plan("explain source = catalog.employees"); // explain statement } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryProfilingTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryProfilingTest.java new file mode 100644 index 00000000000..d1387108a39 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryProfilingTest.java @@ -0,0 +1,110 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.opensearch.sql.monitor.profile.MetricName.EXECUTE; + +import java.io.IOException; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import org.apache.calcite.rel.RelNode; +import org.junit.Test; +import org.opensearch.sql.api.compiler.UnifiedQueryCompiler; +import 
org.opensearch.sql.monitor.profile.QueryProfile; +import org.opensearch.sql.monitor.profile.QueryProfiling; + +/** Tests for profiling across unified query components and the measure() API. */ +public class UnifiedQueryProfilingTest extends UnifiedQueryTestBase { + + @Override + protected UnifiedQueryContext.Builder contextBuilder() { + return super.contextBuilder().profiling(true); + } + + @Test + public void testProfilingEnabled() { + assertTrue(QueryProfiling.current().isEnabled()); + } + + @Test + public void testProfilingDisabledByDefault() throws Exception { + try (UnifiedQueryContext ctx = super.contextBuilder().build()) { + assertFalse(QueryProfiling.current().isEnabled()); + } + } + + @Test + public void testGetProfileReturnsEmptyWhenDisabled() throws Exception { + try (UnifiedQueryContext ctx = super.contextBuilder().build()) { + assertFalse(ctx.getProfile().isPresent()); + } + } + + @Test + public void testMeasureExecutesWhenProfilingDisabled() throws Exception { + try (UnifiedQueryContext ctx = super.contextBuilder().build()) { + assertEquals("done", ctx.measure(EXECUTE, () -> "done")); + assertFalse(ctx.getProfile().isPresent()); + } + } + + @Test + public void testPlannerAutoProfilesAnalyzePhase() { + planner.plan("source = catalog.employees"); + assertTrue(context.getProfile().get().getPhases().get("analyze").getTimeMillis() >= 0); + } + + @Test + public void testCompilerAutoProfilesOptimizePhase() { + RelNode plan = planner.plan("source = catalog.employees"); + new UnifiedQueryCompiler(context).compile(plan); + assertTrue(context.getProfile().get().getPhases().get("optimize").getTimeMillis() >= 0); + } + + @Test + public void testMeasureRecordsMetric() throws Exception { + assertEquals("done", context.measure(EXECUTE, () -> "done")); + assertTrue(context.getProfile().get().getPhases().get("execute").getTimeMillis() >= 0); + } + + @Test + public void testFullPipelineProfiling() throws Exception { + RelNode plan = planner.plan("source = 
catalog.employees"); + PreparedStatement stmt = new UnifiedQueryCompiler(context).compile(plan); + ResultSet rs = context.measure(EXECUTE, stmt::executeQuery); + + QueryProfile profile = context.getProfile().get(); + assertTrue(profile.getSummary().getTotalTimeMillis() >= 0); + assertTrue(profile.getPhases().get("analyze").getTimeMillis() >= 0); + assertTrue(profile.getPhases().get("optimize").getTimeMillis() >= 0); + assertTrue(profile.getPhases().get("execute").getTimeMillis() >= 0); + assertNotNull(profile.getPlan()); + } + + @Test + public void testProfilingClearedAfterClose() throws Exception { + assertTrue(QueryProfiling.current().isEnabled()); + context.close(); + assertFalse(QueryProfiling.current().isEnabled()); + } + + @Test + public void testMeasurePropagatesException() { + assertThrows( + IOException.class, + () -> + context.measure( + EXECUTE, + () -> { + throw new IOException("test error"); + })); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java new file mode 100644 index 00000000000..66df9c2e075 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java @@ -0,0 +1,187 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import org.junit.Test; +import org.opensearch.sql.executor.QueryType; + +/** + * Tests for relevance search functions in SQL planning path using V2/PPL syntax. Mirrors the PPL + * tests in {@link UnifiedRelevanceSearchTest} with equivalent SQL queries. Both paths produce + * identical MAP-based plans for pushdown rules. 
+ */ +public class UnifiedRelevanceSearchSqlTest extends UnifiedQueryTestBase { + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Test + public void testMatch() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match(name, 'John')\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchPhrase() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_phrase(name, 'John Doe')\ + """) + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'John Doe'))"); + } + + @Test + public void testMatchBoolPrefix() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_bool_prefix(name, 'John')\ + """) + .assertPlanContains("match_bool_prefix(MAP('field', $1), MAP('query', 'John'))"); + } + + @Test + public void testMatchPhrasePrefix() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_phrase_prefix(name, 'John')\ + """) + .assertPlanContains("match_phrase_prefix(MAP('field', $1), MAP('query', 'John'))"); + } + + @Test + public void testMultiMatch() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(name, 'John')\ + """) + .assertPlanContains("multi_match(MAP('fields', $1), MAP('query', 'John'))"); + } + + @Test + public void testSimpleQueryString() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE simple_query_string(name, 'John')\ + """) + .assertPlanContains("simple_query_string(MAP('fields', $1), MAP('query', 'John'))"); + } + + @Test + public void testQueryString() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE query_string(name, 'John')\ + """) + .assertPlanContains("query_string(MAP('fields', $1), MAP('query', 'John'))"); + } + + @Test + public void testMatchWithOptions() { + givenQuery( + """ + SELECT * FROM 
catalog.employees + WHERE match(name, 'John', operator='AND', boost=2.0)\ + """) + .assertPlanContains( + "match(MAP('field', $1), MAP('query', 'John')," + + " MAP('operator', 'AND'), MAP('boost', 2.0:DECIMAL(2, 1)))"); + } + + @Test + public void testMatchMissingArguments() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE match('John')\ + """) + .assertErrorMessage( + "No match found for function signature match(<(CHAR(5), CHAR(4)) MAP>)"); + } + + @Test + public void testUnknownRelevanceFunction() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE unknown_relevance(name, 'John')\ + """) + .assertErrorMessage( + "No match found for function signature unknown_relevance(, )"); + } + + @Test + public void testNonRelevanceFunctionUnaffectedByRewriter() { + givenQuery( + """ + SELECT upper(name) FROM catalog.employees\ + """) + .assertPlan( + """ + LogicalProject(EXPR$0=[UPPER($1)]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + // FIXME: Calcite's SQL parser does not support V2 bracket field list syntax ['field1', 'field2']. + // Multi-field relevance functions only accept a single column reference in the Calcite SQL path. 
+ + @Test + public void testMultiMatchBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testMultiMatchFieldBoostNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(['name' ^ 2.0, 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testSimpleQueryStringBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE simple_query_string(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testQueryStringBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE query_string(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java new file mode 100644 index 00000000000..a80ae190868 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java @@ -0,0 +1,78 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import org.junit.Test; + +/** Tests for relevance search functions in PPL planning path. 
*/ +public class UnifiedRelevanceSearchTest extends UnifiedQueryTestBase { + + @Test + public void testMatch() { + givenQuery("source=catalog.employees | where match(name, 'John')") + .assertPlan( + """ + LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchPhrase() { + givenQuery("source=catalog.employees | where match_phrase(name, 'John Doe')") + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'John Doe':VARCHAR))"); + } + + @Test + public void testMatchBoolPrefix() { + givenQuery("source=catalog.employees | where match_bool_prefix(name, 'John')") + .assertPlanContains("match_bool_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testMatchPhrasePrefix() { + givenQuery("source=catalog.employees | where match_phrase_prefix(name, 'John')") + .assertPlanContains("match_phrase_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testMultiMatch() { + givenQuery("source=catalog.employees | where multi_match(['name', 'department'], 'John')") + .assertPlanContains( + "multi_match(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE," + + " 'department':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testSimpleQueryString() { + givenQuery("source=catalog.employees | where simple_query_string(['name'], 'John')") + .assertPlanContains( + "simple_query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE))," + + " MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testQueryString() { + givenQuery("source=catalog.employees | where query_string(['name'], 'John')") + .assertPlanContains( + "query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE))," + + " MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testMatchMissingArguments() { + givenInvalidQuery("source=catalog.employees | where match('John')") + 
.assertErrorMessage( + "[)] is not a valid term at this part of the query:" + + " '...| where match('John')' <-- HERE. Expecting tokens: ','"); + } + + @Test + public void testUnknownRelevanceFunction() { + givenInvalidQuery("source=catalog.employees | where unknown_relevance(name, 'John')") + .assertErrorMessage("[(] is not a valid term at this part of the query"); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedSqlSpecTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedSqlSpecTest.java new file mode 100644 index 00000000000..97ddd07d0ac --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedSqlSpecTest.java @@ -0,0 +1,118 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import java.util.Map; +import org.apache.calcite.schema.Table; +import org.apache.calcite.schema.impl.AbstractSchema; +import org.junit.Test; +import org.opensearch.sql.executor.QueryType; + +public class UnifiedSqlSpecTest extends UnifiedQueryTestBase { + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Override + protected UnifiedQueryContext.Builder contextBuilder() { + AbstractSchema schema = + new AbstractSchema() { + @Override + protected Map getTableMap() { + return Map.of( + "employees", createEmployeesTable(), + "logs-2024-01", createEmployeesTable()); + } + }; + return UnifiedQueryContext.builder() + .language(queryType()) + .catalog(DEFAULT_CATALOG, schema) + .defaultNamespace(DEFAULT_CATALOG); + } + + @Test + public void hyphenatedTableIdentifier() { + givenQuery("SELECT * FROM logs-2024-01") + .assertPlanContains("LogicalTableScan(table=[[catalog, logs-2024-01]])"); + } + + @Test + public void backtickQuotedIdentifiers() { + givenQuery("SELECT `name` FROM employees").assertPlanContains("LogicalProject(name=[$1])"); + } + + @Test + public void doubleQuotedStringLiteral() { + givenQuery("SELECT \"Hello\" AS greeting FROM employees") + 
.assertPlanContains("LogicalProject(greeting=['Hello'])"); + } + + @Test + public void matchNotReserved() { + givenQuery("SELECT * FROM employees WHERE match(name, 'Hattie')") + .assertPlanContains("match(MAP('field', $1), MAP('query', 'Hattie'))"); + } + + @Test + public void reservedWordAsAlias() { + givenQuery("SELECT age AS year FROM employees").assertPlanContains("LogicalProject(year=[$2])"); + } + + @Test + public void limitSyntax() { + givenQuery("SELECT * FROM employees LIMIT 10").assertPlanContains("LogicalSort(fetch=[10])"); + } + + @Test + public void selectWithoutFrom() { + givenQuery("SELECT 1").assertPlanContains("LogicalValues(tuples=[[{ 1 }]])"); + } + + @Test + public void groupByAlias() { + givenQuery("SELECT department AS dept, COUNT(*) AS cnt FROM employees GROUP BY dept") + .assertPlanContains("LogicalAggregate(group=[{0}]"); + } + + @Test + public void groupByOrdinal() { + givenQuery("SELECT name, COUNT(*) AS cnt FROM employees GROUP BY 1") + .assertPlanContains("LogicalAggregate(group=[{0}], cnt=[COUNT()])") + .assertPlanContains("LogicalProject(name=[$1])"); + } + + @Test + public void castBooleanToInteger() { + givenQuery("SELECT CAST(true AS INTEGER) AS val FROM employees") + .assertPlanContains("LogicalProject(val=[1])"); + } + + @Test + public void integerComparedToString() { + givenQuery("SELECT * FROM employees WHERE age > '30'") + .assertPlanContains("condition=[>($2, CAST('30'):INTEGER NOT NULL)]"); + } + + @Test + public void matchFunction() { + givenQuery("SELECT * FROM employees WHERE match(name, 'John')") + .assertPlanContains("match(MAP('field', $1), MAP('query', 'John'))"); + } + + @Test + public void matchPhraseFunction() { + givenQuery("SELECT * FROM employees WHERE match_phrase(name, 'quick fox')") + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'quick fox'))"); + } + + @Test + public void namedParametersSyntax() { + givenQuery("SELECT * FROM employees WHERE match_phrase(name, 'quick fox', slop=2)") + 
.assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'quick fox')"); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java new file mode 100644 index 00000000000..0cba9f7b712 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserSqlTest.java @@ -0,0 +1,115 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import org.apache.calcite.sql.parser.SqlParserFixture; +import org.junit.Test; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.executor.QueryType; + +/** + * SQL parser tests using Calcite's {@link SqlParserFixture} for idiomatic parse-unparse assertions. + * Parser config is read from {@link org.opensearch.sql.api.UnifiedQueryContext} to stay in sync + * with production. + */ +public class UnifiedQueryParserSqlTest extends UnifiedQueryTestBase { + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Test + public void testParseSelectStar() { + sql("SELECT * FROM catalog.employees") + .ok( + """ + SELECT * + FROM `catalog`.`employees`\ + """); + } + + @Test + public void testParseSelectColumns() { + sql("SELECT id, name FROM catalog.employees") + .ok( + """ + SELECT `id`, `name` + FROM `catalog`.`employees`\ + """); + } + + @Test + public void testParseFilter() { + sql(""" + SELECT name + FROM catalog.employees + WHERE age > 30\ + """) + .ok( + """ + SELECT `name` + FROM `catalog`.`employees` + WHERE (`age` > 30)\ + """); + } + + @Test + public void testParseAggregate() { + sql(""" + SELECT department, count(*) AS cnt + FROM catalog.employees + GROUP BY department\ + """) + .ok( + """ + SELECT `department`, COUNT(*) AS `cnt` + FROM `catalog`.`employees` + GROUP BY `department`\ + """); + } + + @Test + public void testParseOrderBy() { + sql(""" + SELECT 
name + FROM catalog.employees + ORDER BY age DESC\ + """) + .ok( + """ + SELECT `name` + FROM `catalog`.`employees` + ORDER BY `age` DESC\ + """); + } + + @Test + public void testParseJoin() { + sql(""" + SELECT a.id, b.name + FROM catalog.employees a + JOIN catalog.employees b ON a.id = b.age\ + """) + .ok( + """ + SELECT `a`.`id`, `b`.`name` + FROM `catalog`.`employees` AS `a` + INNER JOIN `catalog`.`employees` AS `b` ON (`a`.`id` = `b`.`age`)\ + """); + } + + @Test + public void testSyntaxErrorFails() { + sql("SELECT ^FROM^").fails("(?s).*Incorrect syntax near the keyword 'FROM'.*"); + } + + private SqlParserFixture sql(String sql) { + return SqlParserFixture.DEFAULT + .withConfig(c -> context.getPlanContext().config.getParserConfig()) + .sql(sql); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java new file mode 100644 index 00000000000..1b6b5181aef --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/parser/UnifiedQueryParserTest.java @@ -0,0 +1,87 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import static java.util.Collections.emptyList; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.opensearch.sql.ast.dsl.AstDSL.agg; +import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; +import static org.opensearch.sql.ast.dsl.AstDSL.alias; +import static org.opensearch.sql.ast.dsl.AstDSL.allFields; +import static org.opensearch.sql.ast.dsl.AstDSL.compare; +import static org.opensearch.sql.ast.dsl.AstDSL.defaultStatsArgs; +import static org.opensearch.sql.ast.dsl.AstDSL.eval; +import static org.opensearch.sql.ast.dsl.AstDSL.exprList; +import static org.opensearch.sql.ast.dsl.AstDSL.field; +import static org.opensearch.sql.ast.dsl.AstDSL.filter; +import static 
org.opensearch.sql.ast.dsl.AstDSL.function; +import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.let; +import static org.opensearch.sql.ast.dsl.AstDSL.project; +import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; +import static org.opensearch.sql.ast.dsl.AstDSL.relation; + +import org.junit.Test; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.common.antlr.SyntaxCheckException; + +public class UnifiedQueryParserTest extends UnifiedQueryTestBase { + + @Test + public void testParseSource() { + assertEqual( + "source = catalog.employees", + project(relation(qualifiedName("catalog", "employees")), allFields())); + } + + @Test + public void testParseFilter() { + assertEqual( + "source = catalog.employees | where age > 30", + project( + filter( + relation(qualifiedName("catalog", "employees")), + compare(">", field("age"), intLiteral(30))), + allFields())); + } + + @Test + public void testParseEval() { + assertEqual( + "source = catalog.employees | eval f = abs(id)", + project( + eval( + relation(qualifiedName("catalog", "employees")), + let(field("f"), function("abs", field("id")))), + allFields())); + } + + @Test + public void testParseStats() { + assertEqual( + "source = catalog.employees | stats count(age) by department", + project( + agg( + relation(qualifiedName("catalog", "employees")), + exprList(alias("count(age)", aggregate("count", field("age")))), + emptyList(), + exprList(alias("department", field("department"))), + defaultStatsArgs()), + allFields())); + } + + @Test + public void testSyntaxErrorThrows() { + assertThrows(SyntaxCheckException.class, () -> context.getParser().parse("not a valid query")); + } + + private void assertEqual(String query, UnresolvedPlan expected) { + UnresolvedPlan actual = (UnresolvedPlan) context.getParser().parse(query); + assertEquals(expected, actual); + } +} diff --git 
a/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeExtensionTest.java b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeExtensionTest.java new file mode 100644 index 00000000000..fc089150109 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeExtensionTest.java @@ -0,0 +1,225 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import static org.apache.calcite.sql.type.SqlTypeName.BIGINT; +import static org.apache.calcite.sql.type.SqlTypeName.DATE; +import static org.apache.calcite.sql.type.SqlTypeName.INTEGER; +import static org.apache.calcite.sql.type.SqlTypeName.TIME; +import static org.apache.calcite.sql.type.SqlTypeName.TIMESTAMP; +import static org.apache.calcite.sql.type.SqlTypeName.VARCHAR; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.schema.Table; +import org.apache.calcite.schema.impl.AbstractSchema; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.sql.api.ResultSetAssertion; +import org.opensearch.sql.api.UnifiedQueryContext; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.api.compiler.UnifiedQueryCompiler; +import org.opensearch.sql.executor.QueryType; + +public class DatetimeExtensionTest extends UnifiedQueryTestBase implements ResultSetAssertion { + + private UnifiedQueryCompiler compiler; + + @Override + protected UnifiedQueryContext.Builder contextBuilder() { + return 
UnifiedQueryContext.builder() + .language(QueryType.PPL) + .catalog( + DEFAULT_CATALOG, + new AbstractSchema() { + @Override + protected Map getTableMap() { + return Map.of("events", createEventsTable()); + } + }); + } + + @Before + public void setUp() { + super.setUp(); + compiler = new UnifiedQueryCompiler(context); + } + + private Table createEventsTable() { + return SimpleTable.builder() + .col("id", INTEGER) + .col("name", VARCHAR) + .col("hire_date", DATE) + .col("start_time", TIME) + .col("created_at", TIMESTAMP) + .row(new Object[] {1, "Alice", 19738, 43200000, 1705305600000L}) + .row(new Object[] {2, "Bob", 19894, 50400000, 1718841600000L}) + .build(); + } + + @Test + public void testUdfResultNormalizedAndCastToVarchar() { + var plan = + givenQuery( + """ + source = catalog.events \ + | eval d = DATE(name), t = TIME(name), ts = TIMESTAMP(name) \ + | fields d, t, ts\ + """) + .assertPlan( + """ + LogicalProject(d=[CAST($0):VARCHAR], t=[CAST($1):VARCHAR], ts=[CAST($2):VARCHAR]) + LogicalProject(d=[DATE($1)], t=[TIME($1)], ts=[TIMESTAMP($1)]) + LogicalTableScan(table=[[catalog, events]]) + """) + .plan(); + assertCallType(plan, "DATE", DATE); + assertCallType(plan, "TIME", TIME, 9); + assertCallType(plan, "TIMESTAMP", TIMESTAMP, 9); + } + + @Test + public void testNestedUdfCallsNormalized() { + var plan = + givenQuery("source = catalog.events | eval d = DATEDIFF(DATE(name), DATE(name)) | fields d") + .assertPlan( + """ + LogicalProject(d=[DATEDIFF(DATE($1), DATE($1))]) + LogicalTableScan(table=[[catalog, events]]) + """) + .plan(); + assertCallType(plan, "DATE", DATE); + assertCallType(plan, "DATEDIFF", BIGINT); + } + + @Test + public void testDateLiteralCastToVarchar() { + var plan = + givenQuery("source = catalog.events | eval d = DATE('2024-01-01') | fields d") + .assertPlan( + """ + LogicalProject(d=[CAST($0):VARCHAR]) + LogicalProject(d=[DATE('2024-01-01':VARCHAR)]) + LogicalTableScan(table=[[catalog, events]]) + """) + .plan(); + assertCallType(plan, 
"DATE", DATE); + } + + @Test + public void testFilterWithTimestampLiteral() { + var plan = + givenQuery( + """ + source = catalog.events | where created_at > "2024-01-01T00:00:00Z" | fields id\ + """) + .assertPlan( + """ + LogicalProject(id=[$0]) + LogicalFilter(condition=[>($4, TIMESTAMP('2024-01-01T00:00:00Z':VARCHAR))]) + LogicalTableScan(table=[[catalog, events]]) + """) + .plan(); + assertCallType(plan, "TIMESTAMP", TIMESTAMP, 9); + } + + @Test + public void testComparisonWithDatetimeUdf() { + var plan = + givenQuery("source = catalog.events | where created_at < DATE(name) | fields id") + .assertPlan( + """ + LogicalProject(id=[$0]) + LogicalFilter(condition=[<($4, TIMESTAMP(DATE($1)))]) + LogicalTableScan(table=[[catalog, events]]) + """) + .plan(); + assertCallType(plan, "DATE", DATE); + assertCallType(plan, "TIMESTAMP", TIMESTAMP, 9); + } + + @Test + public void testAllStandardDatetimeTypesCastToVarchar() { + givenQuery("source = catalog.events | fields hire_date, start_time, created_at") + .assertPlan( + """ + LogicalProject(hire_date=[CAST($0):VARCHAR NOT NULL], start_time=[CAST($1):VARCHAR NOT NULL], created_at=[CAST($2):VARCHAR NOT NULL]) + LogicalProject(hire_date=[$2], start_time=[$3], created_at=[$4]) + LogicalTableScan(table=[[catalog, events]]) + """); + } + + @Test + public void testNonDatetimeFieldsNotWrapped() { + givenQuery("source = catalog.events | fields id, name") + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1]) + LogicalTableScan(table=[[catalog, events]]) + """); + } + + @Test + public void testOutputCastCanCompileAndExecute() throws Exception { + RelNode plan = + planner.plan("source = catalog.events | fields hire_date, start_time, created_at"); + try (PreparedStatement statement = compiler.compile(plan)) { + ResultSet resultSet = statement.executeQuery(); + verify(resultSet) + .expectSchema( + col("hire_date", java.sql.Types.VARCHAR), + col("start_time", java.sql.Types.VARCHAR), + col("created_at", java.sql.Types.VARCHAR)) + 
.expectData( + row("2024-01-16", "12:00:00", "2024-01-15 08:00:00"), + row("2024-06-20", "14:00:00", "2024-06-20 00:00:00")); + } + } + + private static void assertCallType(RelNode plan, String operatorName, SqlTypeName expectedType) { + assertCallType(plan, operatorName, expectedType, -1); + } + + private static void assertCallType( + RelNode plan, String operatorName, SqlTypeName expectedType, int expectedPrecision) { + AtomicReference ref = new AtomicReference<>(); + plan.accept( + new RelHomogeneousShuttle() { + @Override + public RelNode visit(RelNode other) { + RelNode visited = super.visit(other); + visited.accept( + new RexShuttle() { + @Override + public RexNode visitCall(RexCall call) { + if (ref.get() == null + && call.getOperator().getName().equalsIgnoreCase(operatorName)) { + ref.set(call); + } + return super.visitCall(call); + } + }); + return visited; + } + }); + assertNotNull("No RexCall found for: " + operatorName, ref.get()); + assertEquals(operatorName + " type", expectedType, ref.get().getType().getSqlTypeName()); + if (expectedPrecision >= 0) { + assertEquals( + operatorName + " precision", expectedPrecision, ref.get().getType().getPrecision()); + } + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/spec/search/NamedArgRewriterTest.java b/api/src/test/java/org/opensearch/sql/api/spec/search/NamedArgRewriterTest.java new file mode 100644 index 00000000000..52395865548 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/spec/search/NamedArgRewriterTest.java @@ -0,0 +1,108 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.search; + +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import org.apache.calcite.avatica.util.Casing; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.junit.Test; + +/** Unit tests for {@link 
NamedArgRewriter}. */ +public class NamedArgRewriterTest { + + /** Match production parser config in UnifiedQueryContext. */ + private static final SqlParser.Config PARSER_CONFIG = + SqlParser.Config.DEFAULT.withUnquotedCasing(Casing.UNCHANGED); + + @Test + public void testPositionalArgsRewrittenToMaps() throws Exception { + SqlNode result = rewrite("SELECT * FROM t WHERE \"match\"(name, 'John')"); + assertContains(result, "MAP['field', `name`], MAP['query', 'John']"); + } + + @Test + public void testEqualsArgRewrittenToMap() throws Exception { + SqlNode result = rewrite("SELECT * FROM t WHERE \"match\"(name, 'John', operator='AND')"); + assertContains(result, "MAP['query', 'John'], MAP['operator', 'AND']"); + } + + @Test + public void testMultipleEqualsArgs() throws Exception { + SqlNode result = + rewrite("SELECT * FROM t WHERE \"match\"(name, 'John', operator='AND', boost=2.0)"); + assertContains(result, "MAP['operator', 'AND'], MAP['boost', 2.0]"); + } + + @Test + public void testMultiMatchUsesFieldsParamName() throws Exception { + SqlNode result = rewrite("SELECT * FROM t WHERE multi_match(name, 'John')"); + assertContains(result, "MAP['fields', `name`], MAP['query', 'John']"); + } + + @Test + public void testNonRelevanceFunctionUntouched() throws Exception { + SqlNode parsed = parse("SELECT upper(name) FROM t"); + SqlNode result = parsed.accept(NamedArgRewriter.INSTANCE); + assertSame(parsed, result); + } + + @Test + public void testAllEqualsArgsNoPositional() throws Exception { + // Not valid V2 match syntax, but multi_match supports this form. + // Shuttle treats all = as named options — no positional wrapping. + SqlNode result = rewrite("SELECT * FROM t WHERE multi_match(fields=name, query='John')"); + assertContains(result, "MAP['fields', `name`], MAP['query', 'John']"); + } + + @Test + public void testReservedWordAsNamedArgKey() throws Exception { + // 'escape' is a SQL reserved word and a valid query_string parameter. 
+ // getSimple() must be used instead of toString() to avoid backtick-decorated keys. + SqlNode result = rewrite("SELECT * FROM t WHERE query_string(name, 'test*', \"escape\"=true)"); + assertContains(result, "MAP['escape', TRUE]"); + } + + @Test + public void testEqualsBeforePositionalThrows() throws Exception { + // Not valid V2 syntax — positional must come first. + // = at index 0 goes to EQUALS branch, but remaining positional args exceed paramNames. + try { + rewrite("SELECT * FROM t WHERE \"match\"(operator='AND', name, 'John')"); + fail("Expected IllegalArgumentException for mixed order"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Invalid arguments for function")); + } + } + + @Test + public void testExtraPositionalArgsBeyondParamNamesThrows() throws Exception { + // match has 2 param names (field, query); 3 positional args causes IndexOutOfBounds + try { + rewrite("SELECT * FROM t WHERE \"match\"(a, b, c)"); + fail("Expected IllegalArgumentException for extra positional args"); + } catch (IllegalArgumentException e) { + assertTrue(e.getMessage().contains("Invalid arguments for function")); + } + } + + private static SqlNode rewrite(String sql) throws Exception { + return parse(sql).accept(NamedArgRewriter.INSTANCE); + } + + private static SqlNode parse(String sql) throws Exception { + return SqlParser.create(sql, PARSER_CONFIG).parseStmt(); + } + + private static void assertContains(SqlNode node, String expected) { + String actual = node.toString().replaceAll("\\n", " "); + assertTrue( + "Expected to contain: " + expected + "\nActual: " + actual, actual.contains(expected)); + } +} diff --git a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java index 000b145695a..42df6c5a7ee 100644 --- a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java +++ 
b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java @@ -7,6 +7,8 @@ import static org.apache.calcite.sql.type.SqlTypeName.INTEGER; import static org.apache.calcite.sql.type.SqlTypeName.VARCHAR; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.util.List; import java.util.Map; @@ -15,6 +17,8 @@ import org.apache.calcite.DataContext; import org.apache.calcite.linq4j.Enumerable; import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.schema.ScannableTable; @@ -55,14 +59,25 @@ protected Map getTableMap() { } }; - context = - UnifiedQueryContext.builder() - .language(QueryType.PPL) - .catalog(DEFAULT_CATALOG, testSchema) - .build(); + context = contextBuilder().build(); planner = new UnifiedQueryPlanner(context); } + /** + * Returns the query type for this test class. Subclasses override to test different languages. + */ + protected QueryType queryType() { + return QueryType.PPL; + } + + /** + * Creates a pre-configured context builder with test schema. Subclasses can override to customize + * context configuration (e.g., enable profiling). + */ + protected UnifiedQueryContext.Builder contextBuilder() { + return UnifiedQueryContext.builder().language(queryType()).catalog(DEFAULT_CATALOG, testSchema); + } + @After public void tearDown() throws Exception { if (context != null) { @@ -128,4 +143,77 @@ public boolean rolledUpColumnValidInsideAgg( return false; } } + + /** Fluent helper for asserting query plan results. */ + protected QueryAssert givenQuery(String query) { + return new QueryAssert(planner.plan(query)); + } + + /** Fluent helper for asserting query planning errors. 
*/ + protected QueryErrorAssert givenInvalidQuery(String query) { + try { + planner.plan(query); + throw new AssertionError("Expected query to fail: " + query); + } catch (Exception e) { + return new QueryErrorAssert(e); + } + } + + /** Fluent assertion on a query planning error. */ + protected static class QueryErrorAssert { + private final Exception error; + + QueryErrorAssert(Exception error) { + this.error = error; + } + + /** Assert the root cause error message contains the expected substring. */ + public QueryErrorAssert assertErrorMessage(String expected) { + Throwable cause = error; + while (cause.getCause() != null) { + cause = cause.getCause(); + } + String msg = cause.getMessage() != null ? cause.getMessage() : cause.getClass().getName(); + assertTrue( + "Expected error to contain: " + expected + "\nActual: " + msg, msg.contains(expected)); + return this; + } + } + + /** Fluent assertion on a query's logical plan. */ + protected static class QueryAssert { + private final RelNode plan; + + QueryAssert(RelNode plan) { + this.plan = plan; + } + + /** Assert the logical plan matches the expected tree string. */ + public QueryAssert assertPlan(String expected) { + assertEquals( + expected.stripTrailing(), + RelOptUtil.toString(plan).replaceAll("\\r\\n", "\n").stripTrailing()); + return this; + } + + /** Assert the logical plan contains the expected substring. */ + public QueryAssert assertPlanContains(String expected) { + String planStr = RelOptUtil.toString(plan).replaceAll("\\r\\n", "\n"); + assertTrue( + "Expected plan to contain: " + expected + "\nActual plan:\n" + planStr, + planStr.contains(expected)); + return this; + } + + /** Assert the output field names match. */ + public QueryAssert assertFields(String... names) { + assertEquals(List.of(names), plan.getRowType().getFieldNames()); + return this; + } + + /** Access the underlying plan for custom assertions. 
*/ + public RelNode plan() { + return plan; + } + } } diff --git a/async-query/src/test/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverterTest.java b/async-query/src/test/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverterTest.java index d7f8046a1b2..d9dceb27563 100644 --- a/async-query/src/test/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverterTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverterTest.java @@ -54,7 +54,8 @@ public void fromXContentWithDuplicateFields() throws IOException { Assertions.assertTrue( illegalArgumentException .getMessage() - .contains("Error while parsing the request body: Duplicate field 'datasource'")); + .contains( + "Error while parsing the request body: Duplicate Object property \"datasource\"")); } @Test diff --git a/benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedQueryBenchmark.java b/benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedQueryBenchmark.java index d75a87ea8c3..aeb47e78821 100644 --- a/benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedQueryBenchmark.java +++ b/benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedQueryBenchmark.java @@ -6,6 +6,7 @@ package org.opensearch.sql.api; import java.sql.PreparedStatement; +import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.calcite.rel.RelNode; import org.apache.calcite.sql.dialect.SparkSqlDialect; @@ -24,10 +25,12 @@ import org.openjdk.jmh.annotations.Warmup; import org.opensearch.sql.api.compiler.UnifiedQueryCompiler; import org.opensearch.sql.api.transpiler.UnifiedQueryTranspiler; +import org.opensearch.sql.executor.QueryType; /** - * JMH benchmark for measuring the overhead of unified query API components when processing queries. - * This provides baseline metrics and guidance for API consumers during integration. 
+ * JMH benchmark for measuring the overhead of unified query API components when processing PPL and + * SQL queries. The {@code language} and {@code queryPattern} parameters produce a cross-product, + * enabling side-by-side comparison of equivalent queries across both languages. */ @Warmup(iterations = 2, time = 1) @Measurement(iterations = 5, time = 1) @@ -37,25 +40,69 @@ @Fork(value = 1) public class UnifiedQueryBenchmark extends UnifiedQueryTestBase { - /** Common query patterns for benchmarking. */ - @Param({ - "source = catalog.employees", - "source = catalog.employees | where age > 30", - "source = catalog.employees | stats count() by department", - "source = catalog.employees | sort - age", - "source = catalog.employees | where age > 25 | stats avg(age) by department | sort - department" - }) - private String query; + private static final Map PPL_QUERIES = + Map.of( + "scan", "source = catalog.employees", + "filter", "source = catalog.employees | where age > 30", + "aggregate", "source = catalog.employees | stats count() by department", + "sort", "source = catalog.employees | sort - age", + "complex", + """ + source = catalog.employees \ + | where age > 25 \ + | stats avg(age) by department \ + | sort - department\ + """); - /** Transpiler for converting logical plans to SQL strings. 
*/ - private UnifiedQueryTranspiler transpiler; + private static final Map SQL_QUERIES = + Map.of( + "scan", "SELECT * FROM catalog.employees", + "filter", + """ + SELECT * + FROM catalog.employees + WHERE age > 30\ + """, + "aggregate", + """ + SELECT department, count(*) + FROM catalog.employees + GROUP BY department\ + """, + "sort", + """ + SELECT * + FROM catalog.employees + ORDER BY age DESC\ + """, + "complex", + """ + SELECT department, avg(age) + FROM catalog.employees + WHERE age > 25 + GROUP BY department + ORDER BY department\ + """); + + @Param({"PPL", "SQL"}) + private String language; + + @Param({"scan", "filter", "aggregate", "sort", "complex"}) + private String queryPattern; - /** Compiler for converting logical plans to executable statements. */ + private String query; + private UnifiedQueryTranspiler transpiler; private UnifiedQueryCompiler compiler; + @Override + protected QueryType queryType() { + return QueryType.valueOf(language); + } + @Setup(Level.Trial) public void setUpBenchmark() { super.setUp(); + query = (language.equals("PPL") ? 
PPL_QUERIES : SQL_QUERIES).get(queryPattern); transpiler = UnifiedQueryTranspiler.builder().dialect(SparkSqlDialect.DEFAULT).build(); compiler = new UnifiedQueryCompiler(context); } diff --git a/build.gradle b/build.gradle index 7c672cc2f22..9a52b144c10 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ buildscript { ext { - opensearch_version = System.getProperty("opensearch.version", "3.6.0-SNAPSHOT") + opensearch_version = System.getProperty("opensearch.version", "3.7.0-SNAPSHOT") isSnapshot = "true" == System.getProperty("build.snapshot", "true") buildVersionQualifier = System.getProperty("build.version_qualifier", "") version_tokens = opensearch_version.tokenize('-') diff --git a/common/build.gradle b/common/build.gradle index d839466f886..233eb30d797 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -32,6 +32,15 @@ repositories { mavenCentral() } +test { + maxParallelForks = Runtime.runtime.availableProcessors() + useJUnitPlatform() + testLogging { + events "passed", "skipped", "failed" + exceptionFormat "full" + } +} + dependencies { api "org.antlr:antlr4-runtime:4.13.2" api group: 'com.google.guava', name: 'guava', version: "${guava_version}" @@ -52,6 +61,8 @@ dependencies { testImplementation group: 'org.mockito', name: 'mockito-core', version: "${mockito_version}" testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: "${mockito_version}" testImplementation group: 'com.squareup.okhttp3', name: 'mockwebserver', version: '4.12.0' + + testRuntimeOnly('org.junit.platform:junit-platform-launcher') } diff --git a/common/src/main/java/org/opensearch/sql/common/error/ErrorCode.java b/common/src/main/java/org/opensearch/sql/common/error/ErrorCode.java new file mode 100644 index 00000000000..c86acd0d4e5 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/error/ErrorCode.java @@ -0,0 +1,57 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package 
org.opensearch.sql.common.error; + +/** + * Machine-readable error codes for categorizing exceptions. These codes help clients handle + * specific error types programmatically.
    + *
    + * Not a complete list, currently seeded with some initial values. Feel free to add variants or + * remove dead variants over time. + */ +public enum ErrorCode { + /** Field not found in the index mapping */ + FIELD_NOT_FOUND, + + /** Syntax error in query parsing */ + SYNTAX_ERROR, + + /** Ambiguous field reference (multiple fields with same name) */ + AMBIGUOUS_FIELD, + + /** Generic semantic validation error */ + SEMANTIC_ERROR, + + /** Expression evaluation failed */ + EVALUATION_ERROR, + + /** Type mismatch or type validation error */ + TYPE_ERROR, + + /** Unsupported feature or operation */ + UNSUPPORTED_OPERATION, + + /** Resource limit exceeded (memory, CPU, etc.) */ + RESOURCE_LIMIT_EXCEEDED, + + /** Index or datasource not found */ + INDEX_NOT_FOUND, + + /** Permission denied or insufficient privileges */ + PERMISSION_DENIED, + + /** Query planning failed */ + PLANNING_ERROR, + + /** Query execution failed */ + EXECUTION_ERROR, + + /** + * Unknown or unclassified error -- don't set this manually, it's filled in as the default if no + * other code applies. + */ + UNKNOWN +} diff --git a/common/src/main/java/org/opensearch/sql/common/error/ErrorReport.java b/common/src/main/java/org/opensearch/sql/common/error/ErrorReport.java new file mode 100644 index 00000000000..1430af5ed16 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/error/ErrorReport.java @@ -0,0 +1,282 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import lombok.Getter; + +/** + * Error report that wraps exceptions and accumulates contextual information as errors bubble up + * through system layers. + * + *

    Inspired by Rust's anyhow/eyre libraries, this class allows each layer to add context without + * modifying the original exception message. + * + *

    Example usage: + * + *

    + * try {
    + *   resolveField(fieldName);
    + * } catch (IllegalArgumentException e) {
    + *   throw ErrorReport.wrap(e)
    + *     .code(ErrorCode.FIELD_NOT_FOUND)
    + *     .stage(QueryProcessingStage.ANALYZING)
    + *     .location("while resolving fields in the index mapping")
    + *     .suggestion("Did you mean: '" + suggestedField + "'?")
    + *     .context("index_pattern", indexPattern)
    + *     .context("position", cursorPosition)
    + *     .build();
    + * }
    + * 
    + */ +public class ErrorReport extends RuntimeException { + + @Getter private final Exception cause; + @Getter private final ErrorCode code; + @Getter private final QueryProcessingStage stage; + private final List locationChain; + private final Map context; + @Getter private final String suggestion; + @Getter private final String details; + + private ErrorReport(Builder builder) { + super(builder.cause.getMessage(), builder.cause); + this.cause = builder.cause; + this.code = builder.code; + this.stage = builder.stage; + this.locationChain = new ArrayList<>(builder.locationChain); + this.context = new LinkedHashMap<>(builder.context); + this.suggestion = builder.suggestion; + this.details = builder.details; + } + + /** + * Wraps an exception with an error report builder. If the exception is already an ErrorReport, + * returns a builder initialized with the existing report's data. + * + * @param cause The underlying exception + * @return A builder for constructing the error report + */ + public static Builder wrap(Exception cause) { + if (cause instanceof ErrorReport existing) { + return new Builder(existing.cause) + .code(existing.code) + .stage(existing.stage) + .details(existing.details) + .suggestion(existing.suggestion) + .addLocationChain(existing.locationChain) + .addContext(existing.context); + } + return new Builder(cause); + } + + public List getLocationChain() { + return new ArrayList<>(locationChain); + } + + public Map getContext() { + return new LinkedHashMap<>(context); + } + + /** Get the original exception type name. */ + public String getExceptionType() { + return cause.getClass().getSimpleName(); + } + + /** + * Format as a detailed message with all context information. This is suitable for logging or + * detailed error displays. 
+ */ + public String toDetailedMessage() { + StringBuilder sb = new StringBuilder(); + + sb.append("Error"); + if (code != null && code != ErrorCode.UNKNOWN) { + sb.append(" [").append(code).append("]"); + } + if (stage != null) { + sb.append(" at stage: ").append(stage.getDisplayName()); + } + sb.append("\n"); + + if (details != null) { + sb.append("Details: ").append(details).append("\n"); + } + + if (!locationChain.isEmpty()) { + sb.append("\nLocation chain:\n"); + for (int i = 0; i < locationChain.size(); i++) { + // The location chain is typically appended to as we traverse up the stack, but for reading + // the error it makes more sense to go down the stack. So we reverse it. + sb.append(" ") + .append(i + 1) + .append(". ") + .append(locationChain.get(locationChain.size() - i - 1)) + .append("\n"); + } + } + + if (!context.isEmpty()) { + sb.append("\nContext:\n"); + context.forEach( + (key, value) -> sb.append(" ").append(key).append(": ").append(value).append("\n")); + } + + if (suggestion != null) { + sb.append("\nSuggestion: ").append(suggestion).append("\n"); + } + + return sb.toString(); + } + + /** + * Convert to JSON-compatible map structure for REST API responses. + * + * @return Map containing error information in structured format + */ + public Map toJsonMap() { + Map json = new LinkedHashMap<>(); + + json.put("type", getExceptionType()); + + if (code != null) { + json.put("code", code.name()); + } + + if (details != null) { + json.put("details", details); + } + + if (!locationChain.isEmpty()) { + // The location chain is typically appended to as we traverse up the stack, but for reading + // the error it makes more sense to go down the stack. So we reverse it. 
+ json.put("location", locationChain.reversed()); + } + + // Build context with stage information included + Map contextMap = new LinkedHashMap<>(context); + if (stage != null) { + contextMap.put("stage", stage.toJsonKey()); + contextMap.put("stage_description", stage.getDisplayName()); + } + if (!contextMap.isEmpty()) { + json.put("context", contextMap); + } + + if (suggestion != null) { + json.put("suggestion", suggestion); + } + + return json; + } + + /** Builder for constructing error reports with contextual information. */ + public static class Builder { + private final Exception cause; + private ErrorCode code = ErrorCode.UNKNOWN; + private QueryProcessingStage stage = null; + private final List locationChain = new ArrayList<>(); + private final Map context = new LinkedHashMap<>(); + private String suggestion = null; + private String details = null; + + private Builder(Exception cause) { + this.cause = cause; + // Default details to the original exception message + this.details = + cause.getLocalizedMessage() != null ? cause.getLocalizedMessage() : cause.getMessage(); + } + + /** Set the machine-readable error code. */ + public Builder code(ErrorCode code) { + this.code = code; + return this; + } + + /** Set the query processing stage where the error occurred. */ + public Builder stage(QueryProcessingStage stage) { + // Don't overwrite more-specific stages with less-specific ones + if (this.stage == null) { + this.stage = stage; + } + return this; + } + + /** + * Add a location to the chain describing where the error occurred. Locations are added in order + * from innermost to outermost layer. + * + * @param location Description like "while resolving fields in index mapping" + */ + public Builder location(String location) { + this.locationChain.add(location); + return this; + } + + /** + * Add multiple locations from an existing chain. 
+ * + * @param locations List of location descriptions + */ + private Builder addLocationChain(List locations) { + this.locationChain.addAll(locations); + return this; + } + + /** + * Add structured context data (index name, query, position, etc). + * + * @param key Context key + * @param value Context value (will be converted to string for serialization) + */ + public Builder context(String key, Object value) { + this.context.put(key, value); + return this; + } + + /** + * Add multiple context entries from an existing map. + * + * @param contextMap Map of context key-value pairs + */ + private Builder addContext(Map contextMap) { + this.context.putAll(contextMap); + return this; + } + + /** + * Set a suggestion for how to fix the error. + * + * @param suggestion User-facing suggestion like "Did you mean: 'foo'?" + */ + public Builder suggestion(String suggestion) { + this.suggestion = suggestion; + return this; + } + + /** + * Override the default details message. By default, uses the wrapped exception's message. + * + * @param details Custom details message + */ + public Builder details(String details) { + this.details = details; + return this; + } + + /** + * Build and throw the error report as an exception. + * + * @return The constructed error report (can be thrown) + */ + public ErrorReport build() { + return new ErrorReport(this); + } + } +} diff --git a/common/src/main/java/org/opensearch/sql/common/error/QueryProcessingStage.java b/common/src/main/java/org/opensearch/sql/common/error/QueryProcessingStage.java new file mode 100644 index 00000000000..98da1db5880 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/error/QueryProcessingStage.java @@ -0,0 +1,46 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +import lombok.Getter; + +/** + * Enumeration of query processing stages for error location tracking. 
These stages represent the + * major phases of query execution in the Calcite query planner. May not be a complete list, add + * stages if needed. + */ +@Getter +public enum QueryProcessingStage { + /** + * ANALYZING stage: Semantic validation and type checking. Errors: Field not found, type + * mismatches, semantic violations. + */ + ANALYZING("Parsing and validating the query"), + + /** + * PLAN_CONVERSION stage: Conversion to Calcite execution plan with system limits. Errors: + * Unsupported operations, plan conversion failures. + */ + PLAN_CONVERSION("Preparing the query for physical execution"), + + /** + * EXECUTING stage: Query execution via OpenSearch engine. Errors: Execution failures, index + * access errors, resource limits. + */ + EXECUTING("Running the query"); + + /** Human-readable display name for this stage. */ + private final String displayName; + + QueryProcessingStage(String displayName) { + this.displayName = displayName; + } + + /** Get lowercase name suitable for JSON serialization. */ + public String toJsonKey() { + return name().toLowerCase(); + } +} diff --git a/common/src/main/java/org/opensearch/sql/common/error/StageErrorHandler.java b/common/src/main/java/org/opensearch/sql/common/error/StageErrorHandler.java new file mode 100644 index 00000000000..2827293a9e2 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/error/StageErrorHandler.java @@ -0,0 +1,103 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +import java.util.function.Supplier; + +/** + * Utility class for handling errors at specific query processing stages. This provides a consistent + * way to wrap operations with stage-specific error context. + * + *

    Example usage in QueryService: + * + *

    + * RelNode relNode = StageErrorHandler.executeStage(
    + *   QueryProcessingStage.ANALYZING,
    + *   () -> analyze(plan, context),
    + *   "while analyzing query plan"
    + * );
    + * 
    + */ +public class StageErrorHandler { + + /** + * Execute an operation and wrap any thrown exceptions with stage context. + * + * @param stage The query processing stage + * @param operation The operation to execute + * @param location Optional location description for error context + * @param Return type of the operation + * @return The result of the operation + * @throws ErrorReport if the operation throws an exception + */ + public static T executeStage( + QueryProcessingStage stage, Supplier operation, String location) { + try { + return operation.get(); + } catch (Exception e) { + throw ErrorReport.wrap(e).stage(stage).location(location).build(); + } + } + + /** + * Execute an operation and wrap any thrown exceptions with stage context (no location). + * + * @param stage The query processing stage + * @param operation The operation to execute + * @param Return type of the operation + * @return The result of the operation + * @throws ErrorReport if the operation throws an exception + */ + public static T executeStage(QueryProcessingStage stage, Supplier operation) { + return executeStage(stage, operation, null); + } + + /** + * Execute a void operation and wrap any thrown exceptions with stage context. + * + * @param stage The query processing stage + * @param operation The operation to execute + * @param location Optional location description for error context + * @throws ErrorReport if the operation throws an exception + */ + public static void executeStageVoid( + QueryProcessingStage stage, Runnable operation, String location) { + try { + operation.run(); + } catch (Exception e) { + throw ErrorReport.wrap(e).stage(stage).location(location).build(); + } + } + + /** + * Execute a void operation and wrap any thrown exceptions with stage context (no location). 
+ * + * @param stage The query processing stage + * @param operation The operation to execute + * @throws ErrorReport if the operation throws an exception + */ + public static void executeStageVoid(QueryProcessingStage stage, Runnable operation) { + executeStageVoid(stage, operation, null); + } + + /** + * Wrap an exception with stage context without executing an operation. Useful for re-throwing + * exceptions with additional context. + * + * @param stage The query processing stage + * @param e The exception to wrap + * @param location Optional location description + * @return ErrorReport with stage context + */ + public static ErrorReport wrapWithStage( + QueryProcessingStage stage, Exception e, String location) { + ErrorReport.Builder builder = ErrorReport.wrap(e).stage(stage); + if (location != null) { + builder.location(location); + } + return builder.build(); + } +} diff --git a/common/src/test/java/org/opensearch/sql/common/error/ErrorReportTest.java b/common/src/test/java/org/opensearch/sql/common/error/ErrorReportTest.java new file mode 100644 index 00000000000..e3460d7a703 --- /dev/null +++ b/common/src/test/java/org/opensearch/sql/common/error/ErrorReportTest.java @@ -0,0 +1,152 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.common.error; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Map; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; +import org.junit.jupiter.api.Test; + +/** Unit tests for ErrorReport. 
*/ +public class ErrorReportTest { + + @Test + public void testBasicErrorReport() { + Exception cause = new IllegalArgumentException("Field not found"); + + ErrorReport report = + ErrorReport.wrap(cause) + .code(ErrorCode.FIELD_NOT_FOUND) + .stage(QueryProcessingStage.ANALYZING) + .location("while resolving fields in projection") + .context("field_name", "timestamp") + .context("table", "logs") + .suggestion("Check that field exists") + .build(); + + assertEquals(ErrorCode.FIELD_NOT_FOUND, report.getCode()); + assertEquals(QueryProcessingStage.ANALYZING, report.getStage()); + assertEquals(1, report.getLocationChain().size()); + assertEquals("while resolving fields in projection", report.getLocationChain().get(0)); + assertEquals("timestamp", report.getContext().get("field_name")); + assertEquals("logs", report.getContext().get("table")); + assertEquals("Check that field exists", report.getSuggestion()); + assertEquals("Field not found", report.getDetails()); + } + + @Test + public void testErrorReportJsonMapWithStageInContext() { + Exception cause = new IllegalArgumentException("Field not found"); + + ErrorReport report = + ErrorReport.wrap(cause) + .code(ErrorCode.FIELD_NOT_FOUND) + .stage(QueryProcessingStage.ANALYZING) + .location("while analyzing query") + .context("field_name", "test") + .build(); + + Map json = report.toJsonMap(); + + // Check top-level fields + assertEquals("IllegalArgumentException", json.get("type")); + assertEquals("FIELD_NOT_FOUND", json.get("code")); + assertEquals("Field not found", json.get("details")); + + // Check location + assertTrue(json.containsKey("location")); + + // Check that stage is in context + assertTrue(json.containsKey("context")); + @SuppressWarnings("unchecked") + Map context = (Map) json.get("context"); + assertEquals("analyzing", context.get("stage")); + assertEquals("Parsing and validating the query", context.get("stage_description")); + assertEquals("test", context.get("field_name")); + } + + @Test + public void 
testIdempotentWrapping() { + Exception originalCause = new IllegalArgumentException("Original error"); + + ErrorReport firstWrap = + ErrorReport.wrap(originalCause) + .code(ErrorCode.FIELD_NOT_FOUND) + .stage(QueryProcessingStage.ANALYZING) + .context("field_name", "test") + .build(); + + // Wrap again with additional context + ErrorReport secondWrap = + ErrorReport.wrap(firstWrap) + .stage(QueryProcessingStage.PLAN_CONVERSION) + .location("during plan conversion") + .context("additional_context", "value") + .build(); + + // Original cause should still be the IllegalArgumentException + assertEquals("Original error", secondWrap.getDetails()); + + // Should have accumulated context + Map context = secondWrap.getContext(); + assertEquals("test", context.get("field_name")); + assertEquals("value", context.get("additional_context")); + + // Should have location from second wrap + assertTrue(secondWrap.getLocationChain().contains("during plan conversion")); + } + + @Test + public void testStageErrorHandler() { + // Test successful execution + String result = + StageErrorHandler.executeStage( + QueryProcessingStage.ANALYZING, () -> "success", "test operation"); + + assertEquals("success", result); + + // Test error wrapping + Exception thrown = + assertThrows( + ErrorReport.class, + () -> + StageErrorHandler.executeStage( + QueryProcessingStage.ANALYZING, + () -> { + throw new IllegalArgumentException("Test error"); + }, + "while testing")); + + ErrorReport report = (ErrorReport) thrown; + assertEquals(QueryProcessingStage.ANALYZING, report.getStage()); + assertTrue(report.getLocationChain().contains("while testing")); + } + + @Test + public void testToDetailedMessage() { + Exception cause = new IllegalArgumentException("Field not found"); + + ErrorReport report = + ErrorReport.wrap(cause) + .code(ErrorCode.FIELD_NOT_FOUND) + .stage(QueryProcessingStage.ANALYZING) + .location("while resolving fields") + .context("field_name", "test") + .suggestion("Check field name") + 
.build(); + + String message = report.toDetailedMessage(); + + MatcherAssert.assertThat(message, CoreMatchers.containsString("FIELD_NOT_FOUND")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("validating the query")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("Field not found")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("while resolving fields")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("field_name")); + MatcherAssert.assertThat(message, CoreMatchers.containsString("Check field name")); + } +} diff --git a/core/build.gradle b/core/build.gradle index 23f9b37e317..f567fb85653 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -33,6 +33,7 @@ plugins { } repositories { + mavenLocal() mavenCentral() } @@ -63,6 +64,12 @@ dependencies { } api 'org.apache.calcite:calcite-linq4j:1.41.0' api project(':common') + compileOnly 'org.opensearch.sandbox:analytics-api:3.7.0-SNAPSHOT' + // Needed because analytics-api's QueryPlanExecutor signature uses + // org.opensearch.core.action.ActionListener; AnalyticsExecutionEngine references that type. 
+ compileOnly group: 'org.opensearch', name: 'opensearch-core', version: "${opensearch_version}" + testImplementation 'org.opensearch.sandbox:analytics-api:3.7.0-SNAPSHOT' + testImplementation group: 'org.opensearch', name: 'opensearch-core', version: "${opensearch_version}" implementation "com.github.seancfoley:ipaddress:5.4.2" implementation "com.jayway.jsonpath:json-path:2.9.0" diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index fc96f2f389c..c25b027e4ec 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -106,6 +106,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -897,6 +898,11 @@ public LogicalPlan visitMultisearch(Multisearch node, AnalysisContext context) { throw getOnlyForCalciteException("Multisearch"); } + @Override + public LogicalPlan visitUnion(Union node, AnalysisContext context) { + throw getOnlyForCalciteException("Union"); + } + private LogicalSort buildSort( LogicalPlan child, AnalysisContext context, Integer count, List sortFields) { ExpressionReferenceOptimizer optimizer = diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 7f02bb3ef1b..be02547a2da 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -93,6 +93,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; +import 
org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -472,6 +473,10 @@ public T visitMultisearch(Multisearch node, C context) { return visitChildren(node, context); } + public T visitUnion(Union node, C context) { + return visitChildren(node, context); + } + public T visitAddTotals(AddTotals node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/statement/ExplainMode.java b/core/src/main/java/org/opensearch/sql/ast/statement/ExplainMode.java index 9043f05929b..b52d64f4867 100644 --- a/core/src/main/java/org/opensearch/sql/ast/statement/ExplainMode.java +++ b/core/src/main/java/org/opensearch/sql/ast/statement/ExplainMode.java @@ -8,6 +8,7 @@ import java.util.Locale; import lombok.Getter; import lombok.RequiredArgsConstructor; +import org.apache.calcite.sql.SqlExplainLevel; @RequiredArgsConstructor public enum ExplainMode { @@ -26,4 +27,13 @@ public static ExplainMode of(String mode) { return ExplainMode.STANDARD; } } + + /** Convert to Calcite SqlExplainLevel for RelOptUtil.toString(). 
*/ + public SqlExplainLevel toExplainLevel() { + return switch (this) { + case SIMPLE -> SqlExplainLevel.NO_ATTRIBUTES; + case COST -> SqlExplainLevel.ALL_ATTRIBUTES; + default -> SqlExplainLevel.EXPPLAN_ATTRIBUTES; + }; + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java b/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java index 74406b0daf2..259330b2dba 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java @@ -23,6 +23,7 @@ @RequiredArgsConstructor public class Convert extends UnresolvedPlan { private final List conversions; + private final String timeFormat; private UnresolvedPlan child; @Override diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java index f29285560f9..f1457a8a540 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java @@ -45,8 +45,11 @@ public enum Direction { /** Target table for graph traversal lookup. */ private final UnresolvedPlan fromTable; - /** Field in sourceTable to start with. */ - private final Field startField; + /** Field in sourceTable to start with (piped mode). Null when using literal start values. */ + private @Nullable final Field startField; + + /** Literal start values for top-level graphlookup (mutually exclusive with startField). */ + private @Nullable final List startValues; /** Field in fromTable that represents the outgoing edge. 
*/ private final Field fromField; diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Union.java b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java new file mode 100644 index 00000000000..a96831567cb --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Union.java @@ -0,0 +1,44 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +/** Logical plan node for Union operation. Combines results from multiple datasets (UNION ALL). */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +@AllArgsConstructor +public class Union extends UnresolvedPlan { + private final List datasets; + + private Integer maxout; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + List newDatasets = + ImmutableList.builder().add(child).addAll(datasets).build(); + return new Union(newDatasets, maxout); + } + + @Override + public List getChild() { + return datasets; + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitUnion(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 489c933953f..1251f51b131 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -22,6 +22,7 @@ import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_SUBSEARCH; import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation; import static 
org.opensearch.sql.calcite.utils.PlanUtils.getRexCall; +import static org.opensearch.sql.calcite.utils.PlanUtils.stripInputSort; import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild; import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; @@ -35,6 +36,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -47,9 +49,13 @@ import org.apache.calcite.adapter.enumerable.RexToLixTranslator; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.ViewExpanders; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFamily; @@ -95,6 +101,7 @@ import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.PatternMethod; import org.opensearch.sql.ast.expression.PatternMode; +import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.UnresolvedExpression; @@ -145,13 +152,13 @@ import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; import org.opensearch.sql.ast.tree.Search; -import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Trendline; import 
org.opensearch.sql.ast.tree.Trendline.TrendlineType; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -167,6 +174,8 @@ import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; import org.opensearch.sql.calcite.utils.WildcardUtils; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.common.patterns.PatternUtils; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.datasource.DataSourceService; @@ -181,6 +190,18 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor { + /** + * Prefix/suffix applied to right-side fields in the streamstats self-join plan to avoid name + * collisions with the left side and to make the renaming reversible. + */ + private static final String RIGHT_SIDE_FIELD_PREFIX = "__r_"; + + private static final String RIGHT_SIDE_FIELD_SUFFIX = "__"; + + /** Name of the right-side sequence column in the streamstats self-join plan. 
*/ + private static final String RIGHT_SIDE_SEQ_COLUMN = + RIGHT_SIDE_FIELD_PREFIX + "seq" + RIGHT_SIDE_FIELD_SUFFIX; + private final CalciteRexNodeVisitor rexVisitor; private final CalciteAggCallVisitor aggVisitor; private final DataSourceService dataSourceService; @@ -337,7 +358,7 @@ public RelNode visitRegex(Regex node, CalcitePlanContext context) { return context.relBuilder.peek(); } - public RelNode visitRex(Rex node, CalcitePlanContext context) { + private RelNode innerRex(Rex node, CalcitePlanContext context) { visitChildren(node, context); RexNode fieldRex = rexVisitor.analyze(node.getField(), context); @@ -402,6 +423,17 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) { return context.relBuilder.peek(); } + public RelNode visitRex(Rex node, CalcitePlanContext context) { + try { + return innerRex(node, context); + } catch (RuntimeException ex) { + throw ErrorReport.wrap(ex) + .location("while processing the rex command") + .context("command", "rex") + .build(); + } + } + private boolean containsSubqueryExpression(Node expr) { if (expr == null) { return false; @@ -649,6 +681,10 @@ public RelNode visitRename(Rename node, CalcitePlanContext context) { } List matchingFields = WildcardRenameUtils.matchFieldNames(sourcePattern, newNames); + // Exclude metadata fields from wildcard rename (issue #5099) + if (WildcardRenameUtils.isWildcardPattern(sourcePattern)) { + matchingFields.removeIf(this::isMetadataField); + } for (String fieldName : matchingFields) { String newName = @@ -679,7 +715,7 @@ private void removeFieldIfExists( } @Override - public RelNode visitSort(Sort node, CalcitePlanContext context) { + public RelNode visitSort(org.opensearch.sql.ast.tree.Sort node, CalcitePlanContext context) { visitChildren(node, context); List sortList = node.getSortList().stream() @@ -727,25 +763,110 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { return context.relBuilder.peek(); } - private static final String REVERSE_ROW_NUM = 
"__reverse_row_num__"; + /** + * Insert a reversed sort node after finding the original sort in the tree. This rebuilds the tree + * with the reversed sort inserted right after the original sort. + * + * @param root the root of the tree to rebuild + * @param reversedCollation the reversed collation to insert + * @param context the Calcite plan context + * @return the rebuilt tree with reversed sort inserted + */ + private RelNode insertReversedSortInTree( + RelNode root, RelCollation reversedCollation, CalcitePlanContext context) { + return root.accept( + new RelHomogeneousShuttle() { + boolean sortFound = false; + + @Override + public RelNode visit(RelNode other) { + if (!sortFound && other instanceof Sort) { + Sort sort = (Sort) other; + // Treat a Sort with fetch or offset as a barrier (limit node). + // Place the reversed sort above the barrier to preserve limit semantics, + // rather than inserting below the downstream collation Sort. + if (sort.fetch != null || sort.offset != null) { + sortFound = true; + RelNode visitedBarrier = super.visit(other); + return LogicalSort.create(visitedBarrier, reversedCollation, null, null); + } + // Found a collation Sort - replace in-place with reversed collation. + // Stacking a reversed sort on top would create consecutive sorts, and + // Calcite's SortRemoveRule would merge them keeping the original direction. 
+ if (sort.getCollation() != null + && !sort.getCollation().getFieldCollations().isEmpty()) { + sortFound = true; + RelNode visitedInput = sort.getInput().accept(this); + return LogicalSort.create(visitedInput, reversedCollation, null, null); + } + } + // For all other nodes, continue traversal + return super.visit(other); + } + }); + } @Override public RelNode visitReverse( org.opensearch.sql.ast.tree.Reverse node, CalcitePlanContext context) { visitChildren(node, context); - // Add ROW_NUMBER() column - RexNode rowNumber = - context - .relBuilder - .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as(REVERSE_ROW_NUM); - context.relBuilder.projectPlus(rowNumber); - // Sort by row number descending - context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM))); - // Remove row number column - context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM)); + + // Check if there's an existing sort to reverse + List collations = + context.relBuilder.getCluster().getMetadataQuery().collations(context.relBuilder.peek()); + RelCollation collation = collations != null && !collations.isEmpty() ? collations.get(0) : null; + + if (collation != null && !collation.getFieldCollations().isEmpty()) { + // If there's an existing sort, reverse its direction + RelCollation reversedCollation = PlanUtils.reverseCollation(collation); + RelNode currentNode = context.relBuilder.peek(); + if (currentNode instanceof Sort) { + Sort existingSort = (Sort) currentNode; + if (existingSort.getCollation() != null + && !existingSort.getCollation().getFieldCollations().isEmpty() + && existingSort.fetch == null + && existingSort.offset == null) { + // Pure collation sort (no fetch/offset) - replace in-place to avoid consecutive + // sorts. Calcite's SortRemoveRule merges consecutive LogicalSort nodes and keeps + // the lower sort's direction, which discards the reversed direction. 
+ // Replacing in-place avoids this issue. + RelCollation reversedFromSort = PlanUtils.reverseCollation(existingSort.getCollation()); + RelNode replacedSort = + LogicalSort.create(existingSort.getInput(), reversedFromSort, null, null); + PlanUtils.replaceTop(context.relBuilder, replacedSort); + } else { + // Sort with fetch/offset (limit) or fetch-only Sort - add a separate reversed + // sort on top so the "limit then reverse" semantics are preserved. + context.relBuilder.sort(reversedCollation); + } + } else { + context.relBuilder.sort(reversedCollation); + } + } else { + // Collation not found on current node - try backtracking + RelNode currentNode = context.relBuilder.peek(); + RelCollation backtrackCollation = PlanUtils.findInputCollation(currentNode); + + if (backtrackCollation != null && !backtrackCollation.getFieldCollations().isEmpty()) { + // Found collation through backtracking - rebuild tree with reversed sort + RelCollation reversedCollation = PlanUtils.reverseCollation(backtrackCollation); + RelNode rebuiltTree = insertReversedSortInTree(currentNode, reversedCollation, context); + // Replace the current node in the builder with the rebuilt tree + context.relBuilder.build(); // Pop the current node + context.relBuilder.push(rebuiltTree); // Push the rebuilt tree + } else { + // Check if @timestamp field exists in the row type + List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + if (fieldNames.contains(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP)) { + // If @timestamp exists, sort by it in descending order + context.relBuilder.sort( + context.relBuilder.desc( + context.relBuilder.field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP))); + } + // If neither collation nor @timestamp exists, ignore the reverse command (no-op) + } + } + return context.relBuilder.peek(); } @@ -780,7 +901,7 @@ public RelNode visitTranspose( // Step 2: UNPIVOT b.unpivot( false, - ImmutableList.of("value"), + 
ImmutableList.of(PlanUtils.VALUE_COLUMN_FOR_TRANSPOSE), ImmutableList.of(columnName), fieldNames.stream() .map( @@ -802,7 +923,7 @@ public RelNode visitTranspose( // Step 4: PIVOT b.pivot( b.groupKey(trimmedColumnName), - ImmutableList.of(b.max(b.field("value"))), + ImmutableList.of(b.max(b.field(PlanUtils.VALUE_COLUMN_FOR_TRANSPOSE))), ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)), IntStream.rangeClosed(1, maxRows) .mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i)))) @@ -830,10 +951,60 @@ public RelNode visitBin(Bin node, CalcitePlanContext context) { String alias = node.getAlias() != null ? node.getAlias() : fieldName; projectPlusOverriding(List.of(binExpression), List.of(alias), context); + dropStructParentsFor(alias, context); return context.relBuilder.peek(); } + /** + * If {@code dottedName} addresses a nested leaf inside a struct that OpenSearch has exposed + * through both its struct-parent columns and its flattened leaf columns (e.g. the telemetry + * mapping exposes {@code resource}, {@code resource.attributes}, ..., {@code + * resource.attributes.telemetry.sdk.version} side-by-side), drop the struct-parent prefixes from + * the current row. This keeps a subsequent {@link #tryToRemoveNestedFields(CalcitePlanContext)} + * pass from collapsing the flattened leaves back into the parents when the final implicit {@code + * fields *} projection runs. + * + *

    This preserves the behaviour that issue #4482 originally required for {@code bin} on a + * nested field without an explicit {@code fields} projection. It is invoked from two places: + * + *

      + *
    • {@link #projectPlusOverriding(List, List, CalcitePlanContext)} — for every override whose + * new name exactly matched a pre-existing column. This catches {@code eval} (and every + * other command that funnels through {@code projectPlusOverriding}) assigning to an + * existing flattened nested leaf. + *
    • {@link #visitBin(Bin, CalcitePlanContext)} — defensively, so that {@code bin} keeps + * dropping struct parents even when the alias happens not to match an existing field name + * (e.g. when the user supplied a custom alias). This is also what the regression test in + * {@code CalciteBinCommandIT#testBinWithNestedFieldWithoutExplicitProjection} exercises. + *
    + * + * Using this narrowly-scoped pruning instead of a global prefix-override in {@link + * #shouldOverrideField} is what keeps issue #5185 and the reviewer's {@code eval agent.name = + * ...} case safe. + * + *

    No-op when no such struct-parent columns exist (e.g. flat columns or MAP roots from {@code + * spath}). + */ + private void dropStructParentsFor(String dottedName, CalcitePlanContext context) { + if (dottedName == null || dottedName.indexOf('.') < 0) { + return; + } + List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + List parentsToDrop = new ArrayList<>(); + int dotIdx = dottedName.indexOf('.'); + while (dotIdx >= 0) { + String prefix = dottedName.substring(0, dotIdx); + if (fieldNames.contains(prefix)) { + parentsToDrop.add(context.relBuilder.field(prefix)); + } + dotIdx = dottedName.indexOf('.', dotIdx + 1); + } + if (!parentsToDrop.isEmpty()) { + context.relBuilder.projectExcept(parentsToDrop); + } + } + @Override public RelNode visitParse(Parse node, CalcitePlanContext context) { visitChildren(node, context); @@ -1027,7 +1198,7 @@ public RelNode visitConvert(Convert node, CalcitePlanContext context) { ConversionState state = new ConversionState(); for (Let conversion : node.getConversions()) { - processConversion(conversion, state, context); + processConversion(conversion, node.getTimeFormat(), state, context); } return buildConversionProjection(state, context); @@ -1040,14 +1211,14 @@ private static class ConversionState { } private void processConversion( - Let conversion, ConversionState state, CalcitePlanContext context) { + Let conversion, String timeFormat, ConversionState state, CalcitePlanContext context) { String target = conversion.getVar().getField().toString(); UnresolvedExpression expression = conversion.getExpression(); if (expression instanceof Field) { processFieldCopyConversion(target, (Field) expression, state, context); } else if (expression instanceof Function) { - processFunctionConversion(target, (Function) expression, state, context); + processFunctionConversion(target, (Function) expression, timeFormat, state, context); } else { throw new SemanticCheckException("Convert command requires function call 
expressions"); } @@ -1070,7 +1241,11 @@ private void processFieldCopyConversion( } private void processFunctionConversion( - String target, Function function, ConversionState state, CalcitePlanContext context) { + String target, + Function function, + String timeFormat, + ConversionState state, + CalcitePlanContext context) { String functionName = function.getFuncName(); List args = function.getFuncArgs(); @@ -1087,8 +1262,7 @@ private void processFunctionConversion( state.seenFields.add(source); RexNode sourceField = context.relBuilder.field(source); - RexNode convertCall = - PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, functionName, sourceField); + RexNode convertCall = resolveConvertFunction(functionName, sourceField, timeFormat, context); if (!target.equals(source)) { state.additions.add(Pair.of(target, context.relBuilder.alias(convertCall, target))); @@ -1097,6 +1271,23 @@ private void processFunctionConversion( } } + private RexNode resolveConvertFunction( + String functionName, RexNode sourceField, String timeFormat, CalcitePlanContext context) { + + // Time functions that support timeformat parameter + Set timeFunctions = Set.of("ctime", "mktime"); + + if (timeFunctions.contains(functionName.toLowerCase()) && timeFormat != null) { + // For time functions with custom timeformat, pass the format as a second parameter + RexNode timeFormatLiteral = context.rexBuilder.makeLiteral(timeFormat); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, functionName, sourceField, timeFormatLiteral); + } else { + // Regular conversion functions or time functions without custom format + return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, functionName, sourceField); + } + } + private RelNode buildConversionProjection(ConversionState state, CalcitePlanContext context) { List originalFields = context.relBuilder.peek().getRowType().getFieldNames(); List projectList = new ArrayList<>(); @@ -1129,12 +1320,12 @@ private RelNode 
buildConversionProjection(ConversionState state, CalcitePlanCont private void projectPlusOverriding( List newFields, List newNames, CalcitePlanContext context) { - List originalFieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + Set originalFieldNameSet = + new HashSet<>(context.relBuilder.peek().getRowType().getFieldNames()); + List overriddenNames = + newNames.stream().filter(originalFieldNameSet::contains).toList(); List toOverrideList = - originalFieldNames.stream() - .filter(originalName -> shouldOverrideField(originalName, newNames)) - .map(a -> (RexNode) context.relBuilder.field(a)) - .toList(); + overriddenNames.stream().map(a -> (RexNode) context.relBuilder.field(a)).toList(); // 1. add the new fields, For example "age0, country0" context.relBuilder.projectPlus(newFields); // 2. drop the overriding field list, it's duplicated now. For example "age, country" @@ -1150,17 +1341,49 @@ private void projectPlusOverriding( expectedRenameFields.addAll(newNames); // 5. rename context.relBuilder.rename(expectedRenameFields); + // 6. For each overridden dotted-path name that matched an existing flattened nested leaf, + // prune the struct-parent columns that OpenSearch exposed side-by-side with that leaf. Without + // this, a downstream implicit `fields *` invokes `tryToRemoveNestedFields`, which would drop + // the freshly-assigned dotted leaf back out again because its struct-parent prefix is still in + // the row schema (see issue #4482 and the scratch coverage in CalciteEvalCommandIT). + // + // Gating on "the override actually fired" is what keeps the reviewer's PR #5351 case safe: + // `source=idx | fields agent | eval agent.name = "test"` has no pre-existing `agent.name` + // column, so overriddenNames is empty and the struct-parent `agent` survives untouched. + // It also keeps issue #5185 safe — spath introduces a MAP root and subsequent eval assigns + // to brand-new dotted paths that were not already in the row schema. 
+ for (String overridden : overriddenNames) { + dropStructParentsFor(overridden, context); + } } + /** + * Determine whether the column {@code originalName} should be replaced when a batch of new + * columns named {@code newNames} is being added. Only exact-name matches count as overrides — + * {@code eval foo.bar = ...} creates a brand new field literally named {@code foo.bar} and must + * never drop sibling or parent fields. This mirrors SPL1 semantics, where assigning a dotted name + * introduces a literal column of that name without touching any other field. + * + *

    Earlier revisions (see PR #4606 / #5351) attempted to broaden this to a {@code + * newName.startsWith(originalName + ".")} prefix match. That prefix branch silently dropped any + * column that happened to be a prefix of an eval target, which caused two regressions: + * + *

      + *
    • Issue #5185 — a MAP-typed root column produced by {@code spath} got dropped when eval + * introduced multiple dotted-path fields under it. + *
    • The reviewer's case on PR #5351 — {@code source=big5 | fields agent | eval agent.name = + * "test"} dropped the {@code agent} column entirely. + *
    + * + * Struct-parent pruning for the "override on a real flattened nested leaf" case is handled + * uniformly in {@link #projectPlusOverriding(List, List, CalcitePlanContext)}, which invokes + * {@link #dropStructParentsFor(String, CalcitePlanContext)} only for overrides that actually + * replaced an existing column. This keeps issue #4482 fixed across every command that funnels + * through {@code projectPlusOverriding} (bin, eval, rex/sed, trendline, expand, flatten, + * patterns) without reintroducing the #5185 / reviewer regressions here. + */ private boolean shouldOverrideField(String originalName, List newNames) { - return newNames.stream() - .anyMatch( - newName -> - // Match exact field names (e.g., "age" == "age") for flat fields - newName.equals(originalName) - // OR match nested paths (e.g., "resource.attributes..." starts with - // "resource.") - || newName.startsWith(originalName + ".")); + return newNames.contains(originalName); } private List> extractInputRefList(List aggCalls) { @@ -1315,10 +1538,25 @@ private Pair, List> aggregateWithTrimming( * count(a.b)] returns true. */ private boolean containsNestedAggregator(RelBuilder relBuilder, List aggCallRefs) { + // For each aggregator argument, take the part of its column name before the first dot + // (e.g. "city" from "city.location.latitude") and check whether that's a top-level + // ARRAY column — the marker for an OpenSearch `nested` field. + // + // The classic path always exposes a top-level column for object/nested parents. The + // analytics-engine path emits only the flat leaves ("city.name", "city.location.latitude") + // because parent placeholder types (MAP) can't round-trip through Substrait. + // RelDataType.getField returns null when the column doesn't exist — for analytics-engine, + // that null just means "not nested," which is the right answer. 
+ RelDataType rowType = relBuilder.peek().getRowType(); return aggCallRefs.stream() - .map(r -> relBuilder.peek().getRowType().getFieldNames().get(r.getIndex())) + .map(r -> rowType.getFieldNames().get(r.getIndex())) .map(name -> org.apache.commons.lang3.StringUtils.substringBefore(name, ".")) - .anyMatch(root -> relBuilder.field(root).getType().getSqlTypeName() == SqlTypeName.ARRAY); + .anyMatch( + root -> { + RelDataTypeField field = + rowType.getField(root, /* caseSensitive= */ true, /* elideRecord= */ false); + return field != null && field.getType().getSqlTypeName() == SqlTypeName.ARRAY; + }); } /** @@ -1570,7 +1808,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { : duplicatedFieldNames.stream() .map(a -> (RexNode) context.relBuilder.field(a)) .toList(); - buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication); + buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, null); } // add LogicalSystemLimit after dedup addSysLimitForJoinSubsearch(context); @@ -1628,7 +1866,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { List dedupeFields = getRightColumnsInJoinCriteria(context.relBuilder, joinCondition); - buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication); + buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, null); } // add LogicalSystemLimit after dedup addSysLimitForJoinSubsearch(context); @@ -1804,10 +2042,11 @@ public RelNode visitDedupe(Dedupe node, CalcitePlanContext context) { // Columns to deduplicate List dedupeFields = node.getFields().stream().map(f -> rexVisitor.analyze(f, context)).toList(); + RelCollation inputCollation = stripInputSort(context.relBuilder); if (keepEmpty) { - buildDedupOrNull(context.relBuilder, dedupeFields, allowedDuplication); + buildDedupOrNull(context.relBuilder, dedupeFields, allowedDuplication, inputCollation); } else { - buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication); + 
buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, inputCollation); } return context.relBuilder.peek(); } @@ -1916,14 +2155,14 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) context.relBuilder.projectPlus(streamSeq); RelNode left = context.relBuilder.build(); - // 2. Run correlate + aggregate - return buildStreamWindowJoinPlan( + // 2. Use self-join approach to avoid nested correlates (which cause NPE + // in Calcite's RelDecorrelator when chaining multiple streamstats) + return buildStreamWindowSelfJoinPlan( context, left, node, groupList, ROW_NUMBER_COLUMN_FOR_STREAMSTATS, - null, new String[] {ROW_NUMBER_COLUMN_FOR_STREAMSTATS}); } @@ -2054,6 +2293,229 @@ private RelNode buildStreamWindowJoinPlan( return context.relBuilder.peek(); } + /** + * Builds a self-join based plan for streamstats with global=true + window + group. This avoids + * using LogicalCorrelate which causes NPE in Calcite's RelDecorrelator when chaining multiple + * streamstats commands. + * + *

    Plan structure: + * + *

      + *
    1. left = input + __stream_seq__ + *
    2. right = trim to only aggregate input + __stream_seq__ + *
    3. Join left and right on window frame + group conditions + *
    4. Group by all left field indices, compute AGG(right.X) + *
    5. Sort by __stream_seq__, then remove it + *
    + */ + private RelNode buildStreamWindowSelfJoinPlan( + CalcitePlanContext context, + RelNode leftWithHelpers, + StreamWindow node, + List groupList, + String seqCol, + String[] helperColsToCleanup) { + + int leftFieldCount = leftWithHelpers.getRowType().getFieldCount(); + + // Build right side: project only the fields needed for aggregation + seq + group columns + // This avoids field name collisions and keeps the right side minimal + context.relBuilder.push(leftWithHelpers); + + // Collect fields needed on right side: seq col + group cols + aggregate input fields + List rightFields = new ArrayList<>(); + List rightFieldNames = new ArrayList<>(); + + // Always include seq col + rightFields.add(context.relBuilder.field(seqCol)); + rightFieldNames.add(RIGHT_SIDE_SEQ_COLUMN); + + // Include group columns + for (UnresolvedExpression groupExpr : groupList) { + String groupName = extractGroupFieldName(groupExpr); + rightFields.add(context.relBuilder.field(groupName)); + rightFieldNames.add(toRightSideFieldName(groupName)); + } + + // Include aggregate input fields (extract field names from window functions) + Set aggInputFields = new LinkedHashSet<>(); + for (UnresolvedExpression wfExpr : node.getWindowFunctionList()) { + collectFieldNames(wfExpr, aggInputFields); + } + // Remove already-included fields + aggInputFields.remove(seqCol); + for (UnresolvedExpression groupExpr : groupList) { + aggInputFields.remove(extractGroupFieldName(groupExpr)); + } + for (String aggField : aggInputFields) { + rightFields.add(context.relBuilder.field(aggField)); + rightFieldNames.add(toRightSideFieldName(aggField)); + } + + context.relBuilder.project(rightFields, rightFieldNames); + RelNode rightProjected = context.relBuilder.build(); + + // Push left and right + context.relBuilder.push(leftWithHelpers); + context.relBuilder.push(rightProjected); + + // Build join condition using 2-input references + RexNode leftSeq = context.relBuilder.field(2, 0, seqCol); + RexNode rightSeq = 
context.relBuilder.field(2, 1, RIGHT_SIDE_SEQ_COLUMN); + + // Frame filter + RexNode frameFilter; + if (node.isCurrent()) { + RexNode lower = + context.relBuilder.call( + SqlStdOperatorTable.MINUS, leftSeq, context.relBuilder.literal(node.getWindow() - 1)); + frameFilter = context.relBuilder.between(rightSeq, lower, leftSeq); + } else { + RexNode lower = + context.relBuilder.call( + SqlStdOperatorTable.MINUS, leftSeq, context.relBuilder.literal(node.getWindow())); + RexNode upper = + context.relBuilder.call( + SqlStdOperatorTable.MINUS, leftSeq, context.relBuilder.literal(1)); + frameFilter = context.relBuilder.between(rightSeq, lower, upper); + } + + // Group filter + List groupFilters = new ArrayList<>(); + for (UnresolvedExpression groupExpr : groupList) { + String groupName = extractGroupFieldName(groupExpr); + RexNode leftGroup = context.relBuilder.field(2, 0, groupName); + RexNode rightGroup = context.relBuilder.field(2, 1, toRightSideFieldName(groupName)); + RexNode equalCondition = context.relBuilder.equals(leftGroup, rightGroup); + if (node.isBucketNullable()) { + RexNode bothNull = + context.relBuilder.and( + context.relBuilder.isNull(leftGroup), context.relBuilder.isNull(rightGroup)); + groupFilters.add(context.relBuilder.or(equalCondition, bothNull)); + } else { + groupFilters.add(equalCondition); + } + } + + RexNode joinCondition = + groupFilters.isEmpty() + ? 
frameFilter + : context.relBuilder.and(frameFilter, context.relBuilder.and(groupFilters)); + context.relBuilder.join(JoinRelType.LEFT, joinCondition); + + // After join: [left_fields(0..leftFieldCount-1), right_fields(leftFieldCount..)] + // Aggregate: group by all left fields, compute AGG on right fields + // The aggregate functions need to reference the right-side fields in the joined row + + // Build aggregate calls using the right-side field references + List aggCalls = buildAggCallsFromJoinedRight(node.getWindowFunctionList(), context); + + RelBuilder.GroupKey groupKey = + context.relBuilder.groupKey( + IntStream.range(0, leftFieldCount).mapToObj(context.relBuilder::field).toList()); + + context.relBuilder.aggregate(groupKey, aggCalls); + + // Resort by the sequence column + context.relBuilder.sort(context.relBuilder.field(seqCol)); + + // Cleanup helper columns + List cleanup = new ArrayList<>(); + for (String c : helperColsToCleanup) { + cleanup.add(context.relBuilder.field(c)); + } + context.relBuilder.projectExcept(cleanup); + return context.relBuilder.peek(); + } + + /** Collect field names referenced by an expression tree. */ + private void collectFieldNames(UnresolvedExpression expr, Set fieldNames) { + if (expr instanceof Field f) { + fieldNames.add(f.getField().toString()); + } else if (expr instanceof Alias a) { + collectFieldNames(a.getDelegated(), fieldNames); + } else if (expr instanceof WindowFunction wf) { + collectFieldNames(wf.getFunction(), fieldNames); + } else if (expr instanceof Function func) { + for (UnresolvedExpression arg : func.getFuncArgs()) { + collectFieldNames(arg, fieldNames); + } + } + } + + /** + * Build AggCall list for the self-join plan. The aggregate functions reference fields from the + * right side of the join, which carry the {@code __r___} prefix applied during right-side + * projection. 
This method rewrites the window function's field references to those prefixed + * names, unwraps the {@link WindowFunction} to its inner {@link Function}, and then delegates to + * the shared {@link #aggVisitor} so the self-join path reuses the same aggregate-resolution logic + * as regular {@code stats}/{@code eventstats} aggregations. + */ + private List buildAggCallsFromJoinedRight( + List windowFunctionList, CalcitePlanContext context) { + List aggCalls = new ArrayList<>(); + for (UnresolvedExpression wfExpr : windowFunctionList) { + UnresolvedExpression rewritten = rewriteWindowFunctionForSelfJoin(wfExpr); + aggCalls.add(aggVisitor.analyze(rewritten, context)); + } + return aggCalls; + } + + /** + * Rewrites a streamstats window function expression so that {@link #aggVisitor} can resolve it + * against the joined row type, where right-side fields carry the {@code __r___} prefix: + * + *
      + *
    • Unwraps {@link WindowFunction} to expose its inner {@link Function} (the aggregate). + *
    • Preserves the outer {@link Alias} so the aggregate output keeps its user-visible name. + *
    • Renames every {@link QualifiedName} / {@link Field} reference inside the function body to + * the prefixed right-side column name. + *
    + */ + private UnresolvedExpression rewriteWindowFunctionForSelfJoin(UnresolvedExpression expr) { + if (expr instanceof Alias a) { + return new Alias(a.getName(), rewriteWindowFunctionForSelfJoin(a.getDelegated())); + } + if (expr instanceof WindowFunction wf) { + return rewriteWindowFunctionForSelfJoin(wf.getFunction()); + } + if (expr instanceof Function func) { + List rewrittenArgs = + func.getFuncArgs().stream().map(this::rewriteFieldNamesToRightSide).toList(); + return new Function(func.getFuncName(), rewrittenArgs); + } + return expr; + } + + /** + * Recursively renames field references within an aggregate argument to their right-side alias. + */ + private UnresolvedExpression rewriteFieldNamesToRightSide(UnresolvedExpression expr) { + if (expr instanceof Field f && f.getField() instanceof QualifiedName qn) { + return new Field(toRightSideQualifiedName(qn), f.getFieldArgs()); + } + if (expr instanceof QualifiedName qn) { + return toRightSideQualifiedName(qn); + } + if (expr instanceof Alias a) { + return new Alias(a.getName(), rewriteFieldNamesToRightSide(a.getDelegated())); + } + if (expr instanceof Function func) { + List rewrittenArgs = + func.getFuncArgs().stream().map(this::rewriteFieldNamesToRightSide).toList(); + return new Function(func.getFuncName(), rewrittenArgs); + } + return expr; + } + + private static QualifiedName toRightSideQualifiedName(QualifiedName original) { + return new QualifiedName(toRightSideFieldName(original.toString())); + } + + private static String toRightSideFieldName(String originalName) { + return RIGHT_SIDE_FIELD_PREFIX + originalName + RIGHT_SIDE_FIELD_SUFFIX; + } + private RelNode buildResetHelperColumns(CalcitePlanContext context, StreamWindow node) { // 1. 
global sequence to define order RexNode rowNum = @@ -2464,6 +2926,40 @@ private String findTimestampField(RelDataType rowType) { return null; } + @Override + public RelNode visitUnion(Union node, CalcitePlanContext context) { + List inputNodes = new ArrayList<>(); + + for (UnresolvedPlan dataset : node.getDatasets()) { + UnresolvedPlan prunedDataset = dataset.accept(new EmptySourcePropagateVisitor(), null); + prunedDataset.accept(this, context); + inputNodes.add(context.relBuilder.build()); + } + + if (inputNodes.size() < 2) { + throw new IllegalArgumentException( + "Union command requires at least two datasets. Provided: " + inputNodes.size()); + } + + List unifiedInputs = + SchemaUnifier.buildUnifiedSchemaWithTypeCoercion(inputNodes, context); + + for (RelNode input : unifiedInputs) { + context.relBuilder.push(input); + } + context.relBuilder.union(true, unifiedInputs.size()); // true = UNION ALL + + if (node.getMaxout() != null) { + context.relBuilder.push( + LogicalSystemLimit.create( + LogicalSystemLimit.SystemLimitType.SUBSEARCH_MAXOUT, + context.relBuilder.build(), + context.relBuilder.literal(node.getMaxout()))); + } + + return context.relBuilder.peek(); + } + /* * Unsupported Commands of PPL with Calcite for OpenSearch 3.0.0-beta */ @@ -2692,18 +3188,43 @@ public RelNode visitAddColTotals(AddColTotals node, CalcitePlanContext context) @Override public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { - // 1. Visit source (child) table - visitChildren(node, context); RelBuilder builder = context.relBuilder; - // TODO: Limit the number of source rows to 100 for now, make it configurable. - builder.limit(0, 100); - if (node.isBatchMode()) { - tryToRemoveMetaFields(context, true); + + List startValuesForCalcite = null; + String startFieldName; + if (node.getStartValues() != null) { + // Literal start mode: create empty LogicalValues as dummy source (BiRel needs two inputs) + // And will ignore the previous pipe then. 
+ RelDataType dummyType = + builder + .getTypeFactory() + .createStructType( + List.of(builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)), + List.of("_dummy")); + builder.values(dummyType); + startFieldName = null; + startValuesForCalcite = new ArrayList<>(); + for (var lit : node.getStartValues()) { + startValuesForCalcite.add(lit.getValue()); + } + } else { + if (node.getChild().isEmpty()) { + throw new SemanticCheckException( + "Field reference start requires a piped source." + + " Use literal start values (e.g. start='value') for top-level graphLookup."); + } + // Piped mode: visit source child + visitChildren(node, context); + // TODO: Limit the number of source rows to 100 for now, make it configurable. + builder.limit(0, 100); + if (node.isBatchMode()) { + tryToRemoveMetaFields(context, true); + } + startFieldName = node.getStartField().getField().toString(); } RelNode sourceTable = builder.build(); // 2. Extract parameters - String startFieldName = node.getStartField().getField().toString(); String fromFieldName = node.getFromField().getField().toString(); String toFieldName = node.getToField().getField().toString(); String outputFieldName = node.getAs().getField().toString(); @@ -2736,6 +3257,7 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { sourceTable, lookupTable, startFieldName, + startValuesForCalcite, fromFieldName, toFieldName, outputFieldName, @@ -3036,7 +3558,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { || node.getColumnSplit() == null || Objects.equals(config.limit, 0)) { // The output of chart is expected to be ordered by row split names - relBuilder.sort(relBuilder.field(0)); + relBuilder.sort(relBuilder.nullsLast(relBuilder.field(0))); return relBuilder.peek(); } @@ -3106,7 +3628,8 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.field(2)) .as(aggFieldName)); // The output of chart is expected to be ordered by row and column split names - 
relBuilder.sort(relBuilder.field(0), relBuilder.field(1)); + relBuilder.sort( + relBuilder.nullsLast(relBuilder.field(0)), relBuilder.nullsLast(relBuilder.field(1))); return relBuilder.peek(); } @@ -3574,8 +4097,13 @@ public RelNode visitMvExpand(MvExpand mvExpand, CalcitePlanContext context) { inputType.getField(fieldName, /*caseSensitive*/ true, /*elideRecord*/ false); if (inputField == null) { - throw new SemanticCheckException( - String.format("Field '%s' not found in the schema", fieldName)); + throw ErrorReport.wrap( + new SemanticCheckException( + String.format("Field '%s' not found in the schema", fieldName))) + .code(ErrorCode.FIELD_NOT_FOUND) + .location("while evaluating the input field for mvexpand") + .context("command", "mvexpand") + .build(); } final RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(field, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java b/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java index 0e5ac4a6e05..dba881b3fc3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java +++ b/core/src/main/java/org/opensearch/sql/calcite/QualifiedNameResolver.java @@ -16,6 +16,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; @@ -315,14 +317,20 @@ private static Optional resolveLambdaVariable( private static Optional replaceWithNullLiteralInCoalesce(CalcitePlanContext context) { log.debug("replaceWithNullLiteralInCoalesce() called"); if (context.isInCoalesceFunction()) { + // Use SqlTypeName.NULL so the resulting literal does not bias the least-restrictive + // common-type computation toward VARCHAR. 
See issue #5175: previously VARCHAR was used, + // which caused COALESCE(null, 42) to be inferred as VARCHAR and returned as "42". return Optional.of( context.rexBuilder.makeNullLiteral( - context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR))); + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.NULL))); } return Optional.empty(); } - private static RuntimeException getNotFoundException(QualifiedName node) { - return new IllegalArgumentException(String.format("Field [%s] not found.", node.toString())); + private static ErrorReport getNotFoundException(QualifiedName node) { + return ErrorReport.wrap( + new IllegalArgumentException(String.format("Field [%s] not found.", node.toString()))) + .code(ErrorCode.FIELD_NOT_FOUND) + .build(); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java b/core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java index 05380ce8c48..e01cbe3992d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java +++ b/core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java @@ -14,10 +14,16 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.SqlTypeName; /** - * Utility class for unifying schemas across multiple RelNodes. Throws an exception when type - * conflicts are detected. + * Utility class for unifying schemas across multiple RelNodes. Supports two strategies: + * + *
      + *
    • Conflict resolution (multisearch): throws on type mismatch, fills missing fields with NULL + *
    • Type coercion (union): widens compatible types (e.g. INTEGER→BIGINT), falls back to VARCHAR + * for incompatible types, fills missing fields with NULL + *
    */ public class SchemaUnifier { @@ -147,4 +153,236 @@ RelDataType getType() { return type; } } + + /** + * Builds unified schema with type coercion for UNION command. Coerces compatible types to a + * common supertype (e.g. int+float→float), falls back to VARCHAR for incompatible types, and + * fills missing fields with NULL. + */ + public static List buildUnifiedSchemaWithTypeCoercion( + List inputs, CalcitePlanContext context) { + if (inputs.isEmpty() || inputs.size() == 1) { + return inputs; + } + + List coercedInputs = coerceUnionTypes(inputs, context); + return unifySchemasForUnion(coercedInputs, context); + } + + /** + * Aligns schemas by projecting NULL for missing fields and CAST for type mismatches. Uses + * force=true to clear collation traits and prevent EnumerableMergeUnion cast exception. + */ + private static List unifySchemasForUnion( + List inputs, CalcitePlanContext context) { + List unifiedSchema = buildUnifiedSchemaForUnion(inputs); + List fieldNames = + unifiedSchema.stream().map(SchemaField::getName).collect(Collectors.toList()); + + List projectedNodes = new ArrayList<>(); + for (RelNode node : inputs) { + List projection = buildProjectionForUnion(node, unifiedSchema, context); + RelNode projectedNode = + context.relBuilder.push(node).project(projection, fieldNames, true).build(); + projectedNodes.add(projectedNode); + } + return projectedNodes; + } + + private static List buildUnifiedSchemaForUnion(List nodes) { + List schema = new ArrayList<>(); + Map seenFields = new HashMap<>(); + + for (RelNode node : nodes) { + for (RelDataTypeField field : node.getRowType().getFieldList()) { + if (!seenFields.containsKey(field.getName())) { + schema.add(new SchemaField(field.getName(), field.getType())); + seenFields.put(field.getName(), field.getType()); + } + } + } + return schema; + } + + private static List buildProjectionForUnion( + RelNode node, List unifiedSchema, CalcitePlanContext context) { + Map nodeFieldMap = + 
node.getRowType().getFieldList().stream() + .collect(Collectors.toMap(RelDataTypeField::getName, field -> field)); + + List projection = new ArrayList<>(); + for (SchemaField schemaField : unifiedSchema) { + RelDataTypeField nodeField = nodeFieldMap.get(schemaField.getName()); + + if (nodeField != null) { + RexNode fieldRef = context.rexBuilder.makeInputRef(node, nodeField.getIndex()); + if (!nodeField.getType().equals(schemaField.getType())) { + projection.add(context.rexBuilder.makeCast(schemaField.getType(), fieldRef)); + } else { + projection.add(fieldRef); + } + } else { + projection.add(context.rexBuilder.makeNullLiteral(schemaField.getType())); + } + } + return projection; + } + + /** Casts fields to their common supertypes across all inputs when types differ. */ + private static List coerceUnionTypes(List inputs, CalcitePlanContext context) { + Map> fieldTypeMap = new HashMap<>(); + for (RelNode input : inputs) { + for (RelDataTypeField field : input.getRowType().getFieldList()) { + String fieldName = field.getName(); + SqlTypeName typeName = field.getType().getSqlTypeName(); + if (typeName != null) { + fieldTypeMap.computeIfAbsent(fieldName, k -> new ArrayList<>()).add(typeName); + } + } + } + + Map targetTypeMap = new HashMap<>(); + for (Map.Entry> entry : fieldTypeMap.entrySet()) { + String fieldName = entry.getKey(); + List types = entry.getValue(); + + SqlTypeName commonType = types.getFirst(); + for (int i = 1; i < types.size(); i++) { + commonType = findCommonTypeForUnion(commonType, types.get(i)); + } + targetTypeMap.put(fieldName, commonType); + } + + boolean needsCoercion = false; + for (RelNode input : inputs) { + for (RelDataTypeField field : input.getRowType().getFieldList()) { + SqlTypeName targetType = targetTypeMap.get(field.getName()); + if (targetType != null && field.getType().getSqlTypeName() != targetType) { + needsCoercion = true; + break; + } + } + if (needsCoercion) break; + } + + if (!needsCoercion) { + return inputs; + } + + List 
coercedInputs = new ArrayList<>(); + for (RelNode input : inputs) { + List projections = new ArrayList<>(); + List projectionNames = new ArrayList<>(); + boolean needsProjection = false; + + for (RelDataTypeField field : input.getRowType().getFieldList()) { + String fieldName = field.getName(); + SqlTypeName currentType = field.getType().getSqlTypeName(); + SqlTypeName targetType = targetTypeMap.get(fieldName); + + RexNode fieldRef = context.rexBuilder.makeInputRef(input, field.getIndex()); + + if (currentType != targetType && targetType != null) { + projections.add(context.relBuilder.cast(fieldRef, targetType)); + needsProjection = true; + } else { + projections.add(fieldRef); + } + projectionNames.add(fieldName); + } + + if (needsProjection) { + context.relBuilder.push(input); + context.relBuilder.project(projections, projectionNames, true); + coercedInputs.add(context.relBuilder.build()); + } else { + coercedInputs.add(input); + } + } + + return coercedInputs; + } + + /** + * Returns the wider type for two SqlTypeNames. Within the same family, returns the wider type + * (e.g. INTEGER+BIGINT-->BIGINT). Across families, falls back to VARCHAR. 
+ */ + private static SqlTypeName findCommonTypeForUnion(SqlTypeName type1, SqlTypeName type2) { + if (type1 == type2) { + return type1; + } + + if (type1 == SqlTypeName.NULL) { + return type2; + } + if (type2 == SqlTypeName.NULL) { + return type1; + } + + if (isNumericTypeForUnion(type1) && isNumericTypeForUnion(type2)) { + return getWiderNumericTypeForUnion(type1, type2); + } + + if (isStringTypeForUnion(type1) && isStringTypeForUnion(type2)) { + return SqlTypeName.VARCHAR; + } + + if (isTemporalTypeForUnion(type1) && isTemporalTypeForUnion(type2)) { + return getWiderTemporalTypeForUnion(type1, type2); + } + + return SqlTypeName.VARCHAR; + } + + private static boolean isNumericTypeForUnion(SqlTypeName typeName) { + return typeName == SqlTypeName.TINYINT + || typeName == SqlTypeName.SMALLINT + || typeName == SqlTypeName.INTEGER + || typeName == SqlTypeName.BIGINT + || typeName == SqlTypeName.FLOAT + || typeName == SqlTypeName.REAL + || typeName == SqlTypeName.DOUBLE + || typeName == SqlTypeName.DECIMAL; + } + + private static boolean isStringTypeForUnion(SqlTypeName typeName) { + return typeName == SqlTypeName.CHAR || typeName == SqlTypeName.VARCHAR; + } + + private static boolean isTemporalTypeForUnion(SqlTypeName typeName) { + return typeName == SqlTypeName.DATE + || typeName == SqlTypeName.TIMESTAMP + || typeName == SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE; + } + + private static SqlTypeName getWiderNumericTypeForUnion(SqlTypeName type1, SqlTypeName type2) { + int rank1 = getNumericTypeRankForUnion(type1); + int rank2 = getNumericTypeRankForUnion(type2); + return rank1 >= rank2 ? 
type1 : type2; + } + + private static int getNumericTypeRankForUnion(SqlTypeName typeName) { + return switch (typeName) { + case TINYINT -> 1; + case SMALLINT -> 2; + case INTEGER -> 3; + case BIGINT -> 4; + case DECIMAL -> 5; + case REAL -> 6; + case FLOAT -> 7; + case DOUBLE -> 8; + default -> 0; + }; + } + + private static SqlTypeName getWiderTemporalTypeForUnion(SqlTypeName type1, SqlTypeName type2) { + if (type1 == SqlTypeName.TIMESTAMP || type2 == SqlTypeName.TIMESTAMP) { + return SqlTypeName.TIMESTAMP; + } + if (type1 == SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE + || type2 == SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE) { + return SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE; + } + return SqlTypeName.DATE; + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java index 6d593787eb5..f30678b5531 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java @@ -6,10 +6,12 @@ package org.opensearch.sql.calcite.plan.rel; import java.util.List; +import javax.annotation.Nullable; import lombok.Getter; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.SingleRel; @@ -23,8 +25,23 @@ public abstract class Dedup extends SingleRel { final Integer allowedDuplication; final Boolean keepEmpty; final Boolean consecutive; + final @Nullable RelCollation inputCollation; + + /** + * Field names of the row type that {@link #inputCollation} was captured against. Used as a + * name-based anchor so callers can resolve the collation's stale indices after a planner rule has + * narrowed or replaced the dedup's input (typically a scan absorbing a narrowing project). 
+ * + *

    Renames are handled by Calcite's own {@code Project.getMapping} propagation when a {@code + * Project} sits between dedup's old and new input — see {@code Dedup.copy}. This name list is + * only the fallback for cases where the replacement is not a {@code Project} (e.g. a scan that + * swaps in a narrower row type without a {@code Project} RelNode). Scans don't rename, so name + * equality is a stable identifier for that specific fallback. + * + *

    {@code null} iff {@link #inputCollation} is {@code null}. + */ + final @Nullable List inputCollationFieldNames; - /** */ protected Dedup( RelOptCluster cluster, RelTraitSet traitSet, @@ -32,7 +49,9 @@ protected Dedup( List dedupeFields, Integer allowedDuplication, Boolean keepEmpty, - Boolean consecutive) { + Boolean consecutive, + @Nullable RelCollation inputCollation, + @Nullable List inputCollationFieldNames) { super(cluster, traitSet, input); if (allowedDuplication <= 0) { throw new IllegalArgumentException("Number of duplicate events must be greater than 0"); @@ -44,6 +63,8 @@ protected Dedup( this.allowedDuplication = allowedDuplication; this.keepEmpty = keepEmpty; this.consecutive = consecutive; + this.inputCollation = inputCollation; + this.inputCollationFieldNames = inputCollationFieldNames; } @Override @@ -54,7 +75,9 @@ public final RelNode copy(RelTraitSet traitSet, List inputs) { this.dedupeFields, this.allowedDuplication, this.keepEmpty, - this.consecutive); + this.consecutive, + this.inputCollation, + this.inputCollationFieldNames); } public abstract Dedup copy( @@ -63,7 +86,9 @@ public abstract Dedup copy( List dedupeFields, Integer allowedDuplication, Boolean keepEmpty, - Boolean consecutive); + Boolean consecutive, + @Nullable RelCollation inputCollation, + @Nullable List inputCollationFieldNames); public Dedup copy(RelNode input, List dedupeFields) { return this.copy( @@ -72,7 +97,9 @@ public Dedup copy(RelNode input, List dedupeFields) { dedupeFields, this.allowedDuplication, this.keepEmpty, - this.consecutive); + this.consecutive, + this.inputCollation, + this.inputCollationFieldNames); } @Override @@ -81,7 +108,8 @@ public RelWriter explainTerms(RelWriter pw) { .item("dedup_fields", dedupeFields) .item("allowed_dedup", allowedDuplication) .item("keepEmpty", keepEmpty) - .item("consecutive", consecutive); + .item("consecutive", consecutive) + .itemIf("inputCollation", inputCollation, inputCollation != null); } @Override diff --git 
a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java index 02ed97faf0c..8410664bc8d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java @@ -40,7 +40,8 @@ public abstract class GraphLookup extends BiRel { // TODO: use RexInputRef instead of String for there fields - protected final String startField; // Field in source table (start entities) + @Nullable protected final String startField; // Field in source table (start entities) + @Nullable protected final List startValues; // Literal start values (top-level mode) protected final String fromField; // Field in lookup table (edge source) protected final String toField; // Field in lookup table (edge target) protected final String outputField; // Name of output array field @@ -63,7 +64,8 @@ public abstract class GraphLookup extends BiRel { * @param traitSet Trait set * @param source Source table RelNode * @param lookup Lookup table RelNode - * @param startField Field name for start entities + * @param startField Field name for start entities (null in literal start mode) + * @param startValues Literal start values for top-level graphLookup (null in piped mode) * @param fromField Field name for outgoing edges * @param toField Field name for incoming edges * @param outputField Name of the output array field @@ -81,7 +83,8 @@ protected GraphLookup( RelTraitSet traitSet, RelNode source, RelNode lookup, - String startField, + @Nullable String startField, + @Nullable List startValues, String fromField, String toField, String outputField, @@ -94,6 +97,7 @@ protected GraphLookup( @Nullable RexNode filter) { super(cluster, traitSet, source, lookup); this.startField = startField; + this.startValues = startValues; this.fromField = fromField; this.toField = toField; this.outputField = outputField; @@ -124,7 +128,19 @@ protected 
RelDataType deriveRowType() { if (outputRowType == null) { RelDataTypeFactory.Builder builder = getCluster().getTypeFactory().builder(); - if (batchMode) { + if (startValues != null) { + // Literal start mode: Output = just [outputField: ARRAY] + RelDataType lookupRowType = getLookup().getRowType(); + if (this.depthField != null) { + final RelDataTypeFactory.Builder lookupBuilder = getCluster().getTypeFactory().builder(); + lookupBuilder.addAll(lookupRowType.getFieldList()); + RelDataType depthType = getCluster().getTypeFactory().createSqlType(SqlTypeName.INTEGER); + lookupBuilder.add(this.depthField, depthType); + lookupRowType = lookupBuilder.build(); + } + RelDataType arrayType = getCluster().getTypeFactory().createArrayType(lookupRowType, -1); + builder.add(outputField, arrayType); + } else if (batchMode) { // Batch mode: Output = [Array, Array] // First field: aggregated source rows as array RelDataType sourceRowType = getSource().getRowType(); @@ -172,7 +188,7 @@ protected RelDataType deriveRowType() { @Override public double estimateRowCount(RelMetadataQuery mq) { // Batch mode aggregates all source rows into a single output row - return batchMode ? 1 : getSource().estimateRowCount(mq); + return (startValues != null || batchMode) ? 
1 : getSource().estimateRowCount(mq); } @Override @@ -184,6 +200,7 @@ public RelWriter explainTerms(RelWriter pw) { .item("depthField", depthField) .item("maxDepth", maxDepth) .item("bidirectional", bidirectional) + .itemIf("startValues", startValues, startValues != null) .itemIf("supportArray", supportArray, supportArray) .itemIf("batchMode", batchMode, batchMode) .itemIf("usePIT", usePIT, usePIT) diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java index 2a8eb5038d6..8d1d60bb783 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java @@ -8,10 +8,12 @@ import static org.opensearch.sql.calcite.plan.rule.PPLDedupConvertRule.DEDUP_CONVERT_RULE; import java.util.List; +import javax.annotation.Nullable; import org.apache.calcite.plan.Convention; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rex.RexNode; @@ -24,8 +26,19 @@ protected LogicalDedup( List dedupeFields, Integer allowedDuplication, Boolean keepEmpty, - Boolean consecutive) { - super(cluster, traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive); + Boolean consecutive, + @Nullable RelCollation inputCollation, + @Nullable List inputCollationFieldNames) { + super( + cluster, + traitSet, + input, + dedupeFields, + allowedDuplication, + keepEmpty, + consecutive, + inputCollation, + inputCollationFieldNames); } @Override @@ -35,10 +48,20 @@ public Dedup copy( List dedupeFields, Integer allowedDuplication, Boolean keepEmpty, - Boolean consecutive) { + Boolean consecutive, + @Nullable RelCollation inputCollation, + @Nullable List inputCollationFieldNames) { assert 
traitSet.containsIfApplicable(Convention.NONE); return new LogicalDedup( - getCluster(), traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive); + getCluster(), + traitSet, + input, + dedupeFields, + allowedDuplication, + keepEmpty, + consecutive, + inputCollation, + inputCollationFieldNames); } public static LogicalDedup create( @@ -47,10 +70,33 @@ public static LogicalDedup create( Integer allowedDuplication, Boolean keepEmpty, Boolean consecutive) { + return create(input, dedupeFields, allowedDuplication, keepEmpty, consecutive, null); + } + + public static LogicalDedup create( + RelNode input, + List dedupeFields, + Integer allowedDuplication, + Boolean keepEmpty, + Boolean consecutive, + @Nullable RelCollation inputCollation) { + // Record the field names from the current input's row type so callers that encounter a stale + // collation (after a planner rule has swapped in a different, non-Project-derived input) can + // still resolve the sort keys to positions in the new input by name. See + // Dedup.inputCollationFieldNames. + List fieldNames = inputCollation == null ? 
null : input.getRowType().getFieldNames(); final RelOptCluster cluster = input.getCluster(); RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE); return new LogicalDedup( - cluster, traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive); + cluster, + traitSet, + input, + dedupeFields, + allowedDuplication, + keepEmpty, + consecutive, + inputCollation, + fieldNames); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java index 94db3689f8c..98ea7301168 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java @@ -21,31 +21,13 @@ @Getter public class LogicalGraphLookup extends GraphLookup { - /** - * Creates a LogicalGraphLookup. - * - * @param cluster Cluster - * @param traitSet Trait set - * @param source Source table RelNode - * @param lookup Lookup table RelNode - * @param startField Field name for start entities - * @param fromField Field name for outgoing edges - * @param toField Field name for incoming edges - * @param outputField Name of the output array field - * @param depthField Name of the depth field - * @param maxDepth Maximum traversal depth (-1 for unlimited) - * @param bidirectional Whether to traverse edges in both directions - * @param supportArray Whether to support array-typed fields - * @param batchMode Whether to batch all source start values into a single unified BFS - * @param usePIT Whether to use PIT (Point In Time) search for complete results - * @param filter Optional filter condition for lookup table documents - */ protected LogicalGraphLookup( RelOptCluster cluster, RelTraitSet traitSet, RelNode source, RelNode lookup, - String startField, + @Nullable String startField, + @Nullable List startValues, String fromField, String toField, String outputField, @@ -62,6 +44,7 
@@ protected LogicalGraphLookup( source, lookup, startField, + startValues, fromField, toField, outputField, @@ -74,28 +57,11 @@ protected LogicalGraphLookup( filter); } - /** - * Creates a LogicalGraphLookup with Convention.NONE. - * - * @param source Source table RelNode - * @param lookup Lookup table RelNode - * @param startField Field name for start entities - * @param fromField Field name for outgoing edges - * @param toField Field name for incoming edges - * @param outputField Name of the output array field - * @param depthField Named of the output depth field - * @param maxDepth Maximum traversal depth (-1 for unlimited) - * @param bidirectional Whether to traverse edges in both directions - * @param supportArray Whether to support array-typed fields - * @param batchMode Whether to batch all source start values into a single unified BFS - * @param usePIT Whether to use PIT (Point In Time) search for complete results - * @param filter Optional filter condition for lookup table documents - * @return A new LogicalGraphLookup instance - */ public static LogicalGraphLookup create( RelNode source, RelNode lookup, - String startField, + @Nullable String startField, + @Nullable List startValues, String fromField, String toField, String outputField, @@ -114,6 +80,7 @@ public static LogicalGraphLookup create( source, lookup, startField, + startValues, fromField, toField, outputField, @@ -134,6 +101,7 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { inputs.get(0), inputs.get(1), startField, + startValues, fromField, toField, outputField, diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java index c1b452a2ac0..39bd243ea5d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java @@ -7,10 +7,15 @@ import static 
org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; +import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; +import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexWindowBounds; import org.apache.calcite.sql.fun.SqlStdOperatorTable; @@ -47,28 +52,85 @@ public void onMatch(RelOptRuleCall call) { final LogicalDedup dedup = call.rel(0); RelBuilder relBuilder = call.builder(); relBuilder.push(dedup.getInput()); + RelCollation inputCollation = + resolveCollationToCurrentInput( + dedup.getInputCollation(), + dedup.getInputCollationFieldNames(), + dedup.getInput().getRowType().getFieldNames()); if (dedup.getKeepEmpty()) { - buildDedupOrNull(relBuilder, dedup.getDedupeFields(), dedup.getAllowedDuplication()); + buildDedupOrNull( + relBuilder, dedup.getDedupeFields(), dedup.getAllowedDuplication(), inputCollation); } else { - buildDedupNotNull(relBuilder, dedup.getDedupeFields(), dedup.getAllowedDuplication()); + buildDedupNotNull( + relBuilder, dedup.getDedupeFields(), dedup.getAllowedDuplication(), inputCollation); } call.transformTo(relBuilder.build()); } + /** + * Resolve {@code collation}'s indices against {@code currentNames} (dedup's current input row + * type). If the indices are still valid against {@code currentNames}, return {@code collation} + * unchanged. Otherwise, look each collation field up by name in {@code originalNames} (the row + * type captured at LogicalDedup creation time) and find its position in {@code currentNames}; if + * any field is no longer present, drop that key. 
+ */ + private static @Nullable RelCollation resolveCollationToCurrentInput( + @Nullable RelCollation collation, + @Nullable List originalNames, + List currentNames) { + if (collation == null || collation.getFieldCollations().isEmpty()) { + return collation; + } + int currentSize = currentNames.size(); + int maxIdx = -1; + for (RelFieldCollation fc : collation.getFieldCollations()) { + maxIdx = Math.max(maxIdx, fc.getFieldIndex()); + } + if (maxIdx < currentSize) { + // Collation is already in the current input's index space — nothing to do. + return collation; + } + if (originalNames == null) { + return null; + } + List remapped = new ArrayList<>(); + for (RelFieldCollation fc : collation.getFieldCollations()) { + int oldIdx = fc.getFieldIndex(); + if (oldIdx < 0 || oldIdx >= originalNames.size()) { + continue; + } + int newIdx = currentNames.indexOf(originalNames.get(oldIdx)); + if (newIdx < 0) { + continue; + } + remapped.add(fc.withFieldIndex(newIdx)); + } + if (remapped.isEmpty()) { + return null; + } + return RelCollations.of(remapped); + } + public static void buildDedupOrNull( - RelBuilder relBuilder, List dedupeFields, Integer allowedDuplication) { + RelBuilder relBuilder, + List dedupeFields, + Integer allowedDuplication, + RelCollation inputCollation) { /* * | dedup 2 a, b keepempty=true - * LogicalProject(...) - * +- LogicalFilter(condition=[OR(IS NULL(a), IS NULL(b), <=(_row_number_dedup_, 1))]) - * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY a, b)]) - * +- ... + * LogicalSort(...) -- re-sort to restore input order + * +- LogicalProject(...) + * +- LogicalFilter(condition=[OR(IS NULL(a), IS NULL(b), <=(_row_number_dedup_, 1))]) + * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b)]) + * +- ... 
(input with Sort stripped) */ + List orderKeys = collationToOrderKeys(relBuilder, inputCollation); RexNode rowNumber = relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() .partitionBy(dedupeFields) + .orderBy(orderKeys) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_DEDUP); relBuilder.projectPlus(rowNumber); @@ -82,31 +144,36 @@ public static void buildDedupOrNull( _row_number_dedup_, relBuilder.literal(allowedDuplication)))); // DropColumns('_row_number_dedup_) relBuilder.projectExcept(_row_number_dedup_); + // Re-sort to restore the input order that was stripped before the window + restoreInputOrder(relBuilder, inputCollation); } public static void buildDedupNotNull( - RelBuilder relBuilder, List dedupeFields, Integer allowedDuplication) { + RelBuilder relBuilder, + List dedupeFields, + Integer allowedDuplication, + RelCollation inputCollation) { /* * | dedup 2 a, b keepempty=false - * LogicalProject(...) - * +- LogicalFilter(condition=[<=(_row_number_dedup_, n)])) - * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY a, b)]) - * +- LogicalFilter(condition=[AND(IS NOT NULL(a), IS NOT NULL(b))]) - * +- ... + * LogicalSort(...) -- re-sort to restore input order + * +- LogicalProject(...) + * +- LogicalFilter(condition=[<=(_row_number_dedup_, n)])) + * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b)]) + * +- LogicalFilter(condition=[AND(IS NOT NULL(a), IS NOT NULL(b))]) + * +- ... 
(input with Sort stripped) */ + List orderKeys = collationToOrderKeys(relBuilder, inputCollation); // Filter (isnotnull('a) AND isnotnull('b)) String rowNumberAlias = ROW_NUMBER_COLUMN_FOR_DEDUP; relBuilder.filter( relBuilder.and( dedupeFields.stream().map(relBuilder::isNotNull).collect(Collectors.toList()))); - // Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, - // specifiedwindowoundedpreceding$(), currentrow$())) AS _row_number_dedup_], ['a, 'b], ['a ASC - // NULLS FIRST, 'b ASC NULLS FIRST] RexNode rowNumber = relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() .partitionBy(dedupeFields) + .orderBy(orderKeys) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(rowNumberAlias); relBuilder.projectPlus(rowNumber); @@ -116,6 +183,44 @@ public static void buildDedupNotNull( relBuilder.lessThanOrEqual(rowNumberField, relBuilder.literal(allowedDuplication))); // DropColumns('_row_number_dedup_) relBuilder.projectExcept(rowNumberField); + // Re-sort to restore the input order that was stripped before the window + restoreInputOrder(relBuilder, inputCollation); + } + + /** + * Convert a RelCollation to a list of RexNode order keys using the RelBuilder's field references. 
+ */ + private static List collationToOrderKeys(RelBuilder relBuilder, RelCollation collation) { + if (collation == null || collation.getFieldCollations().isEmpty()) { + return List.of(); + } + List orderKeys = new ArrayList<>(); + for (RelFieldCollation fieldCollation : collation.getFieldCollations()) { + RexNode fieldRef = relBuilder.field(fieldCollation.getFieldIndex()); + if (fieldCollation.direction.isDescending()) { + fieldRef = relBuilder.desc(fieldRef); + } + if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.LAST) { + fieldRef = relBuilder.nullsLast(fieldRef); + } else if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.FIRST) { + fieldRef = relBuilder.nullsFirst(fieldRef); + } + orderKeys.add(fieldRef); + } + return orderKeys; + } + + /** + * Re-apply a sort after dedup to restore the input order that may have been disrupted by the + * window operator. EnumerableWindow can re-partition data by the PARTITION BY key, destroying any + * upstream sort order. This explicit re-sort ensures the final output preserves the original + * order. + */ + private static void restoreInputOrder(RelBuilder relBuilder, RelCollation inputCollation) { + if (inputCollation != null && !inputCollation.getFieldCollations().isEmpty()) { + List sortKeys = collationToOrderKeys(relBuilder, inputCollation); + relBuilder.sort(sortKeys); + } } /** Rule configuration. 
*/ diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java index 054141371b9..11eabfd483c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java @@ -5,14 +5,21 @@ package org.opensearch.sql.calcite.plan.rule; +import java.util.ArrayList; import java.util.List; import java.util.function.Predicate; import java.util.stream.Collectors; +import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexFieldCollation; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexWindow; @@ -106,6 +113,8 @@ protected void apply( return; } + RelCollation inputCollation = extractCollationFromWindow(windows.get(0)); + RelBuilder relBuilder = call.builder(); relBuilder.push(bucketNonNullFilter.getInput()); List> targetProjections = @@ -117,13 +126,33 @@ protected void apply( targetProjections.stream().map(Pair::getValue).collect(Collectors.toList())); LogicalDedup dedup = - LogicalDedup.create(relBuilder.build(), dedupColumns, dedupNumber, false, false); + LogicalDedup.create( + relBuilder.build(), dedupColumns, dedupNumber, false, false, inputCollation); relBuilder.push(dedup); relBuilder.project(finalProject.getProjects(), finalProject.getRowType().getFieldNames()); call.transformTo(relBuilder.build()); } + private static @Nullable RelCollation 
extractCollationFromWindow(RexWindow window) { + if (window.orderKeys.isEmpty()) { + return null; + } + List fieldCollations = new ArrayList<>(); + for (RexFieldCollation rfc : window.orderKeys) { + if (!(rfc.left instanceof RexInputRef ref)) { + return null; + } + fieldCollations.add( + new RelFieldCollation(ref.getIndex(), rfc.getDirection(), rfc.getNullDirection())); + } + RelCollation collation = RelCollations.of(fieldCollations); + if (collation.equals(RelCollations.EMPTY)) { + return null; + } + return collation; + } + /** Rule configuration. */ @Value.Immutable public interface Config extends OpenSearchRuleConfig { diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteClassLoaderHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteClassLoaderHelper.java new file mode 100644 index 00000000000..b2367f653c3 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteClassLoaderHelper.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.utils; + +import java.util.concurrent.Callable; + +/** + * Helper for setting the thread context classloader before Calcite operations. This is needed for + * patched Calcite (CALCITE-3745): when analytics-engine is the parent classloader, Janino uses the + * parent's classloader which can't see SQL plugin classes. The patched Calcite checks {@code + * Thread.currentThread().getContextClassLoader()} first. This helper sets it to the SQL plugin's + * classloader (child) which can see both parent and child classes. + * + * @see CALCITE-3745 + * @see sql#5306 + */ +public final class CalciteClassLoaderHelper { + + private CalciteClassLoaderHelper() {} + + /** + * Run an action with the thread context classloader set to the caller's classloader. 
+ * + * @param action the action to run + * @param callerClass the class whose classloader should be used (pass {@code MyClass.class}) + * @param the return type + * @return the result of the action + */ + public static T withCalciteClassLoader(Callable action, Class callerClass) { + Thread currentThread = Thread.currentThread(); + ClassLoader originalCl = currentThread.getContextClassLoader(); + currentThread.setContextClassLoader(callerClass.getClassLoader()); + try { + return action.call(); + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + currentThread.setContextClassLoader(originalCl); + } + } + + /** + * Run a void action with the thread context classloader set to the caller's classloader. + * + * @see #withCalciteClassLoader(Callable, Class) + */ + public static void withCalciteClassLoader(Runnable action, Class callerClass) { + withCalciteClassLoader( + () -> { + action.run(); + return null; + }, + callerClass); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java index a6d57ea01f6..54b9d4ffbaf 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java @@ -63,6 +63,7 @@ import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptTable.ViewExpander; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.plan.hep.HepProgramBuilder; @@ -74,6 +75,7 @@ import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.hint.HintStrategyTable; import org.apache.calcite.rel.logical.LogicalTableScan; import 
org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.rel.type.RelDataType; @@ -104,6 +106,8 @@ import org.opensearch.sql.calcite.plan.rule.OpenSearchRules; import org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule; import org.opensearch.sql.calcite.profile.PlanProfileBuilder; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.expression.function.PPLBuiltinOperators; import org.opensearch.sql.monitor.profile.ProfileContext; import org.opensearch.sql.monitor.profile.ProfileMetric; @@ -367,6 +371,36 @@ protected SqlToRelConverter getSqlToRelConverter( return new OpenSearchSqlToRelConverter( this, validator, catalogReader, this.cluster, convertletTable, config); } + + @Override + protected RelRoot trimUnusedFields(RelRoot root) { + final SqlToRelConverter.Config config = + SqlToRelConverter.config() + .withTrimUnusedFields(shouldTrim(root.rel)) + .withExpand(THREAD_EXPAND.get()) + .withInSubQueryThreshold(requireNonNull(THREAD_INSUBQUERY_THRESHOLD.get())); + // PPL analyzes into a pre-built RelNode before prepareStatement(rel). Reuse the incoming + // RelNode's cluster here so prepare-time trimming does not create replacement nodes under a + // different planner than the rest of the tree. + final SqlToRelConverter converter = + new OpenSearchSqlToRelConverter( + this, + getSqlValidator(), + catalogReader, + root.rel.getCluster(), + convertletTable, + config); + final boolean ordered = !root.collation.getFieldCollations().isEmpty(); + final boolean dml = SqlKind.DML.contains(root.kind); + return root.withRel(converter.trimUnusedFields(dml || ordered, root.rel)); + } + + private static boolean shouldTrim(RelNode rootRel) { + // For now, don't trim if there are more than 3 joins. The projects + // near the leaves created by trim migrate past joins and seem to + // prevent join-reordering. 
+ return THREAD_TRIM.get() || RelOptUtil.countJoins(rootRel) < 2; + } } public static class OpenSearchSqlToRelConverter extends SqlToRelConverter { @@ -379,25 +413,102 @@ public OpenSearchSqlToRelConverter( RelOptCluster cluster, SqlRexConvertletTable convertletTable, Config config) { - super(viewExpander, validator, catalogReader, cluster, convertletTable, config); + this( + viewExpander, + validator, + catalogReader, + cluster, + convertletTable, + preserveHintStrategies(cluster, config), + true); + } + + private OpenSearchSqlToRelConverter( + ViewExpander viewExpander, + @Nullable SqlValidator validator, + CatalogReader catalogReader, + RelOptCluster cluster, + SqlRexConvertletTable convertletTable, + Config effectiveConfig, + boolean ignored) { + super(viewExpander, validator, catalogReader, cluster, convertletTable, effectiveConfig); this.relBuilder = - config + effectiveConfig .getRelBuilderFactory() .create( cluster, validator != null ? validator.getCatalogReader().unwrap(RelOptSchema.class) : null) - .transform(config.getRelBuilderConfigTransform()); + .transform(effectiveConfig.getRelBuilderConfigTransform()); } @Override protected RelFieldTrimmer newFieldTrimmer() { return new OpenSearchRelFieldTrimmer(validator, this.relBuilder); } + + // SqlToRelConverter always installs the hint strategy table from its config onto the cluster. + // When prepare-time trimming reuses an incoming RelNode cluster, preserve any PPL-specific + // aggregate hint strategies that were already registered during analysis. 
+ private static Config preserveHintStrategies(RelOptCluster cluster, Config config) { + if (config.getHintStrategyTable() == HintStrategyTable.EMPTY + && cluster.getHintStrategies() != HintStrategyTable.EMPTY) { + return config.withHintStrategyTable(cluster.getHintStrategies()); + } + return config; + } } public static class OpenSearchRelRunners { + private static boolean isNonPushdownEnumerableAggregate(String message) { + return message.contains("Error while preparing plan") + && message.contains("CalciteEnumerableNestedAggregate"); + } + + // Detect if error is due to window functions in unsupported context (bins on time fields) + private static boolean isWindowBinOnTimeField(SQLException e) { + String errorMsg = e.getMessage(); + return errorMsg != null + && errorMsg.contains("Error while preparing plan") + && errorMsg.contains("WIDTH_BUCKET"); + } + + // Traverse Calcite SQL exceptions in search of the root cause, since Calcite's outer error + // messages aren't really usable for users + private static String rootCauseMessage(Throwable e) { + String rc = null; + if (e.getCause() != null) { + rc = rootCauseMessage(e.getCause()); + } + for (int i = 0; rc == null && i < e.getSuppressed().length; i++) { + rc = rootCauseMessage(e.getSuppressed()[i]); + } + return rc != null ? 
rc : e.getMessage(); + } + + private static void enrichErrorsForSpecialCases(ErrorReport.Builder report, SQLException e) { + if (e.getMessage().contains("Error while preparing plan [") && e.getCause() != null) { + // Generic 'something went wrong' planning error, try to get the cause + int planStart = e.getMessage().indexOf('['); + int planEnd = e.getMessage().lastIndexOf(']'); + report + .context("plan", e.getMessage().substring(planStart + 1, planEnd)) + .details(rootCauseMessage(e)); + } + if (isWindowBinOnTimeField(e)) { + report + .details( + "The 'bins' parameter on timestamp fields requires: (1) pushdown to be enabled" + + " (controlled by plugins.calcite.pushdown.enabled, enabled by default), and" + + " (2) the timestamp field to be used as an aggregation bucket (e.g., 'stats" + + " count() by @timestamp').") + .code(ErrorCode.UNSUPPORTED_OPERATION) + .context("is_window_bin_on_time_field", true) + .suggestion("check pushdown is enabled and review the aggregation"); + } + } + /** * Runs a relational expression by existing connection. 
This class copied from {@link * org.apache.calcite.tools.RelRunners#run(RelNode)} @@ -430,17 +541,12 @@ public RelNode visit(TableScan scan) { return preparedStatement; } catch (SQLException e) { // Detect if error is due to window functions in unsupported context (bins on time fields) - String errorMsg = e.getMessage(); - if (errorMsg != null - && errorMsg.contains("Error while preparing plan") - && errorMsg.contains("WIDTH_BUCKET")) { - throw new UnsupportedOperationException( - "The 'bins' parameter on timestamp fields requires: (1) pushdown to be enabled" - + " (controlled by plugins.calcite.pushdown.enabled, enabled by default), and" - + " (2) the timestamp field to be used as an aggregation bucket (e.g., 'stats" - + " count() by @timestamp')."); - } - throw Util.throwAsRuntime(e); + ErrorReport.Builder report = + ErrorReport.wrap(e) + .location("while compiling the optimized query plan for physical execution") + .code(ErrorCode.PLANNING_ERROR); + enrichErrorsForSpecialCases(report, e); + throw report.build(); } } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java index 915c45e7083..0326d3ee61d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintUtils.java @@ -6,8 +6,14 @@ package org.opensearch.sql.calcite.utils; import com.google.common.base.Suppliers; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; import java.util.function.Supplier; import lombok.experimental.UtilityClass; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.hint.HintStrategyTable; import org.apache.calcite.rel.hint.RelHint; @@ -20,6 +26,17 @@ public class PPLHintUtils { private static final String 
KEY_IGNORE_NULL_BUCKET = "ignoreNullBucket"; private static final String KEY_HAS_NESTED_AGG_CALL = "hasNestedAggCall"; + /** + * Encoded list of dedup sort keys, one per field, in pipe-separated {@code field:ORDER} form, + * e.g. {@code "gender:ASC|state:DESC"}. Each entry preserves the sort order from the original PPL + * {@code sort} collation so the pushed-down {@code top_hits} can emit a full multi-field sort + * array instead of only the first field. + */ + private static final String KEY_DEDUP_SORT_FIELDS = "dedupSortFields"; + + private static final String DEDUP_SORT_ENTRY_SEP = "|"; + private static final String DEDUP_SORT_FIELD_ORDER_SEP = ":"; + private static final Supplier HINT_STRATEGY_TABLE = Suppliers.memoize( () -> @@ -81,4 +98,75 @@ public static boolean hasNestedAggCall(Aggregate aggregate) { .getOrDefault(KEY_HAS_NESTED_AGG_CALL, "false") .equals("true")); } + + /** + * Add dedup sort info hint to aggregate so that AggregateAnalyzer can set top_hits sort. All + * field collations are propagated so a multi-field PPL {@code sort} ({@code sort state, -city | + * dedup ...}) is pushed down as a multi-field {@code top_hits} sort. + */ + public static void addDedupSortHintToAggregate( + RelBuilder relBuilder, RelCollation collation, java.util.List fieldNames) { + assert relBuilder.peek() instanceof LogicalAggregate + : "Hint HINT_AGG_ARGUMENTS can be added to LogicalAggregate only"; + String encoded = encodeDedupSortFields(collation, fieldNames); + if (encoded.isEmpty()) { + return; + } + final RelHint sortHint = + RelHint.builder(HINT_AGG_ARGUMENTS).hintOption(KEY_DEDUP_SORT_FIELDS, encoded).build(); + relBuilder.hints(sortHint); + if (relBuilder.getCluster().getHintStrategies() == HintStrategyTable.EMPTY) { + relBuilder.getCluster().setHintStrategies(HINT_STRATEGY_TABLE.get()); + } + } + + /** A single (field, order) entry from the dedup sort hint. 
*/ + public record DedupSortKey(String field, String order) {} + + /** + * Return the dedup sort keys from aggregate hints, preserving the order from the original PPL + * {@code sort}. Empty list if not present. + */ + public static List getDedupSortKeys(Aggregate aggregate) { + return aggregate.getHints().stream() + .filter(hint -> hint.hintName.equals(HINT_AGG_ARGUMENTS)) + .map(hint -> hint.kvOptions.get(KEY_DEDUP_SORT_FIELDS)) + .filter(Objects::nonNull) + .findFirst() + .map(PPLHintUtils::decodeDedupSortFields) + .orElse(Collections.emptyList()); + } + + private static String encodeDedupSortFields(RelCollation collation, List fieldNames) { + StringBuilder sb = new StringBuilder(); + for (RelFieldCollation fc : collation.getFieldCollations()) { + int idx = fc.getFieldIndex(); + if (idx < 0 || idx >= fieldNames.size()) { + throw new IllegalStateException( + "Dedup sort collation index " + idx + " out of range for scan fields " + fieldNames); + } + if (sb.length() > 0) { + sb.append(DEDUP_SORT_ENTRY_SEP); + } + sb.append(fieldNames.get(idx)) + .append(DEDUP_SORT_FIELD_ORDER_SEP) + .append(fc.direction.isDescending() ? 
"DESC" : "ASC"); + } + return sb.toString(); + } + + private static List decodeDedupSortFields(String encoded) { + if (encoded == null || encoded.isEmpty()) { + return Collections.emptyList(); + } + List keys = new ArrayList<>(); + for (String entry : encoded.split("\\" + DEDUP_SORT_ENTRY_SEP)) { + int sep = entry.lastIndexOf(DEDUP_SORT_FIELD_ORDER_SEP); + if (sep <= 0 || sep == entry.length() - 1) { + continue; + } + keys.add(new DedupSortKey(entry.substring(0, sep), entry.substring(sep + 1))); + } + return keys; + } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index abf37e68392..fcd361ba229 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -84,6 +84,10 @@ private PPLOperandTypes() {} UDFOperandMetadata.wrap( (CompositeOperandTypeChecker) OperandTypes.ANY.or(OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.INTEGER))); + public static final UDFOperandMetadata ANY_OPTIONAL_STRING = + UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + OperandTypes.ANY.or(OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER))); public static final UDFOperandMetadata ANY_OPTIONAL_TIMESTAMP = UDFOperandMetadata.wrap( (CompositeOperandTypeChecker) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index b4e040762af..4d2dae4bd60 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -28,17 +28,24 @@ import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.volcano.VolcanoPlanner; +import org.apache.calcite.rel.BiRel; +import org.apache.calcite.rel.RelCollation; +import 
org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.core.Uncollect; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; @@ -81,6 +88,7 @@ public interface PlanUtils { String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__"; String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_"; String ROW_NUMBER_COLUMN_FOR_TRANSPOSE = "_row_number_transpose_"; + String VALUE_COLUMN_FOR_TRANSPOSE = "_value_transpose_"; static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { @@ -593,6 +601,144 @@ public Void visitCorrelVariable(RexCorrelVariable correlVar) { } } + /** + * Walk down the plan tree to find the first Sort node with non-empty collation. Stops at blocking + * operators that destroy ordering: + * + *
      + *
    • Aggregate - aggregation destroys input ordering + *
    • BiRel - covers Join, Correlate, and other binary relations + *
    • SetOp - covers Union, Intersect, Except + *
    • Uncollect - unnesting operation that may change ordering + *
    • Project with window functions (RexOver) - ordering determined by window's ORDER BY + *
    + * + * @param node the starting RelNode to backtrack from + * @return the collation found, or null if no sort or blocking operator encountered + */ + public static @Nullable RelCollation findInputCollation(RelNode node) { + while (node != null) { + if (node instanceof Aggregate + || node instanceof BiRel + || node instanceof SetOp + || node instanceof Uncollect) { + return null; + } + if (node instanceof LogicalProject && ((LogicalProject) node).containsOver()) { + return null; + } + if (node instanceof Sort sort) { + if (sort.getCollation() != null && !sort.getCollation().getFieldCollations().isEmpty()) { + return sort.getCollation(); + } + } + if (node.getInputs().isEmpty()) { + break; + } + node = node.getInput(0); + } + return null; + } + + /** + * Strip the Sort node from the input on the RelBuilder stack, returning its collation (remapped + * through any intermediate Projects). This is necessary because EnumerableWindow re-partitions + * data by PARTITION BY key, which can destroy input sort order. Calcite's metadata system + * (RelMdCollation) incorrectly propagates the input's collation through the Window, causing the + * optimizer to eliminate a post-dedup Sort as "redundant". By stripping the Sort before the + * window and re-adding it after, we break this incorrect metadata chain. + * + * @return the remapped collation of the stripped Sort, or null if no Sort was found or the sort + * field was projected away + */ + public static @Nullable RelCollation stripInputSort(RelBuilder relBuilder) { + RelNode input = relBuilder.peek(); + // First check whether a Sort exists in the (single-input) prefix of the subtree. If there is + // no Sort, there is nothing to strip and the index-space remapping below would be pointless. + if (findInputCollation(input) == null) { + return null; + } + // Ask Calcite's RelMdCollation for the subtree's output collation. 
This already accounts for + // intermediate Projects (they rewrite collation via a `Mappings.TargetMapping`), so we don't + // need to hand-roll an index remapper. + RelMetadataQuery mq = input.getCluster().getMetadataQuery(); + List collations = mq.collations(input); + RelCollation outputCollation = null; + if (collations != null) { + for (RelCollation c : collations) { + if (c != null && !c.getFieldCollations().isEmpty()) { + outputCollation = c; + break; + } + } + } + if (outputCollation == null) { + // Any collation field was projected away (or RelMdCollation couldn't propagate through the + // subtree). Leave the tree untouched and report no collation. + return null; + } + RelNode stripped = removeSortFromTree(input); + if (stripped != input) { + relBuilder.clear(); + relBuilder.push(stripped); + } + return outputCollation; + } + + /** + * Remove the first Sort node found in the tree, replacing it with its input. Only traverses + * through single-input operators (Filter, Project) that preserve order. + */ + private static RelNode removeSortFromTree(RelNode node) { + if (node instanceof Sort sort) { + if (sort.getCollation() != null + && !sort.getCollation().getFieldCollations().isEmpty() + && sort.fetch == null + && sort.offset == null) { + return sort.getInput(); + } + } + if (node.getInputs().size() == 1) { + RelNode child = node.getInput(0); + RelNode newChild = removeSortFromTree(child); + if (newChild != child) { + return node.copy(node.getTraitSet(), List.of(newChild)); + } + } + return node; + } + + /** + * Reverses the direction of a RelCollation. 
+ * + * @param original The original collation to reverse + * @return A new RelCollation with reversed directions + */ + public static RelCollation reverseCollation(RelCollation original) { + if (original == null || original.getFieldCollations().isEmpty()) { + return original; + } + + List reversedFields = new ArrayList<>(); + for (RelFieldCollation field : original.getFieldCollations()) { + RelFieldCollation.Direction reversedDirection = field.direction.reverse(); + + // Handle null direction properly - reverse it as well + RelFieldCollation.NullDirection reversedNullDirection = + field.nullDirection == RelFieldCollation.NullDirection.FIRST + ? RelFieldCollation.NullDirection.LAST + : field.nullDirection == RelFieldCollation.NullDirection.LAST + ? RelFieldCollation.NullDirection.FIRST + : field.nullDirection; + + RelFieldCollation reversedField = + new RelFieldCollation(field.getFieldIndex(), reversedDirection, reversedNullDirection); + reversedFields.add(reversedField); + } + + return RelCollations.of(reversedFields); + } + /** Adds a rel node to the top of the stack while preserving the field names and aliases. 
*/ static void replaceTop(RelBuilder relBuilder, RelNode relNode) { try { diff --git a/core/src/main/java/org/opensearch/sql/executor/DelegatingExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/DelegatingExecutionEngine.java new file mode 100644 index 00000000000..b38251233a0 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/executor/DelegatingExecutionEngine.java @@ -0,0 +1,81 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor; + +import java.util.List; +import java.util.Optional; +import lombok.RequiredArgsConstructor; +import lombok.extern.log4j.Log4j2; +import org.apache.calcite.rel.RelNode; +import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.planner.physical.PhysicalPlan; + +/** + * An {@link ExecutionEngine} that delegates Calcite RelNode execution to the first extension whose + * {@link ExecutionEngine#canVectorize(RelNode)} returns {@code true}, falling back to the default + * engine otherwise. Non-Calcite ({@link PhysicalPlan}) methods and unmatched RelNode plans are + * forwarded to the default engine. 
+ */ +@RequiredArgsConstructor +@Log4j2 +public class DelegatingExecutionEngine implements ExecutionEngine { + + private final ExecutionEngine defaultEngine; + private final List extensions; + + @Override + public void execute(PhysicalPlan plan, ResponseListener listener) { + defaultEngine.execute(plan, listener); + } + + @Override + public void execute( + PhysicalPlan plan, ExecutionContext context, ResponseListener listener) { + defaultEngine.execute(plan, context, listener); + } + + @Override + public void explain(PhysicalPlan plan, ResponseListener listener) { + defaultEngine.explain(plan, listener); + } + + @Override + public boolean canVectorize(RelNode plan) { + return findExtension(plan).isPresent(); + } + + @Override + public void execute( + RelNode plan, CalcitePlanContext context, ResponseListener listener) { + Optional ext = findExtension(plan); + if (ext.isPresent()) { + log.info("Routing query to extension engine : {}", ext.get().getClass().getSimpleName()); + ext.get().execute(plan, context, listener); + } else { + defaultEngine.execute(plan, context, listener); + } + } + + @Override + public void explain( + RelNode plan, + ExplainMode mode, + CalcitePlanContext context, + ResponseListener listener) { + Optional ext = findExtension(plan); + if (ext.isPresent()) { + ext.get().explain(plan, mode, context, listener); + } else { + defaultEngine.explain(plan, mode, context, listener); + } + } + + private Optional findExtension(RelNode plan) { + return extensions.stream().filter(ext -> ext.canVectorize(plan)).findFirst(); + } +} diff --git a/core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java index e65db7b4065..da8eae41355 100644 --- a/core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java +++ b/core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java @@ -47,15 +47,32 @@ void execute( */ void explain(PhysicalPlan plan, ResponseListener listener); + 
/** + * Check if this engine supports vectorized execution of the given Calcite RelNode plan. + * Vectorized execution engines (e.g. Velox) override this to advertise support for specific plan + * shapes. The default returns {@code false}. + */ + default boolean canVectorize(RelNode plan) { + return false; + } + /** Execute calcite RelNode plan with {@link ExecutionContext} and call back response listener. */ default void execute( - RelNode plan, CalcitePlanContext context, ResponseListener listener) {} + RelNode plan, CalcitePlanContext context, ResponseListener listener) { + listener.onFailure( + new UnsupportedOperationException( + getClass().getSimpleName() + " does not support RelNode execution")); + } default void explain( RelNode plan, ExplainMode mode, CalcitePlanContext context, - ResponseListener listener) {} + ResponseListener listener) { + listener.onFailure( + new UnsupportedOperationException( + getClass().getSimpleName() + " does not support RelNode explain")); + } /** Data class that encapsulates ExprValue. */ @Data diff --git a/core/src/main/java/org/opensearch/sql/executor/OpenSearchTypeSystem.java b/core/src/main/java/org/opensearch/sql/executor/OpenSearchTypeSystem.java index b84d7dcf4d6..941f42de46c 100644 --- a/core/src/main/java/org/opensearch/sql/executor/OpenSearchTypeSystem.java +++ b/core/src/main/java/org/opensearch/sql/executor/OpenSearchTypeSystem.java @@ -22,6 +22,9 @@ public class OpenSearchTypeSystem extends RelDataTypeSystemImpl { // same with Spark DecimalType.MAX_SCALE public static int MAX_SCALE = 38; + /** Maximum fractional seconds precision for TIME and TIMESTAMP types (nanosecond). 
*/ + public static final int MAX_DATETIME_PRECISION = 9; + private OpenSearchTypeSystem() {} @Override @@ -29,6 +32,20 @@ public int getMaxNumericPrecision() { return MAX_PRECISION; } + @Override + public int getMaxPrecision(SqlTypeName typeName) { + return switch (typeName) { + case TIME, + TIME_WITH_LOCAL_TIME_ZONE, + TIME_TZ, + TIMESTAMP, + TIMESTAMP_WITH_LOCAL_TIME_ZONE, + TIMESTAMP_TZ -> + MAX_DATETIME_PRECISION; + default -> super.getMaxPrecision(typeName); + }; + } + @Override public int getMaxNumericScale() { return MAX_SCALE; diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index 320325c8438..fe9d3e55dc1 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -35,6 +35,10 @@ import org.opensearch.sql.calcite.SysLimit; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit.SystemLimitType; +import org.opensearch.sql.calcite.utils.CalciteClassLoaderHelper; +import org.opensearch.sql.common.error.ErrorReport; +import org.opensearch.sql.common.error.QueryProcessingStage; +import org.opensearch.sql.common.error.StageErrorHandler; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.QueryContext; @@ -139,14 +143,37 @@ public void executeWithCalcite( QueryProfiling.activate(QueryContext.isProfileEnabled()); ProfileMetric analyzeMetric = profileContext.getOrCreateMetric(MetricName.ANALYZE); long analyzeStart = System.nanoTime(); - CalcitePlanContext context = - CalcitePlanContext.create( - buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); - context.setHighlightConfig(highlightConfig); - RelNode relNode = analyze(plan, context); - RelNode calcitePlan = convertToCalcitePlan(relNode, context); - 
analyzeMetric.set(System.nanoTime() - analyzeStart); - executionEngine.execute(calcitePlan, context, listener); + CalciteClassLoaderHelper.withCalciteClassLoader( + () -> { + CalcitePlanContext context = + CalcitePlanContext.create( + buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); + + context.setHighlightConfig(highlightConfig); + + // Wrap analyze with ANALYZING stage tracking + RelNode relNode = + StageErrorHandler.executeStage( + QueryProcessingStage.ANALYZING, + () -> analyze(plan, context), + "while preparing and validating the query plan"); + + // Wrap plan conversion with PLAN_CONVERSION stage tracking + RelNode calcitePlan = + StageErrorHandler.executeStage( + QueryProcessingStage.PLAN_CONVERSION, + () -> convertToCalcitePlan(relNode, context), + "while converting the query to an executable plan"); + + analyzeMetric.set(System.nanoTime() - analyzeStart); + + // Wrap execution with EXECUTING stage tracking + StageErrorHandler.executeStageVoid( + QueryProcessingStage.EXECUTING, + () -> executionEngine.execute(calcitePlan, context, listener), + "while running the query"); + }, + QueryService.class); } catch (Throwable t) { if (isCalciteFallbackAllowed(t) && !(t instanceof NonFallbackCalciteException)) { log.warn("Fallback to V2 query engine since got exception", t); @@ -169,17 +196,21 @@ public void explainWithCalcite( () -> { try { QueryProfiling.noop(); - CalcitePlanContext context = - CalcitePlanContext.create( - buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); - context.setHighlightConfig(highlightConfig); - context.run( + CalciteClassLoaderHelper.withCalciteClassLoader( () -> { - RelNode relNode = analyze(plan, context); - RelNode calcitePlan = convertToCalcitePlan(relNode, context); - executionEngine.explain(calcitePlan, mode, context, listener); + CalcitePlanContext context = + CalcitePlanContext.create( + buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); + 
context.setHighlightConfig(highlightConfig); + context.run( + () -> { + RelNode relNode = analyze(plan, context); + RelNode calcitePlan = convertToCalcitePlan(relNode, context); + executionEngine.explain(calcitePlan, mode, context, listener); + }, + settings); }, - settings); + QueryService.class); } catch (Throwable t) { if (isCalciteFallbackAllowed(t)) { log.warn("Fallback to V2 query engine since got exception", t); @@ -291,22 +322,31 @@ public PhysicalPlan plan(LogicalPlan plan) { return planner.plan(plan); } + private boolean isCalciteUnsupportedError(@Nullable Throwable t) { + return switch (t) { + case null -> false; + case CalciteUnsupportedException calciteUnsupportedException -> true; + case ErrorReport errorReport when t.getCause() instanceof CalciteUnsupportedException -> true; + default -> false; + }; + } + private boolean isCalciteFallbackAllowed(@Nullable Throwable t) { // We always allow fallback the query failed with CalciteUnsupportedException. // This is for avoiding breaking changes when enable Calcite by default. 
- if (t instanceof CalciteUnsupportedException) { + if (isCalciteUnsupportedError(t)) { return true; - } else { - if (settings != null) { - Boolean fallback_allowed = settings.getSettingValue(Settings.Key.CALCITE_FALLBACK_ALLOWED); - if (fallback_allowed == null) { - return false; - } - return fallback_allowed; - } else { - return true; + } + + if (settings != null) { + Boolean fallback_allowed = settings.getSettingValue(Settings.Key.CALCITE_FALLBACK_ALLOWED); + if (fallback_allowed == null) { + return false; } + return fallback_allowed; } + + return true; } private boolean isCalciteEnabled(Settings settings) { diff --git a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java new file mode 100644 index 00000000000..ddfe5fd3556 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java @@ -0,0 +1,151 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor.analytics; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.opensearch.analytics.exec.QueryPlanExecutor; +import org.opensearch.core.action.ActionListener; +import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprType; +import 
org.opensearch.sql.executor.ExecutionContext; +import org.opensearch.sql.executor.ExecutionEngine; +import org.opensearch.sql.executor.pagination.Cursor; +import org.opensearch.sql.monitor.profile.MetricName; +import org.opensearch.sql.monitor.profile.ProfileMetric; +import org.opensearch.sql.monitor.profile.QueryProfiling; +import org.opensearch.sql.planner.physical.PhysicalPlan; + +/** + * Execution engine adapter for the analytics engine (Project Mustang). + * + *

    Bridges the analytics engine's {@link QueryPlanExecutor} with the SQL plugin's {@link + * ExecutionEngine} response pipeline. Takes a Calcite {@link RelNode}, delegates execution to the + * analytics engine, and converts the raw results into {@link QueryResponse}. + */ +public class AnalyticsExecutionEngine implements ExecutionEngine { + + private final QueryPlanExecutor> planExecutor; + + public AnalyticsExecutionEngine(QueryPlanExecutor> planExecutor) { + this.planExecutor = planExecutor; + } + + /** Not supported. Analytics queries use the RelNode path exclusively. */ + @Override + public void execute(PhysicalPlan plan, ResponseListener listener) { + listener.onFailure( + new UnsupportedOperationException("Analytics engine only supports RelNode execution")); + } + + /** Not supported. Analytics queries use the RelNode path exclusively. */ + @Override + public void execute( + PhysicalPlan plan, ExecutionContext context, ResponseListener listener) { + listener.onFailure( + new UnsupportedOperationException("Analytics engine only supports RelNode execution")); + } + + /** Not supported. Analytics queries use the RelNode path exclusively. */ + @Override + public void explain(PhysicalPlan plan, ResponseListener listener) { + listener.onFailure( + new UnsupportedOperationException("Analytics engine only supports RelNode execution")); + } + + @Override + public void execute( + RelNode plan, CalcitePlanContext context, ResponseListener listener) { + // QueryPlanExecutor became asynchronous in analytics-framework 3.7 — execution is dispatched + // to a worker pool and results arrive on the listener. Record the execute metric in the + // listener callback, before delegating to the user-supplied listener, so the metric snapshot + // taken by SimpleJsonResponseFormatter sees the correct value. 
+ ProfileMetric execMetric = QueryProfiling.current().getOrCreateMetric(MetricName.EXECUTE); + long execStart = System.nanoTime(); + + planExecutor.execute( + plan, + null, + new ActionListener<>() { + @Override + public void onResponse(Iterable rows) { + try { + List fields = plan.getRowType().getFieldList(); + List results = convertRows(rows, fields); + Schema schema = buildSchema(fields); + execMetric.set(System.nanoTime() - execStart); + listener.onResponse(new QueryResponse(schema, results, Cursor.None)); + } catch (Exception e) { + listener.onFailure(e); + } + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + }); + } + + @Override + public void explain( + RelNode plan, + ExplainMode mode, + CalcitePlanContext context, + ResponseListener listener) { + try { + String logical = RelOptUtil.toString(plan, mode.toExplainLevel()); + ExplainResponse response = + new ExplainResponse(new ExplainResponseNodeV2(logical, null, null)); + listener.onResponse(ExplainResponse.normalizeLf(response)); + } catch (Exception e) { + listener.onFailure(e); + } + } + + private List convertRows(Iterable rows, List fields) { + List results = new ArrayList<>(); + for (Object[] row : rows) { + Map valueMap = new LinkedHashMap<>(); + for (int i = 0; i < fields.size(); i++) { + String columnName = fields.get(i).getName(); + Object value = (i < row.length) ? 
row[i] : null; + valueMap.put(columnName, ExprValueUtils.fromObjectValue(value)); + } + results.add(ExprTupleValue.fromExprValueMap(valueMap)); + } + return results; + } + + private Schema buildSchema(List fields) { + List columns = new ArrayList<>(); + for (RelDataTypeField field : fields) { + ExprType exprType = convertType(field.getType()); + columns.add(new Schema.Column(field.getName(), null, exprType)); + } + return new Schema(columns); + } + + private ExprType convertType(RelDataType type) { + try { + return OpenSearchTypeFactory.convertRelDataTypeToExprType(type); + } catch (IllegalArgumentException e) { + return org.opensearch.sql.data.type.ExprCoreType.UNKNOWN; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java b/core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java index f42d376f649..bd0796b05af 100644 --- a/core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java +++ b/core/src/main/java/org/opensearch/sql/expression/datetime/StrftimeFormatterUtil.java @@ -249,4 +249,40 @@ private static long extractFirstNDigits(double value, int digits) { return isNegative ? -result : result; } + + /** Mapping from strftime specifiers to Java DateTimeFormatter patterns for parsing. */ + private static final Map STRFTIME_TO_JAVA_PARSE = + ImmutableMap.builder() + .put("%Y", "yyyy") + .put("%y", "yy") + .put("%m", "MM") + .put("%B", "MMMM") + .put("%b", "MMM") + .put("%d", "dd") + .put("%H", "HH") + .put("%I", "hh") + .put("%M", "mm") + .put("%S", "ss") + .put("%p", "a") + .put("%T", "HH:mm:ss") + .put("%F", "yyyy-MM-dd") + .put("%%", "'%'") + .build(); + + /** + * Convert a strftime format string to a Java DateTimeFormatter pattern suitable for parsing. + * + * @param strftimeFormat the strftime-style format string (e.g. {@code %Y-%m-%d %H:%M:%S}) + * @return a Java DateTimeFormatter pattern (e.g. 
{@code yyyy-MM-dd HH:mm:ss}) + */ + public static String toJavaPattern(String strftimeFormat) { + Matcher m = Pattern.compile("%[A-Za-z%]").matcher(strftimeFormat); + StringBuilder sb = new StringBuilder(); + while (m.find()) { + String replacement = STRFTIME_TO_JAVA_PARSE.getOrDefault(m.group(), m.group()); + m.appendReplacement(sb, Matcher.quoteReplacement(replacement)); + } + m.appendTail(sb); + return sb.toString(); + } } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java index 9a77a0d5a7c..318f32a41be 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java @@ -50,6 +50,10 @@ public SqlReturnTypeInference getReturnTypeInference() { RelDataType originalType = SqlLibraryOperators.ARRAY.getReturnTypeInference().inferReturnType(sqlOperatorBinding); RelDataType innerType = originalType.getComponentType(); + // Default empty/unknown element type to VARCHAR — see PR description for why. + if (innerType == null || isUnknownLikeType(innerType.getSqlTypeName())) { + innerType = typeFactory.createSqlType(SqlTypeName.VARCHAR); + } return createArrayType( typeFactory, typeFactory.createTypeWithNullability(innerType, true), true); } catch (Exception e) { @@ -63,6 +67,17 @@ public UDFOperandMetadata getOperandMetadata() { return null; } + /** + * Calcite's {@link SqlLibraryOperators#ARRAY} infers a {@code NULL}-element array for an empty + * call list and an {@code UNKNOWN}-element array when type inference can't pick one (e.g. all + * operands are typeless nulls). Either of those bubbles up to the analytics-engine route's + * substrait converter as "Unable to convert the type UNKNOWN" — substrait has no encoding for + * either marker. 
Treat both as needing a concrete fallback. + */ + private static boolean isUnknownLikeType(SqlTypeName sqlTypeName) { + return sqlTypeName == SqlTypeName.NULL || sqlTypeName == SqlTypeName.UNKNOWN; + } + public static class ArrayImplementor implements NotNullImplementor { @Override public Expression implement( diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 2aebf7efe34..0a5b0fe0e03 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -63,8 +63,12 @@ import org.opensearch.sql.expression.function.jsonUDF.JsonKeysFunctionImpl; import org.opensearch.sql.expression.function.jsonUDF.JsonSetFunctionImpl; import org.opensearch.sql.expression.function.udf.AutoConvertFunction; +import org.opensearch.sql.expression.function.udf.CTimeConvertFunction; import org.opensearch.sql.expression.function.udf.CryptographicFunction; +import org.opensearch.sql.expression.function.udf.Dur2SecConvertFunction; import org.opensearch.sql.expression.function.udf.MemkConvertFunction; +import org.opensearch.sql.expression.function.udf.MkTimeConvertFunction; +import org.opensearch.sql.expression.function.udf.MsTimeConvertFunction; import org.opensearch.sql.expression.function.udf.NumConvertFunction; import org.opensearch.sql.expression.function.udf.ParseFunction; import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction; @@ -431,6 +435,10 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { public static final SqlOperator RMCOMMA = new RmcommaConvertFunction().toUDF("RMCOMMA"); public static final SqlOperator RMUNIT = new RmunitConvertFunction().toUDF("RMUNIT"); public static final SqlOperator MEMK = new MemkConvertFunction().toUDF("MEMK"); + public static final SqlOperator CTIME = new 
CTimeConvertFunction().toUDF("CTIME"); + public static final SqlOperator MKTIME = new MkTimeConvertFunction().toUDF("MKTIME"); + public static final SqlOperator MSTIME = new MsTimeConvertFunction().toUDF("MSTIME"); + public static final SqlOperator DUR2SEC = new Dur2SecConvertFunction().toUDF("DUR2SEC"); public static final SqlOperator WIDTH_BUCKET = new org.opensearch.sql.expression.function.udf.binning.WidthBucketFunction() diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 30d7c055470..849c60fe4eb 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -39,6 +39,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.COT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.COUNT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CRC32; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CTIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURDATE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURRENT_DATE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURRENT_TIME; @@ -61,6 +62,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.DEGREES; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDEFUNCTION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DUR2SEC; import static org.opensearch.sql.expression.function.BuiltinFunctionName.E; import static org.opensearch.sql.expression.function.BuiltinFunctionName.EARLIEST; import static org.opensearch.sql.expression.function.BuiltinFunctionName.EQUAL; @@ 
-144,12 +146,14 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_DAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_HOUR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MKTIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MOD; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MODULUS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MODULUSFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTHNAME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MSTIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; @@ -991,6 +995,10 @@ void populate() { registerOperator(RMCOMMA, PPLBuiltinOperators.RMCOMMA); registerOperator(RMUNIT, PPLBuiltinOperators.RMUNIT); registerOperator(MEMK, PPLBuiltinOperators.MEMK); + registerOperator(CTIME, PPLBuiltinOperators.CTIME); + registerOperator(MKTIME, PPLBuiltinOperators.MKTIME); + registerOperator(MSTIME, PPLBuiltinOperators.MSTIME); + registerOperator(DUR2SEC, PPLBuiltinOperators.DUR2SEC); register( TOSTRING, diff --git a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonUtils.java b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonUtils.java index da8dc2a2413..16727295fea 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonUtils.java +++ 
b/core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonUtils.java @@ -23,6 +23,15 @@ public class JsonUtils { public static String convertToJsonPath(String input) { if (input == null || input.isEmpty()) return "$"; + // Strip leading "$." or "$" to avoid double-prefixing (issue #5167) + if (input.startsWith("$.")) { + input = input.substring(2); + } else if (input.startsWith("$")) { + input = input.substring(1); + } + + if (input.isEmpty()) return "$"; + StringBuilder sb = new StringBuilder("$."); int i = 0; while (i < input.length()) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/CTimeConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/CTimeConvertFunction.java new file mode 100644 index 00000000000..6b507936348 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/CTimeConvertFunction.java @@ -0,0 +1,105 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.util.List; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; +import org.opensearch.sql.calcite.utils.PPLReturnTypes; +import org.opensearch.sql.expression.datetime.StrftimeFormatterUtil; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * PPL ctime() conversion function. 
Converts UNIX epoch timestamps to human-readable time strings + * using strftime format specifiers. Default format: {@code %m/%d/%Y %H:%M:%S}. + */ +public class CTimeConvertFunction extends ImplementorUDF { + + private static final String DEFAULT_FORMAT = "%m/%d/%Y %H:%M:%S"; + + public CTimeConvertFunction() { + super(new CTimeImplementor(), NullPolicy.ANY); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return PPLReturnTypes.STRING_FORCE_NULLABLE; + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return PPLOperandTypes.ANY_OPTIONAL_STRING; + } + + public static class CTimeImplementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + if (translatedOperands.isEmpty()) { + return Expressions.constant(null, String.class); + } + Expression fieldValue = Expressions.box(translatedOperands.get(0)); + if (translatedOperands.size() == 1) { + return Expressions.call(CTimeConvertFunction.class, "convert", fieldValue); + } + Expression timeFormat = Expressions.box(translatedOperands.get(1)); + return Expressions.call( + CTimeConvertFunction.class, "convertWithFormat", fieldValue, timeFormat); + } + } + + public static String convert(Object value) { + return convertWithFormat(value, null); + } + + public static String convertWithFormat(Object value, Object timeFormatObj) { + Double timestamp = toEpochSeconds(value); + if (timestamp == null) { + return null; + } + String format = (timeFormatObj != null) ? 
timeFormatObj.toString().trim() : DEFAULT_FORMAT; + if (format.isEmpty()) { + return null; + } + try { + long seconds = timestamp.longValue(); + int nanos = (int) ((timestamp - seconds) * 1_000_000_000); + Instant instant = Instant.ofEpochSecond(seconds, nanos); + ZonedDateTime zdt = ZonedDateTime.ofInstant(instant, ZoneId.of("UTC")); + return StrftimeFormatterUtil.formatZonedDateTime(zdt, format).stringValue(); + } catch (Exception e) { + return null; + } + } + + public static Double toEpochSeconds(Object value) { + if (value == null) { + return null; + } + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + String str = value.toString().trim(); + if (str.isEmpty()) { + return null; + } + try { + return Double.parseDouble(str); + } catch (NumberFormatException e) { + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/Dur2SecConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/Dur2SecConvertFunction.java new file mode 100644 index 00000000000..78facf743be --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/Dur2SecConvertFunction.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** PPL dur2sec() conversion function. 
Converts duration format {@code [D+]HH:MM:SS} to seconds */ +public class Dur2SecConvertFunction extends BaseConversionUDF { + + public static final Dur2SecConvertFunction INSTANCE = new Dur2SecConvertFunction(); + + // Matches [D+]HH:MM:SS — optional days prefix with + separator + private static final Pattern DURATION_PATTERN = + Pattern.compile("^(?:(\\d+)\\+)?(\\d{1,2}):(\\d{1,2}):(\\d{1,2})$"); + + public Dur2SecConvertFunction() { + super(Dur2SecConvertFunction.class); + } + + public static Object convert(Object value) { + return INSTANCE.convertValue(value); + } + + @Override + protected Object applyConversion(String preprocessedValue) { + Double existingSeconds = tryParseDouble(preprocessedValue); + if (existingSeconds != null) { + return existingSeconds; + } + + Matcher matcher = DURATION_PATTERN.matcher(preprocessedValue); + if (!matcher.matches()) { + return null; + } + + try { + int days = matcher.group(1) != null ? Integer.parseInt(matcher.group(1)) : 0; + int hours = Integer.parseInt(matcher.group(2)); + int minutes = Integer.parseInt(matcher.group(3)); + int seconds = Integer.parseInt(matcher.group(4)); + + if (hours >= 24 || minutes >= 60 || seconds >= 60) { + return null; + } + + return (double) (days * 86400 + hours * 3600 + minutes * 60 + seconds); + } catch (NumberFormatException e) { + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/MkTimeConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/MkTimeConvertFunction.java new file mode 100644 index 00000000000..0127d63e9cd --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/MkTimeConvertFunction.java @@ -0,0 +1,106 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import 
java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; +import org.opensearch.sql.expression.datetime.StrftimeFormatterUtil; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * PPL mktime() conversion function. Parses a human-readable time string into UNIX epoch seconds + * using strftime format specifiers. Default format: {@code %m/%d/%Y %H:%M:%S}. 
+ */ +public class MkTimeConvertFunction extends ImplementorUDF { + + public static final MkTimeConvertFunction INSTANCE = new MkTimeConvertFunction(); + + private static final String DEFAULT_FORMAT = "%m/%d/%Y %H:%M:%S"; + + public MkTimeConvertFunction() { + super(new MkTimeImplementor(), NullPolicy.ANY); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return ReturnTypes.explicit( + factory -> + factory.createTypeWithNullability(factory.createSqlType(SqlTypeName.DOUBLE), true)); + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return PPLOperandTypes.ANY_OPTIONAL_STRING; + } + + public static class MkTimeImplementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + if (translatedOperands.isEmpty()) { + return Expressions.constant(null, Double.class); + } + Expression fieldValue = Expressions.box(translatedOperands.get(0)); + if (translatedOperands.size() == 1) { + return Expressions.call(MkTimeConvertFunction.class, "convert", fieldValue); + } + Expression timeFormat = Expressions.box(translatedOperands.get(1)); + return Expressions.call( + MkTimeConvertFunction.class, "convertWithFormat", fieldValue, timeFormat); + } + } + + public static Object convert(Object value) { + return convertWithFormat(value, null); + } + + public static Object convertWithFormat(Object value, Object timeFormatObj) { + Double numeric = CTimeConvertFunction.toEpochSeconds(value); + if (numeric != null) { + return numeric; + } + if (value == null) { + return null; + } + String str = value instanceof String ? ((String) value).trim() : value.toString().trim(); + if (str.isEmpty()) { + return null; + } + + String strftimeFormat = + (timeFormatObj != null) ? 
timeFormatObj.toString().trim() : DEFAULT_FORMAT; + if (strftimeFormat.isEmpty()) { + return null; + } + return parseWithFormat(str, strftimeFormat); + } + + private static Object parseWithFormat(String dateStr, String strftimeFormat) { + try { + String javaPattern = StrftimeFormatterUtil.toJavaPattern(strftimeFormat); + DateTimeFormatter formatter = DateTimeFormatter.ofPattern(javaPattern, Locale.ROOT); + LocalDateTime dateTime = LocalDateTime.parse(dateStr, formatter); + return (double) dateTime.toEpochSecond(ZoneOffset.UTC); + } catch (DateTimeParseException | IllegalArgumentException e) { + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/MsTimeConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/MsTimeConvertFunction.java new file mode 100644 index 00000000000..362896b06b9 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/MsTimeConvertFunction.java @@ -0,0 +1,66 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * PPL mstime() conversion function. Converts {@code [MM:]SS.SSS} format to seconds The minutes + * portion is optional. 
+ */ +public class MsTimeConvertFunction extends BaseConversionUDF { + + public static final MsTimeConvertFunction INSTANCE = new MsTimeConvertFunction(); + + // Matches optional MM: prefix, required SS, optional .SSS + private static final Pattern MSTIME_PATTERN = + Pattern.compile("^(?:(\\d{1,2}):)?(\\d{1,2})(?:\\.(\\d{1,3}))?$"); + + public MsTimeConvertFunction() { + super(MsTimeConvertFunction.class); + } + + public static Object convert(Object value) { + return INSTANCE.convertValue(value); + } + + @Override + protected Object applyConversion(String preprocessedValue) { + Double existingSeconds = tryParseDouble(preprocessedValue); + if (existingSeconds != null) { + return existingSeconds; + } + + Matcher matcher = MSTIME_PATTERN.matcher(preprocessedValue); + if (!matcher.matches()) { + return null; + } + + try { + int minutes = matcher.group(1) != null ? Integer.parseInt(matcher.group(1)) : 0; + int seconds = Integer.parseInt(matcher.group(2)); + + if (seconds >= 60) { + return null; + } + + double millis = 0.0; + if (matcher.group(3) != null) { + String milliStr = matcher.group(3); + // Pad to 3 digits + while (milliStr.length() < 3) { + milliStr += "0"; + } + millis = Double.parseDouble(milliStr.substring(0, 3)) / 1000.0; + } + + return (double) (minutes * 60 + seconds) + millis; + } catch (NumberFormatException e) { + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java index 11e1a33afbd..fcc7d1a4640 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import 
org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -43,7 +44,7 @@ public MinspanBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java index a8f2625b20f..e0b10803ea4 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -47,7 +48,7 @@ public RangeBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java index 6970e485525..8610eb8ee9c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java +++ 
b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -41,7 +42,7 @@ public SpanBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java b/core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java index 7e194dfbf22..599f0cce410 100644 --- a/core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java +++ b/core/src/main/java/org/opensearch/sql/expression/parse/RegexCommonUtils.java @@ -13,6 +13,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; /** * Common utilities for regex operations. Provides pattern caching and consistent matching behavior. @@ -69,11 +71,15 @@ public static List getNamedGroupCandidates(String pattern) { String groupName = anyGroupMatcher.group(1); if (!isValidJavaRegexGroupName(groupName)) { - throw new IllegalArgumentException( - String.format( - "Invalid capture group name '%s'. 
Java regex group names must start with a letter" - + " and contain only letters and digits.", - groupName)); + throw ErrorReport.wrap( + new IllegalArgumentException( + String.format("Invalid capture group name '%s'.", groupName))) + .code(ErrorCode.SYNTAX_ERROR) + .location("while validating the capture groups for the pattern") + .suggestion( + "Java Regex capture groups must be alphanumeric and start with a letter. Update the" + + " capture group to be alphanumeric.") + .build(); } } diff --git a/core/src/main/java/org/opensearch/sql/planner/Planner.java b/core/src/main/java/org/opensearch/sql/planner/Planner.java index 4625d72d3fc..8a015bc072b 100644 --- a/core/src/main/java/org/opensearch/sql/planner/Planner.java +++ b/core/src/main/java/org/opensearch/sql/planner/Planner.java @@ -14,6 +14,7 @@ import org.opensearch.sql.planner.optimizer.LogicalPlanOptimizer; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.Table; +import org.opensearch.sql.storage.read.TableScanBuilder; /** Planner that plans and chooses the optimal physical plan. */ @RequiredArgsConstructor @@ -34,7 +35,35 @@ public PhysicalPlan plan(LogicalPlan plan) { if (table == null) { return plan.accept(new DefaultImplementor<>(), null); } - return table.implement(table.optimize(optimize(plan))); + LogicalPlan optimized = table.optimize(optimize(plan)); + // Give scan builders a chance to reject shapes that push-down alone cannot express safely + // (e.g. operators that land above the scan but outside its push-down contract). + validateScanBuilders(optimized); + return table.implement(optimized); + } + + /** + * Walk the optimized plan and invoke {@link TableScanBuilder#validatePlan(LogicalPlan)} on every + * scan builder, passing the fully optimized root so scan builders can inspect their ancestors. 
+ */ + private void validateScanBuilders(LogicalPlan optimized) { + optimized.accept( + new LogicalPlanNodeVisitor() { + @Override + public Void visitNode(LogicalPlan node, Object context) { + for (LogicalPlan child : node.getChild()) { + child.accept(this, context); + } + return null; + } + + @Override + public Void visitTableScanBuilder(TableScanBuilder node, Object context) { + node.validatePlan(optimized); + return null; + } + }, + null); } private Table findTable(LogicalPlan plan) { diff --git a/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java b/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java index b2da0b67a4b..3d2fb2872e5 100644 --- a/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java +++ b/core/src/main/java/org/opensearch/sql/storage/read/TableScanBuilder.java @@ -119,6 +119,19 @@ public boolean pushDownPageSize(LogicalPaginate paginate) { return false; } + /** + * Post-optimization validation hook. Called once by the planner after all push-down rules have + * run, with the fully optimized plan root. Subclasses may inspect the ancestors of this scan + * builder to reject planner shapes that push-down alone cannot express safely (for example, + * operators that land above the scan but outside its push-down contract and would be executed + * after the scan has already returned a bounded result set). Default is no-op. 
+ * + * @param root the fully optimized logical plan containing this scan builder + */ + public void validatePlan(LogicalPlan root) { + // no-op by default + } + @Override public R accept(LogicalPlanNodeVisitor visitor, C context) { return visitor.visitTableScanBuilder(this, context); diff --git a/core/src/test/java/org/opensearch/sql/executor/DelegatingExecutionEngineTest.java b/core/src/test/java/org/opensearch/sql/executor/DelegatingExecutionEngineTest.java new file mode 100644 index 00000000000..6e7c59d6ac6 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/executor/DelegatingExecutionEngineTest.java @@ -0,0 +1,164 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.List; +import org.apache.calcite.rel.RelNode; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.planner.physical.PhysicalPlan; + +@ExtendWith(MockitoExtension.class) +class DelegatingExecutionEngineTest { + + @Mock private ExecutionEngine defaultEngine; + + @Mock private ExecutionEngine extension1; + + @Mock private ExecutionEngine extension2; + + @Mock private RelNode relNode; + + @Mock private CalcitePlanContext calciteContext; + + @Mock private PhysicalPlan physicalPlan; + + @Mock private ExecutionContext executionContext; + + @Mock private ResponseListener queryListener; + + @Mock private ResponseListener explainListener; + + @Test + void 
executeRelNodeRoutesToMatchingExtension() { + when(extension1.canVectorize(relNode)).thenReturn(true); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1, extension2)); + + engine.execute(relNode, calciteContext, queryListener); + + verify(extension1).execute(relNode, calciteContext, queryListener); + verify(defaultEngine, never()).execute(any(RelNode.class), any(), eq(queryListener)); + } + + @Test + void executeRelNodeFallsBackToDefaultWhenNoExtensionMatches() { + when(extension1.canVectorize(relNode)).thenReturn(false); + when(extension2.canVectorize(relNode)).thenReturn(false); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1, extension2)); + + engine.execute(relNode, calciteContext, queryListener); + + verify(defaultEngine).execute(relNode, calciteContext, queryListener); + verify(extension1, never()).execute(any(RelNode.class), any(), eq(queryListener)); + verify(extension2, never()).execute(any(RelNode.class), any(), eq(queryListener)); + } + + @Test + void executeRelNodeRoutesToFirstMatchingExtension() { + when(extension1.canVectorize(relNode)).thenReturn(true); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1, extension2)); + + engine.execute(relNode, calciteContext, queryListener); + + verify(extension1).execute(relNode, calciteContext, queryListener); + verify(extension2, never()).execute(any(RelNode.class), any(), eq(queryListener)); + } + + @Test + void explainRelNodeRoutesToMatchingExtension() { + when(extension1.canVectorize(relNode)).thenReturn(true); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.explain(relNode, ExplainMode.STANDARD, calciteContext, explainListener); + + verify(extension1).explain(relNode, ExplainMode.STANDARD, calciteContext, explainListener); + verify(defaultEngine, never()).explain(any(RelNode.class), 
any(), any(), eq(explainListener)); + } + + @Test + void explainRelNodeFallsBackToDefaultWhenNoExtensionMatches() { + when(extension1.canVectorize(relNode)).thenReturn(false); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.explain(relNode, ExplainMode.STANDARD, calciteContext, explainListener); + + verify(defaultEngine).explain(relNode, ExplainMode.STANDARD, calciteContext, explainListener); + } + + @Test + void canVectorizeReturnsTrueWhenExtensionMatches() { + when(extension1.canVectorize(relNode)).thenReturn(false); + when(extension2.canVectorize(relNode)).thenReturn(true); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1, extension2)); + + assert engine.canVectorize(relNode); + } + + @Test + void canVectorizeReturnsFalseWhenNoExtensionMatches() { + when(extension1.canVectorize(relNode)).thenReturn(false); + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + assert !engine.canVectorize(relNode); + } + + @Test + void physicalPlanExecuteDelegatesToDefault() { + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.execute(physicalPlan, queryListener); + + verify(defaultEngine).execute(physicalPlan, queryListener); + } + + @Test + void physicalPlanExecuteWithContextDelegatesToDefault() { + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.execute(physicalPlan, executionContext, queryListener); + + verify(defaultEngine).execute(physicalPlan, executionContext, queryListener); + } + + @Test + void physicalPlanExplainDelegatesToDefault() { + DelegatingExecutionEngine engine = + new DelegatingExecutionEngine(defaultEngine, List.of(extension1)); + + engine.explain(physicalPlan, explainListener); + + verify(defaultEngine).explain(physicalPlan, explainListener); + } + + 
@Test + void emptyExtensionsListAlwaysFallsBackToDefault() { + DelegatingExecutionEngine engine = new DelegatingExecutionEngine(defaultEngine, List.of()); + + engine.execute(relNode, calciteContext, queryListener); + + verify(defaultEngine).execute(relNode, calciteContext, queryListener); + } +} diff --git a/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java b/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java new file mode 100644 index 00000000000..4de596fb375 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java @@ -0,0 +1,396 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.executor.analytics; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.Collections; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeSystem; +import org.apache.calcite.sql.type.SqlTypeFactoryImpl; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.analytics.exec.QueryPlanExecutor; +import org.opensearch.core.action.ActionListener; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.calcite.SysLimit; 
+import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; +import org.opensearch.sql.executor.ExecutionEngine.QueryResponse; +import org.opensearch.sql.planner.physical.PhysicalPlan; + +class AnalyticsExecutionEngineTest { + + private AnalyticsExecutionEngine engine; + + @SuppressWarnings("unchecked") + private QueryPlanExecutor> mockExecutor; + + private CalcitePlanContext mockContext; + + @BeforeEach + void setUp() throws Exception { + mockExecutor = (QueryPlanExecutor>) mock(QueryPlanExecutor.class); + engine = new AnalyticsExecutionEngine(mockExecutor); + mockContext = mock(CalcitePlanContext.class); + setSysLimit(mockContext, SysLimit.DEFAULT); + } + + /** Sets the public final sysLimit field on a mocked CalcitePlanContext. */ + private static void setSysLimit(CalcitePlanContext context, SysLimit sysLimit) throws Exception { + Field field = CalcitePlanContext.class.getDeclaredField("sysLimit"); + field.setAccessible(true); + field.set(context, sysLimit); + } + + /** QueryPlanExecutor became async in analytics-framework 3.7 — stub the listener callback. 
*/ + @SuppressWarnings("unchecked") + private void stubExecutorWith(RelNode relNode, Iterable rows) { + doAnswer( + inv -> { + ((ActionListener>) inv.getArgument(2)).onResponse(rows); + return null; + }) + .when(mockExecutor) + .execute(eq(relNode), any(), any(ActionListener.class)); + } + + @SuppressWarnings("unchecked") + private void stubExecutorWithError(RelNode relNode, Exception error) { + doAnswer( + inv -> { + ((ActionListener>) inv.getArgument(2)).onFailure(error); + return null; + }) + .when(mockExecutor) + .execute(eq(relNode), any(), any(ActionListener.class)); + } + + @Test + void executeRelNode_basicTypesAndRows() { + RelNode relNode = mockRelNode("name", SqlTypeName.VARCHAR, "age", SqlTypeName.INTEGER); + Iterable rows = Arrays.asList(new Object[] {"Alice", 30}, new Object[] {"Bob", 25}); + stubExecutorWith(relNode, rows); + + QueryResponse response = executeAndCapture(relNode); + String dump = dumpResponse(response); + + // Schema: 2 columns [name:STRING, age:INTEGER] + assertEquals(2, response.getSchema().getColumns().size(), "Column count. " + dump); + assertEquals("name", response.getSchema().getColumns().get(0).getName(), dump); + assertEquals(ExprCoreType.STRING, response.getSchema().getColumns().get(0).getExprType(), dump); + assertEquals("age", response.getSchema().getColumns().get(1).getName(), dump); + assertEquals( + ExprCoreType.INTEGER, response.getSchema().getColumns().get(1).getExprType(), dump); + + // Rows: [{name=Alice, age=30}, {name=Bob, age=25}] + assertEquals(2, response.getResults().size(), "Row count. " + dump); + assertEquals( + "Alice", response.getResults().get(0).tupleValue().get("name").value(), "Row 0. " + dump); + assertEquals( + 30, response.getResults().get(0).tupleValue().get("age").value(), "Row 0. " + dump); + assertEquals( + "Bob", response.getResults().get(1).tupleValue().get("name").value(), "Row 1. " + dump); + assertEquals( + 25, response.getResults().get(1).tupleValue().get("age").value(), "Row 1. 
" + dump); + + // Cursor: None + assertEquals(org.opensearch.sql.executor.pagination.Cursor.None, response.getCursor(), dump); + } + + @Test + void executeRelNode_numericTypes() { + RelNode relNode = + mockRelNode( + "b", SqlTypeName.TINYINT, + "s", SqlTypeName.SMALLINT, + "i", SqlTypeName.INTEGER, + "l", SqlTypeName.BIGINT, + "f", SqlTypeName.FLOAT, + "d", SqlTypeName.DOUBLE); + Iterable rows = + Collections.singletonList(new Object[] {(byte) 1, (short) 2, 3, 4L, 5.0f, 6.0}); + stubExecutorWith(relNode, rows); + + QueryResponse response = executeAndCapture(relNode); + String dump = dumpResponse(response); + + assertEquals(ExprCoreType.BYTE, response.getSchema().getColumns().get(0).getExprType(), dump); + assertEquals(ExprCoreType.SHORT, response.getSchema().getColumns().get(1).getExprType(), dump); + assertEquals( + ExprCoreType.INTEGER, response.getSchema().getColumns().get(2).getExprType(), dump); + assertEquals(ExprCoreType.LONG, response.getSchema().getColumns().get(3).getExprType(), dump); + assertEquals(ExprCoreType.FLOAT, response.getSchema().getColumns().get(4).getExprType(), dump); + assertEquals(ExprCoreType.DOUBLE, response.getSchema().getColumns().get(5).getExprType(), dump); + + // Verify actual values + assertEquals( + (byte) 1, + response.getResults().get(0).tupleValue().get("b").value(), + "byte value. " + dump); + assertEquals( + (short) 2, + response.getResults().get(0).tupleValue().get("s").value(), + "short value. " + dump); + assertEquals( + 3, response.getResults().get(0).tupleValue().get("i").value(), "int value. " + dump); + assertEquals( + 4L, response.getResults().get(0).tupleValue().get("l").value(), "long value. " + dump); + assertEquals( + 5.0f, response.getResults().get(0).tupleValue().get("f").value(), "float value. " + dump); + assertEquals( + 6.0, response.getResults().get(0).tupleValue().get("d").value(), "double value. 
" + dump); + } + + @Test + void executeRelNode_temporalTypes() { + RelNode relNode = + mockRelNode("dt", SqlTypeName.DATE, "tm", SqlTypeName.TIME, "ts", SqlTypeName.TIMESTAMP); + Iterable emptyRows = Collections.emptyList(); + stubExecutorWith(relNode, emptyRows); + + QueryResponse response = executeAndCapture(relNode); + String dump = dumpResponse(response); + + assertEquals(ExprCoreType.DATE, response.getSchema().getColumns().get(0).getExprType(), dump); + assertEquals(ExprCoreType.TIME, response.getSchema().getColumns().get(1).getExprType(), dump); + assertEquals( + ExprCoreType.TIMESTAMP, response.getSchema().getColumns().get(2).getExprType(), dump); + assertEquals(0, response.getResults().size(), "Should have 0 rows. " + dump); + } + + // Query size limit is now enforced in the RelNode plan (LogicalSystemLimit) before it reaches + // AnalyticsExecutionEngine. The engine trusts the executor to honor the limit. + + @Test + void executeRelNode_emptyResults() { + RelNode relNode = mockRelNode("name", SqlTypeName.VARCHAR); + Iterable emptyRows = Collections.emptyList(); + stubExecutorWith(relNode, emptyRows); + + QueryResponse response = executeAndCapture(relNode); + String dump = dumpResponse(response); + + assertEquals(1, response.getSchema().getColumns().size(), "Schema column count. " + dump); + assertEquals(0, response.getResults().size(), "Row count should be 0. " + dump); + } + + @Test + void executeRelNode_nullValues() { + RelNode relNode = mockRelNode("name", SqlTypeName.VARCHAR, "age", SqlTypeName.INTEGER); + Iterable rows = Collections.singletonList(new Object[] {null, null}); + stubExecutorWith(relNode, rows); + + QueryResponse response = executeAndCapture(relNode); + String dump = dumpResponse(response); + + assertEquals(1, response.getResults().size(), "Row count. " + dump); + assertTrue( + response.getResults().get(0).tupleValue().get("name").isNull(), + "name should be null. 
" + dump); + assertTrue( + response.getResults().get(0).tupleValue().get("age").isNull(), + "age should be null. " + dump); + } + + @Test + void executeRelNode_errorPropagation() { + RelNode relNode = mockRelNode("id", SqlTypeName.INTEGER); + stubExecutorWithError(relNode, new RuntimeException("Engine failure")); + + Exception error = executeAndCaptureError(relNode); + System.out.println(dumpError("executeRelNode_errorPropagation", error)); + + assertEquals( + "Engine failure", + error.getMessage(), + "Exception type: " + error.getClass().getSimpleName() + ", message: " + error.getMessage()); + } + + @Test + void physicalPlanExecute_callsOnFailure() { + PhysicalPlan physicalPlan = mock(PhysicalPlan.class); + AtomicReference errorRef = new AtomicReference<>(); + engine.execute(physicalPlan, failureListener(errorRef)); + + assertNotNull(errorRef.get(), "onFailure should have been called"); + System.out.println(dumpError("physicalPlanExecute_callsOnFailure", errorRef.get())); + assertTrue( + errorRef.get() instanceof UnsupportedOperationException, + "Expected UnsupportedOperationException, got: " + + errorRef.get().getClass().getSimpleName() + + " - " + + errorRef.get().getMessage()); + } + + @Test + void physicalPlanExecuteWithContext_callsOnFailure() { + PhysicalPlan physicalPlan = mock(PhysicalPlan.class); + AtomicReference errorRef = new AtomicReference<>(); + engine.execute( + physicalPlan, + org.opensearch.sql.executor.ExecutionContext.emptyExecutionContext(), + failureListener(errorRef)); + + assertNotNull(errorRef.get(), "onFailure should have been called"); + System.out.println(dumpError("physicalPlanExecuteWithContext_callsOnFailure", errorRef.get())); + assertTrue( + errorRef.get() instanceof UnsupportedOperationException, + "Expected UnsupportedOperationException, got: " + + errorRef.get().getClass().getSimpleName() + + " - " + + errorRef.get().getMessage()); + } + + @Test + void physicalPlanExplain_callsOnFailure() { + PhysicalPlan physicalPlan = 
mock(PhysicalPlan.class); + AtomicReference errorRef = new AtomicReference<>(); + engine.explain(physicalPlan, explainFailureListener(errorRef)); + + assertNotNull(errorRef.get(), "onFailure should have been called"); + System.out.println(dumpError("physicalPlanExplain_callsOnFailure", errorRef.get())); + assertTrue( + errorRef.get() instanceof UnsupportedOperationException, + "Expected UnsupportedOperationException, got: " + + errorRef.get().getClass().getSimpleName() + + " - " + + errorRef.get().getMessage()); + } + + // --- helpers --- + + private QueryResponse executeAndCapture(RelNode relNode) { + AtomicReference ref = new AtomicReference<>(); + engine.execute(relNode, mockContext, captureListener(ref)); + assertNotNull(ref.get(), "QueryResponse should not be null"); + // Always print the full response so test output shows exact results + System.out.println(dumpResponse(ref.get())); + return ref.get(); + } + + private Exception executeAndCaptureError(RelNode relNode) { + AtomicReference ref = new AtomicReference<>(); + engine.execute( + relNode, + mockContext, + new ResponseListener() { + @Override + public void onResponse(QueryResponse response) {} + + @Override + public void onFailure(Exception e) { + ref.set(e); + } + }); + assertNotNull(ref.get(), "onFailure should have been called"); + return ref.get(); + } + + private ResponseListener failureListener(AtomicReference ref) { + return new ResponseListener() { + @Override + public void onResponse(QueryResponse response) {} + + @Override + public void onFailure(Exception e) { + ref.set(e); + } + }; + } + + private ResponseListener explainFailureListener(AtomicReference ref) { + return new ResponseListener() { + @Override + public void onResponse(ExplainResponse response) {} + + @Override + public void onFailure(Exception e) { + ref.set(e); + } + }; + } + + private String dumpError(String testName, Exception e) { + return "\n--- " + + testName + + " ---\n" + + "Exception: " + + e.getClass().getSimpleName() + + 
"\n" + + "Message: " + + e.getMessage() + + "\n--- End ---"; + } + + /** Dumps the full QueryResponse into a readable string for test output and assertion messages. */ + private String dumpResponse(QueryResponse response) { + StringBuilder sb = new StringBuilder(); + sb.append("\n--- QueryResponse ---\n"); + + sb.append("Schema: ["); + sb.append( + response.getSchema().getColumns().stream() + .map(c -> c.getName() + ":" + c.getExprType().typeName()) + .collect(Collectors.joining(", "))); + sb.append("]\n"); + + sb.append("Rows (").append(response.getResults().size()).append("):\n"); + for (int i = 0; i < response.getResults().size(); i++) { + sb.append(" [").append(i).append("] "); + sb.append(response.getResults().get(i).tupleValue()); + sb.append("\n"); + } + + sb.append("Cursor: ").append(response.getCursor()).append("\n"); + sb.append("--- End ---"); + return sb.toString(); + } + + private RelNode mockRelNode(Object... nameTypePairs) { + SqlTypeFactoryImpl typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + RelDataTypeFactory.Builder builder = typeFactory.builder(); + for (int i = 0; i < nameTypePairs.length; i += 2) { + String name = (String) nameTypePairs[i]; + SqlTypeName typeName = (SqlTypeName) nameTypePairs[i + 1]; + builder.add(name, typeName); + } + RelDataType rowType = builder.build(); + + RelNode relNode = mock(RelNode.class); + when(relNode.getRowType()).thenReturn(rowType); + return relNode; + } + + private ResponseListener captureListener(AtomicReference ref) { + return new ResponseListener() { + @Override + public void onResponse(QueryResponse response) { + ref.set(response); + } + + @Override + public void onFailure(Exception e) { + throw new AssertionError("Unexpected failure", e); + } + }; + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java index 
6dbc1901fa7..600a802615a 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java @@ -14,6 +14,12 @@ import java.util.Collections; import java.util.List; import java.util.stream.Collectors; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeSystem; +import org.apache.calcite.sql.ExplicitOperatorBinding; +import org.apache.calcite.sql.fun.SqlLibraryOperators; +import org.apache.calcite.sql.type.SqlTypeFactoryImpl; import org.apache.calcite.sql.type.SqlTypeName; import org.junit.jupiter.api.Test; @@ -302,4 +308,79 @@ public void testArrayWithCharTypePreservesNulls() { assertNull(list.get(1), "Null should be preserved during CHAR type conversion"); assertEquals("y", list.get(2)); } + + // ==================== RETURN-TYPE INFERENCE TESTS ==================== + // These tests cover the return-type fallback the analytics-engine route depends on: + // when Calcite can't infer a concrete element type (no operands, or all-null operands), + // we substitute VARCHAR so the call's return type is substrait-serializable. Without the + // fallback Calcite emits ARRAY / ARRAY, which fails substrait conversion + // with "Unable to convert the type UNKNOWN" downstream. + + /** array() — empty operand list — returns ARRAY. */ + @Test + public void testReturnTypeForEmptyCallIsVarcharArray() { + RelDataType returnType = inferReturnType(); + assertEquals(SqlTypeName.ARRAY, returnType.getSqlTypeName()); + RelDataType element = returnType.getComponentType(); + assertNotNull(element); + assertEquals(SqlTypeName.VARCHAR, element.getSqlTypeName()); + assertTrue(element.isNullable(), "Element type should be nullable per existing semantics"); + } + + /** array(NULL) — single typeless-null operand — also falls back to ARRAY. 
*/ + @Test + public void testReturnTypeForAllNullOperandsIsVarcharArray() { + RelDataTypeFactory typeFactory = newTypeFactory(); + RelDataType nullType = typeFactory.createSqlType(SqlTypeName.NULL); + RelDataType returnType = inferReturnType(nullType); + assertEquals(SqlTypeName.ARRAY, returnType.getSqlTypeName()); + RelDataType element = returnType.getComponentType(); + assertNotNull(element); + assertEquals(SqlTypeName.VARCHAR, element.getSqlTypeName()); + } + + /** array(1) — INTEGER operand — preserves the inferred element type (no fallback). */ + @Test + public void testReturnTypeForIntegerOperandPreservesType() { + RelDataTypeFactory typeFactory = newTypeFactory(); + RelDataType intType = typeFactory.createSqlType(SqlTypeName.INTEGER); + RelDataType returnType = inferReturnType(intType); + assertEquals(SqlTypeName.ARRAY, returnType.getSqlTypeName()); + RelDataType element = returnType.getComponentType(); + assertNotNull(element); + assertEquals( + SqlTypeName.INTEGER, + element.getSqlTypeName(), + "Concrete element types must not be affected by the VARCHAR fallback"); + } + + /** array('a', 'b') — VARCHAR operands — already VARCHAR, fallback path doesn't fire. */ + @Test + public void testReturnTypeForVarcharOperandPreservesType() { + RelDataTypeFactory typeFactory = newTypeFactory(); + RelDataType varcharType = typeFactory.createSqlType(SqlTypeName.VARCHAR); + RelDataType returnType = inferReturnType(varcharType, varcharType); + assertEquals(SqlTypeName.ARRAY, returnType.getSqlTypeName()); + assertEquals(SqlTypeName.VARCHAR, returnType.getComponentType().getSqlTypeName()); + } + + /** + * Helper — invokes {@code new ArrayFunctionImpl().getReturnTypeInference().inferReturnType(...)} + * via Calcite's {@link ExplicitOperatorBinding}, which is the public test harness for exercising + * a return-type inference against a specific operand-type list. 
We bind it to {@link + * SqlLibraryOperators#ARRAY} so the inference's internal call to {@code + * SqlLibraryOperators.ARRAY.getReturnTypeInference().inferReturnType(...)} resolves the same + * operator the lambda delegates to. + */ + private static RelDataType inferReturnType(RelDataType... operandTypes) { + RelDataTypeFactory typeFactory = newTypeFactory(); + ExplicitOperatorBinding binding = + new ExplicitOperatorBinding( + typeFactory, SqlLibraryOperators.ARRAY, Arrays.asList(operandTypes)); + return new ArrayFunctionImpl().getReturnTypeInference().inferReturnType(binding); + } + + private static RelDataTypeFactory newTypeFactory() { + return new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + } } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionFunctionsTest.java index 163d6508445..490f72ba346 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionFunctionsTest.java @@ -336,4 +336,132 @@ public void testRmunitConvertNumericExtremes() { assertEquals(1.7e308, RmunitConvertFunction.convert("1.7e308")); assertEquals(-1.7e308, RmunitConvertFunction.convert("-1.7e308")); } + + // ctime() Function Tests + @Test + public void testCtimeConvertBasic() { + // Default format is %m/%d/%Y %H:%M:%S + assertEquals("10/18/2003 20:07:13", CTimeConvertFunction.convert(1066507633)); + assertEquals("01/01/1970 00:00:00", CTimeConvertFunction.convert(0)); + assertEquals("10/18/2003 20:07:13", CTimeConvertFunction.convert("1066507633")); + } + + @Test + public void testCtimeConvertInvalid() { + assertNull(CTimeConvertFunction.convert("invalid")); + assertNull(CTimeConvertFunction.convert(null)); + assertNull(CTimeConvertFunction.convert("")); + assertNull(CTimeConvertFunction.convert("abc123")); + } + + // mktime() Function Tests 
+ @Test + public void testMktimeConvertBasic() { + // Default format is %m/%d/%Y %H:%M:%S + assertEquals(1066507633.0, MkTimeConvertFunction.convert("10/18/2003 20:07:13")); + assertEquals(946684800.0, MkTimeConvertFunction.convert("01/01/2000 00:00:00")); + assertEquals(1066473433.0, MkTimeConvertFunction.convert(1066473433)); + assertEquals(1066473433.0, MkTimeConvertFunction.convert("1066473433")); + } + + @Test + public void testMktimeConvertInvalid() { + assertNull(MkTimeConvertFunction.convert("invalid")); + assertNull(MkTimeConvertFunction.convert(null)); + assertNull(MkTimeConvertFunction.convert("")); + assertNull(MkTimeConvertFunction.convert("not-a-date")); + } + + // mstime() Function Tests + @Test + public void testMstimeConvertBasic() { + assertEquals(225.0, MsTimeConvertFunction.convert("03:45")); + assertEquals(225.123, MsTimeConvertFunction.convert("03:45.123")); + assertEquals(90.5, MsTimeConvertFunction.convert("01:30.5")); + assertEquals(3661.0, MsTimeConvertFunction.convert("61:01")); + + // SS.SSS without MM: prefix + assertEquals(45.123, MsTimeConvertFunction.convert("45.123")); + assertEquals(30.0, MsTimeConvertFunction.convert("30")); + + // Test already numeric + assertEquals(225.0, MsTimeConvertFunction.convert(225)); + assertEquals(225.0, MsTimeConvertFunction.convert("225")); + } + + @Test + public void testMstimeConvertEdgeCases() { + assertEquals(0.0, MsTimeConvertFunction.convert("00:00")); + assertEquals(0.001, MsTimeConvertFunction.convert("00:00.001")); + assertEquals(59.999, MsTimeConvertFunction.convert("00:59.999")); + } + + @Test + public void testMstimeConvertInvalid() { + assertNull(MsTimeConvertFunction.convert("invalid")); + assertNull(MsTimeConvertFunction.convert(null)); + assertNull(MsTimeConvertFunction.convert("")); + assertNull(MsTimeConvertFunction.convert("25:70")); + assertNull(MsTimeConvertFunction.convert("1:2:3")); + } + + // dur2sec() Function Tests + @Test + public void testDur2secConvertBasic() { + 
assertEquals(5025.0, Dur2SecConvertFunction.convert("01:23:45")); + assertEquals(3661.0, Dur2SecConvertFunction.convert("01:01:01")); + assertEquals(217815.0, Dur2SecConvertFunction.convert("2+12:30:15")); + assertEquals(90061.0, Dur2SecConvertFunction.convert("1+01:01:01")); + assertEquals(5025.0, Dur2SecConvertFunction.convert(5025)); + assertEquals(5025.0, Dur2SecConvertFunction.convert("5025")); + } + + @Test + public void testDur2secConvertEdgeCases() { + assertEquals(0.0, Dur2SecConvertFunction.convert("00:00:00")); + assertEquals(86400.0, Dur2SecConvertFunction.convert("1+00:00:00")); + assertEquals(3599.0, Dur2SecConvertFunction.convert("00:59:59")); + } + + @Test + public void testDur2secConvertInvalid() { + assertNull(Dur2SecConvertFunction.convert("invalid")); + assertNull(Dur2SecConvertFunction.convert(null)); + assertNull(Dur2SecConvertFunction.convert("")); + assertNull(Dur2SecConvertFunction.convert("25:70:80")); + assertNull(Dur2SecConvertFunction.convert("1:2")); + assertNull(Dur2SecConvertFunction.convert("1+2")); + } + + // timeformat tests for mktime() and ctime() + @Test + public void testMktimeWithCustomTimeformat() { + // Strftime format specifiers + assertEquals( + 1066507633.0, + MkTimeConvertFunction.convertWithFormat("18/10/2003 20:07:13", "%d/%m/%Y %H:%M:%S")); + assertEquals( + 1066507633.0, + MkTimeConvertFunction.convertWithFormat("2003-10-18 20:07:13", "%Y-%m-%d %H:%M:%S")); + assertEquals( + 946684800.0, + MkTimeConvertFunction.convertWithFormat("01/01/2000 00:00:00", "%d/%m/%Y %H:%M:%S")); + + // Invalid format returns null + assertNull(MkTimeConvertFunction.convertWithFormat("2003-10-18 20:07:13", "invalid format")); + + assertNull(MkTimeConvertFunction.convertWithFormat("10/18/2003 20:07:13", "")); + } + + @Test + public void testCtimeWithCustomTimeformat() { + // Strftime format specifiers + assertEquals( + "2003-10-18 20:07:13", + CTimeConvertFunction.convertWithFormat(1066507633, "%Y-%m-%d %H:%M:%S")); + 
assertEquals("18/10/2003", CTimeConvertFunction.convertWithFormat(1066507633, "%d/%m/%Y")); + assertEquals("1970", CTimeConvertFunction.convertWithFormat(0, "%Y")); + + assertNull(CTimeConvertFunction.convertWithFormat(1066507633, "")); + } } diff --git a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java index 8159fd6c115..5916205e0f4 100644 --- a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java @@ -19,6 +19,8 @@ import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.LiteralExpression; +import org.opensearch.sql.expression.function.jsonUDF.JsonDeleteFunctionImpl; +import org.opensearch.sql.expression.function.jsonUDF.JsonSetFunctionImpl; import org.opensearch.sql.expression.function.jsonUDF.JsonUtils; @ExtendWith(MockitoExtension.class) @@ -65,6 +67,18 @@ void test_convertToJsonPath() { assertEquals(targetJsonPath, convertedJsonPath); } + @Test + void test_convertToJsonPathWithDollarPrefix() { + // Issue #5167: paths already starting with $ or $. 
should not be double-prefixed + assertEquals("$.name", convertToJsonPath("$.name")); + assertEquals("$.a.b.c", convertToJsonPath("$.a.b.c")); + assertEquals("$.[*]", convertToJsonPath("$.[*]")); + assertEquals("$.a[2].c", convertToJsonPath("$.a[2].c")); + assertEquals("$.[3].bc[*].d[1]", convertToJsonPath("$.[3].bc[*].d[1]")); + // Bare $ should return $ + assertEquals("$", convertToJsonPath("$")); + } + @Test void test_convertToJsonPathWithWrongPath() { IllegalArgumentException e = @@ -100,6 +114,23 @@ void test_jsonPathExpand() { assertEquals(expandJsonPath(node, candidate4), target4); } + @Test + void test_jsonSetWithDollarPrefixedPath() { + // Issue #5167: json_set with $.key path should work correctly + Object result = + JsonSetFunctionImpl.eval( + "{\"name\":\"alice\",\"scores\":[90,85,92]}", "$.name", "modified_alice"); + assertEquals("{\"name\":\"modified_alice\",\"scores\":[90,85,92]}", result); + } + + @Test + void test_jsonDeleteWithDollarPrefixedPath() throws Exception { + // Issue #5167: json_delete with $.key path should remove the key + Object result = + JsonDeleteFunctionImpl.eval("{\"name\":\"alice\",\"scores\":[90,85,92]}", "$.name"); + assertEquals("{\"scores\":[90,85,92]}", result); + } + @Test void test_jsonPathExpandAtArray() { String jsonStr = "[{\"c\": 1}, {\"c\": 1}, {\"c\": 1}]"; diff --git a/core/src/test/java/org/opensearch/sql/expression/parse/RegexCommonUtilsTest.java b/core/src/test/java/org/opensearch/sql/expression/parse/RegexCommonUtilsTest.java index 2503b3929f1..e20c149d86b 100644 --- a/core/src/test/java/org/opensearch/sql/expression/parse/RegexCommonUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/parse/RegexCommonUtilsTest.java @@ -11,6 +11,7 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.error.ErrorReport; public class RegexCommonUtilsTest { @@ -197,10 +198,8 @@ public void 
testGetNamedGroupCandidatesWithNumericNames() { public void testGetNamedGroupCandidatesWithInvalidCharactersThrowsException() { // Test that groups with invalid characters throw exception (even if some are valid) String pattern = "(?[a-z]+)\\s+(?<123invalid>[0-9]+)\\s+(?.*)"; - IllegalArgumentException exception = - assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(pattern)); + ErrorReport exception = + assertThrows(ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(pattern)); // Should fail on the first invalid group name found assertTrue(exception.getMessage().contains("Invalid capture group name")); } @@ -217,74 +216,65 @@ public void testGetNamedGroupCandidatesValidAlphanumeric() { @Test public void testGetNamedGroupCandidatesWithUnderscore() { - // Test that underscores in named groups throw IllegalArgumentException + // Test that underscores in named groups throw ErrorReport String patternWithUnderscore = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithUnderscore)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain_name'")); - assertTrue( - exception - .getMessage() - .contains("must start with a letter and contain only letters and digits")); + assertTrue(exception.getSuggestion().contains("must be alphanumeric")); } @Test public void testGetNamedGroupCandidatesWithHyphen() { - // Test that hyphens in named groups throw IllegalArgumentException + // Test that hyphens in named groups throw ErrorReport String patternWithHyphen = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(patternWithHyphen)); + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithHyphen)); 
assertTrue(exception.getMessage().contains("Invalid capture group name 'domain-name'")); - assertTrue( - exception - .getMessage() - .contains("must start with a letter and contain only letters and digits")); + assertTrue(exception.getSuggestion().contains("must be alphanumeric")); } @Test public void testGetNamedGroupCandidatesWithDot() { - // Test that dots in named groups throw IllegalArgumentException + // Test that dots in named groups throw ErrorReport String patternWithDot = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(patternWithDot)); + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithDot)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain.name'")); } @Test public void testGetNamedGroupCandidatesWithSpace() { - // Test that spaces in named groups throw IllegalArgumentException + // Test that spaces in named groups throw ErrorReport String patternWithSpace = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(patternWithSpace)); + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithSpace)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain name'")); } @Test public void testGetNamedGroupCandidatesStartingWithDigit() { - // Test that group names starting with digit throw IllegalArgumentException + // Test that group names starting with digit throw ErrorReport String patternStartingWithDigit = ".+@(?<1domain>.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternStartingWithDigit)); assertTrue(exception.getMessage().contains("Invalid capture group name '1domain'")); } 
@Test public void testGetNamedGroupCandidatesWithSpecialCharacters() { - // Test that special characters in named groups throw IllegalArgumentException + // Test that special characters in named groups throw ErrorReport String patternWithSpecialChar = ".+@(?.+)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithSpecialChar)); assertTrue(exception.getMessage().contains("Invalid capture group name 'domain@name'")); } @@ -304,10 +294,9 @@ public void testGetNamedGroupCandidatesWithMixedInvalidValid() { // Test that even one invalid group name fails the entire validation String patternWithMixed = "(?[a-z]+)\\s+(?[0-9]+)\\s+(?.*)"; - IllegalArgumentException exception = + ErrorReport exception = assertThrows( - IllegalArgumentException.class, - () -> RegexCommonUtils.getNamedGroupCandidates(patternWithMixed)); + ErrorReport.class, () -> RegexCommonUtils.getNamedGroupCandidates(patternWithMixed)); assertTrue(exception.getMessage().contains("Invalid capture group name 'invalid_name'")); } } diff --git a/direct-query/src/test/java/org/opensearch/sql/directquery/rest/RestDirectQueryManagementActionTest.java b/direct-query/src/test/java/org/opensearch/sql/directquery/rest/RestDirectQueryManagementActionTest.java index 3193e6506dc..3e35e0206b8 100644 --- a/direct-query/src/test/java/org/opensearch/sql/directquery/rest/RestDirectQueryManagementActionTest.java +++ b/direct-query/src/test/java/org/opensearch/sql/directquery/rest/RestDirectQueryManagementActionTest.java @@ -103,16 +103,16 @@ public void testWhenDataSourcesAreEnabled() { "{\"query\":\"up\",\"language\":\"promql\",\"options\":{\"queryType\":\"instant\",\"time\":\"1609459200\"}}"; when(request.contentParser()) .thenReturn( - new org.opensearch.common.xcontent.json.JsonXContentParser( + org.opensearch.common.xcontent.json.JsonXContent.jsonXContent.createParser( 
org.opensearch.core.xcontent.NamedXContentRegistry.EMPTY, org.opensearch.core.xcontent.DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - new com.fasterxml.jackson.core.JsonFactory().createParser(requestContent))); + requestContent)); when(request.contentParser()) .thenReturn( - new org.opensearch.common.xcontent.json.JsonXContentParser( + org.opensearch.common.xcontent.json.JsonXContent.jsonXContent.createParser( org.opensearch.core.xcontent.NamedXContentRegistry.EMPTY, org.opensearch.core.xcontent.DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - new com.fasterxml.jackson.core.JsonFactory().createParser(requestContent))); + requestContent)); unit.handleRequest(request, channel, nodeClient); verify(threadPool, Mockito.times(1)) @@ -389,10 +389,10 @@ private ActionListener makeRequest(String requestContent) { Mockito.when(request.param("dataSources")).thenReturn("testDataSource"); Mockito.when(request.contentParser()) .thenReturn( - new org.opensearch.common.xcontent.json.JsonXContentParser( + org.opensearch.common.xcontent.json.JsonXContent.jsonXContent.createParser( org.opensearch.core.xcontent.NamedXContentRegistry.EMPTY, org.opensearch.core.xcontent.DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - new com.fasterxml.jackson.core.JsonFactory().createParser(requestContent))); + requestContent)); Mockito.when(request.consumedParams()).thenReturn(java.util.Collections.emptyList()); Mockito.when(request.params()).thenReturn(java.util.Collections.emptyMap()); diff --git a/docs/category.json b/docs/category.json index 5e9b6f954a5..2342ada464d 100644 --- a/docs/category.json +++ b/docs/category.json @@ -48,6 +48,7 @@ "user/ppl/cmd/top.md", "user/ppl/cmd/trendline.md", "user/ppl/cmd/transpose.md", + "user/ppl/cmd/union.md", "user/ppl/cmd/where.md", "user/ppl/functions/aggregations.md", "user/ppl/functions/collection.md", diff --git a/docs/dev/opensearch-nested-field-subquery.md b/docs/dev/opensearch-nested-field-subquery.md index 58e8c8cd68a..9f86374b37b 100644 --- 
a/docs/dev/opensearch-nested-field-subquery.md +++ b/docs/dev/opensearch-nested-field-subquery.md @@ -53,8 +53,7 @@ GET /employee_nested/_search "_source": { "includes": [ "name" - ], - "excludes": [] + ] } } @@ -110,8 +109,7 @@ WHERE EXISTS(SELECT * "_source": { "includes": [ "name" - ], - "excludes": [] + ] } } diff --git a/docs/dev/sql-nested-function-select-clause.md b/docs/dev/sql-nested-function-select-clause.md index cbbe82bc9df..a2d109fad2a 100644 --- a/docs/dev/sql-nested-function-select-clause.md +++ b/docs/dev/sql-nested-function-select-clause.md @@ -106,8 +106,7 @@ A basic nested function in the SELECT clause and output DSL pushed to OpenSearch "_source": { "includes": [ "message.info" - ], - "excludes": [] + ] } } } @@ -147,8 +146,7 @@ Example with multiple SELECT clause function calls sharing same path. These two "includes": [ "message.info", "message.author" - ], - "excludes": [] + ] } } } @@ -187,8 +185,7 @@ An example with multiple nested function calls in the SELECT clause having diffe "_source": { "includes": [ "comment.data" - ], - "excludes": [] + ] } } } @@ -207,8 +204,7 @@ An example with multiple nested function calls in the SELECT clause having diffe "_source": { "includes": [ "message.info" - ], - "excludes": [] + ] } } } diff --git a/docs/user/beyond/fulltext.rst b/docs/user/beyond/fulltext.rst index 558ddfadbf4..9e391642f79 100644 --- a/docs/user/beyond/fulltext.rst +++ b/docs/user/beyond/fulltext.rst @@ -80,8 +80,7 @@ Explain:: "includes" : [ "account_number", "address" - ], - "excludes" : [ ] + ] } } @@ -150,8 +149,7 @@ Explain:: "includes" : [ "account_number", "address" - ], - "excludes" : [ ] + ] } } @@ -230,8 +228,7 @@ Explain:: "includes" : [ "firstname", "lastname" - ], - "excludes" : [ ] + ] } } @@ -311,8 +308,7 @@ Explain:: "includes" : [ "account_number", "address" - ], - "excludes" : [ ] + ] } } @@ -386,8 +382,7 @@ Explain:: "includes" : [ "account_number", "address" - ], - "excludes" : [ ] + ] } } @@ -492,8 +487,7 @@ 
Explain:: "account_number", "address", "_score" - ], - "excludes" : [ ] + ] }, "sort" : [ { diff --git a/docs/user/beyond/partiql.rst b/docs/user/beyond/partiql.rst index d8e4b0722bd..c4a658f25cb 100644 --- a/docs/user/beyond/partiql.rst +++ b/docs/user/beyond/partiql.rst @@ -286,8 +286,7 @@ Explain:: "_source" : { "includes" : [ "projects.name" - ], - "excludes" : [ ] + ] } } } @@ -305,8 +304,7 @@ Explain:: "_source" : { "includes" : [ "name" - ], - "excludes" : [ ] + ] } } @@ -423,8 +421,7 @@ Explain:: "_source" : { "includes" : [ "name" - ], - "excludes" : [ ] + ] } } diff --git a/docs/user/dql/basics.rst b/docs/user/dql/basics.rst index 0ecc45abfda..6ae8557c1da 100644 --- a/docs/user/dql/basics.rst +++ b/docs/user/dql/basics.rst @@ -136,8 +136,7 @@ Explain:: "includes" : [ "firstname", "lastname" - ], - "excludes" : [ ] + ] } } @@ -181,8 +180,7 @@ Explain:: "_routing", "_sort", "lastname" - ], - "excludes" : [ ] + ] } } @@ -220,8 +218,7 @@ Explain:: "_source" : { "includes" : [ "account_number" - ], - "excludes" : [ ] + ] } } @@ -260,8 +257,7 @@ Explain:: "_source" : { "includes" : [ "age" - ], - "excludes" : [ ] + ] }, "stored_fields" : "age", "aggregations" : { @@ -422,8 +418,7 @@ Explain:: "_source" : { "includes" : [ "account_number" - ], - "excludes" : [ ] + ] } } @@ -493,8 +488,7 @@ Explain:: "includes" : [ "account_number", "employer" - ], - "excludes" : [ ] + ] } } @@ -539,8 +533,7 @@ Explain:: "_source" : { "includes" : [ "age" - ], - "excludes" : [ ] + ] }, "stored_fields" : "age", "aggregations" : { @@ -603,8 +596,7 @@ Explain:: "_source" : { "includes" : [ "account_number" - ], - "excludes" : [ ] + ] }, "stored_fields" : "account_number", "aggregations" : { @@ -667,8 +659,7 @@ Explain:: "_source" : { "includes" : [ "age" - ], - "excludes" : [ ] + ] }, "stored_fields" : "age", "aggregations" : { @@ -731,8 +722,7 @@ Explain:: "_source" : { "includes" : [ "script" - ], - "excludes" : [ ] + ] }, "stored_fields" : "abs(age)", "script_fields" : { @@ 
-815,8 +805,7 @@ Explain:: "includes" : [ "age", "MAX" - ], - "excludes" : [ ] + ] }, "stored_fields" : "age", "aggregations" : { @@ -904,8 +893,7 @@ Explain:: "_source" : { "includes" : [ "account_number" - ], - "excludes" : [ ] + ] }, "sort" : [ { @@ -955,8 +943,7 @@ Explain:: "_source" : { "includes" : [ "employer" - ], - "excludes" : [ ] + ] }, "sort" : [ { @@ -1075,8 +1062,7 @@ Explain:: "_source" : { "includes" : [ "account_number" - ], - "excludes" : [ ] + ] }, "sort" : [ { @@ -1120,8 +1106,7 @@ Explain:: "_source" : { "includes" : [ "account_number" - ], - "excludes" : [ ] + ] }, "sort" : [ { diff --git a/docs/user/dql/complex.rst b/docs/user/dql/complex.rst index 906ea21904b..dd751bcd26e 100644 --- a/docs/user/dql/complex.rst +++ b/docs/user/dql/complex.rst @@ -112,7 +112,6 @@ Explain:: "size" : 200, "from" : 0, "_source" : { - "excludes" : [ ], "includes" : [ "firstname", "lastname", @@ -223,8 +222,7 @@ Explain:: "firstname", "lastname", "age" - ], - "excludes" : [ ] + ] } } @@ -325,7 +323,6 @@ Explain:: "size" : 200, "from" : 0, "_source" : { - "excludes" : [ ], "includes" : [ "id", "name" @@ -338,7 +335,6 @@ Explain:: "size" : 200, "from" : 0, "_source" : { - "excludes" : [ ], "includes" : [ "account_number", "firstname", diff --git a/docs/user/dql/vector-search.rst b/docs/user/dql/vector-search.rst new file mode 100644 index 00000000000..8b0237a6ef0 --- /dev/null +++ b/docs/user/dql/vector-search.rst @@ -0,0 +1,331 @@ + +============================== +Vector Search [Experimental] +============================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + +Introduction +============ + +``vectorSearch()`` is an experimental feature. Syntax, options, and +pushdown behavior may change in future releases based on feedback. + +The ``vectorSearch()`` table function runs a k-NN query against a ``knn_vector`` +field and exposes the matching documents as a relation in the ``FROM`` clause. 
+It relies on the OpenSearch `k-NN plugin +`_. The target index must +map the vector field as ``knn_vector`` and the index must be created with +``index.knn: true``. + +The SQL layer translates ``vectorSearch()`` into an OpenSearch search +request whose body is native k-NN query DSL; the query vector is parsed +into a numeric array before that DSL is emitted. + +Relevance is expressed through the OpenSearch ``_score`` metadata field, and +results are returned ordered by ``_score DESC`` by default. + +vectorSearch +============ + +Description +----------- + +``vectorSearch(table='', field='', vector='', option='')`` + +All four arguments are required and must be passed by name as string +literals. Positional arguments, or a mix of positional and named +arguments, are not supported. For example, the following is invalid:: + + FROM vectorSearch('my_vectors', field='embedding', + vector='[0.1,0.2]', option='k=5') AS v + +A table alias is required. Projected fields are referenced through the +alias (``v._id``, ``v._score``, ``v.category``). + +If the ``opensearch-knn`` plugin is not installed on the target cluster, +query execution fails with a ``vectorSearch() requires the k-NN plugin`` +error. ``_explain`` continues to work without the plugin. + +Arguments +--------- + +- ``table``: single concrete index or alias to search. Wildcards + (``*``), comma-separated multi-index targets, ``_all``, ``.``, and + ``..`` are not supported. The target index must have + ``index.knn: true`` and map the target field as ``knn_vector``. A + normal alias name is accepted. If the alias resolves to multiple + backing indices, the SQL layer does not prevalidate that every + backing index has a compatible ``knn_vector`` mapping, dimension, or + engine; OpenSearch execution remains the source of truth for those + checks. +- ``field``: name of the ``knn_vector`` field. +- ``vector``: query vector as a JSON-style array of numbers, passed as a + string (for example, ``'[0.1, 0.2, 0.3]'``). 
Components must be + comma-separated finite numbers. Semicolon, colon, and pipe separators + are not supported, and empty components (for example, ``'[1.0,,2.0]'`` + or ``'[1.0,]'``) return an error. The vector dimension must match the + ``knn_vector`` mapping on the target index. +- ``option``: comma-separated ``key=value`` pairs. Exactly one of ``k``, + ``max_distance``, or ``min_score`` is required. ``filter_type`` is + optional. + +Supported option keys +--------------------- + +Option keys are lower-case and case-sensitive. ``K=5`` or +``Filter_Type=post`` returns an "Unknown option key" error. + +- ``k``: top-k mode. Integer between 1 and 10000. The query returns up to + ``k`` nearest neighbors. +- ``max_distance``: radial mode. Non-negative number. Matches documents + within the given distance of the query vector. ``LIMIT`` is required and + caps the returned rows. +- ``min_score``: radial mode. Non-negative number. Matches documents with + score at or above the given threshold. ``LIMIT`` is required and caps + the returned rows. +- ``filter_type``: ``post`` or ``efficient``. Controls how a ``WHERE`` + clause is applied. See `Filtering`_. + +``k``, ``max_distance``, and ``min_score`` are mutually exclusive; specify +exactly one. + +Native k-NN tuning options (for example, ``method_parameters.ef_search``, +``method_parameters.nprobes``, ``rescore.oversample_factor``) are not +supported through ``vectorSearch()`` and return an "Unknown option +key" error. 
+ +Syntax +------ + +:: + + SELECT + FROM vectorSearch( + table='', + field='', + vector='', + option='' + ) AS + [WHERE ] + [ORDER BY ._score DESC] + [LIMIT ] + +Example 1: Top-k +---------------- + +Return the five nearest neighbors of a query vector:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='k=5' + ) AS v + """ + } + +In top-k mode, the request size defaults to ``k``; adding ``LIMIT n`` further +reduces the row count, but ``n`` must not exceed ``k``. + +Example 2: Radial search (``max_distance``) +------------------------------------------- + +Return up to the specified ``LIMIT`` documents within a maximum distance +of the query vector. ``LIMIT`` is required for radial searches; without +it the result set would be unbounded:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='max_distance=0.5' + ) AS v + LIMIT 100 + """ + } + +Example 3: Radial search (``min_score``) +---------------------------------------- + +Return up to the specified ``LIMIT`` documents whose score is at or +above the given threshold. ``LIMIT`` is required for radial searches; +without it the result set would be unbounded:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='min_score=0.8' + ) AS v + LIMIT 100 + """ + } + +Filtering +========= + +A ``WHERE`` clause on non-vector fields of the ``vectorSearch()`` alias is +pushed down to OpenSearch when it can be translated to an OpenSearch filter. 
+Two placement strategies are available via the ``filter_type`` option: + +- ``efficient`` (default): the ``WHERE`` predicate is embedded directly + inside the k-NN query (``knn.filter``), enabling native efficient + k-NN filtering during vector search. Efficient filtering depends on + native k-NN engine and method support; if the target index does not + support ``knn.filter`` for the configured engine and method, set + ``filter_type=post``. See the `k-NN filtering guide + `_ + for engine and method requirements. +- ``post``: the k-NN query is placed in a scoring (``bool.must``) + context and the ``WHERE`` predicate is placed as a non-scoring + ``bool.filter`` outside the k-NN clause. This is Boolean filter + placement, not the REST ``post_filter`` parameter, and may return + fewer than ``k`` rows when the filter is selective. + +Full-text predicates (``match``, ``match_phrase``, ``multi_match``, and +the rest of the full-text family) under a ``WHERE`` clause are used as +filters, not as hybrid keyword-vector score fusion. Their placement +follows ``filter_type``: the default (``efficient``) embeds supported +full-text predicates under ``knn.filter``, while ``post`` places them +in ``bool.filter`` outside the k-NN clause. In both cases they restrict +which candidates are retained but their text relevance score does not +combine with the vector ``_score``. ``vectorSearch()`` is not a hybrid +vector + text relevance scorer. + +Behavior depends on whether ``filter_type`` is specified: + +- **Omitted (default, ``efficient``)**: the ``WHERE`` predicate is + embedded under ``knn.filter`` so the k-NN engine applies native + efficient filtering during vector search. A query with no ``WHERE`` + clause is valid. 
``efficient`` supports simple native filters: + ``term``, ``range``, ``wildcard``, ``exists``, full-text family + (``match``, ``match_phrase``, ``match_phrase_prefix``, + ``match_bool_prefix``, ``multi_match``, ``query_string``, + ``simple_query_string``), and boolean combinations of those filters. + Predicates that compile to script queries (arithmetic, function calls + on indexed fields, ``CASE``, date math), nested predicates, and other + query shapes are not supported under ``knn.filter`` and return an + error. Set ``filter_type=post`` to apply such predicates after the + k-NN search. If the predicate cannot be translated to an OpenSearch + filter query at all (a distinct translation failure from the + unsupported-shape cases above), the default path falls back to + evaluating the ``WHERE`` clause in memory after the k-NN results are + returned. +- **Explicit ``efficient``**: same contract as the default. Specifying + it is useful when a query should be explicit about the placement + strategy and should fail if the predicate cannot be safely embedded + under ``knn.filter``. +- **Explicit ``post``**: a ``WHERE`` clause is required and must be + translatable to an OpenSearch filter query. Predicates that translate + to native OpenSearch queries are pushed down as a ``bool.filter`` + alongside the k-NN query. Predicates that do not have a native + equivalent (for example, arithmetic or function calls on indexed + fields) are pushed down as an OpenSearch script query and evaluated + server-side. If predicate translation itself fails, the query returns + an error; there is no silent in-memory fallback under explicit + ``post``. Use ``filter_type=post`` when the predicate shape is not + supported by efficient filtering. 
+ +Example 4: Default efficient filtering (no ``filter_type``) +----------------------------------------------------------- + +:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score, v.category + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='k=10' + ) AS v + WHERE v.category = 'books' + """ + } + +The predicate is embedded under ``knn.filter`` so the k-NN engine +applies native efficient filtering during vector search. + +Example 5: Post-filtering for predicates not supported by efficient mode +------------------------------------------------------------------------ + +Use ``filter_type=post`` for predicates that do not fit the ``efficient`` +allow-list, such as arithmetic or function calls on indexed fields:: + + POST /_plugins/_sql + { + "query" : """ + SELECT v._id, v._score, v.category + FROM vectorSearch( + table='my_vectors', + field='embedding', + vector='[0.1, 0.2, 0.3]', + option='k=10,filter_type=post' + ) AS v + WHERE v.price * 1.1 < 100 + """ + } + +Scoring, sorting, and limits +============================ + +- ``vectorSearch()`` exposes the OpenSearch ``_score`` metadata field on the + alias. For an alias ``v``, select it as ``v._score``. +- ``_score`` can be selected and referenced in ``ORDER BY``, but it cannot + appear in ``WHERE``. Use ``option='min_score=...'`` for score-threshold + vector search. +- Results are returned in ``_score DESC`` order by default. The only + supported ``ORDER BY`` expression is ``._score DESC`` (for + example, ``v._score DESC``). +- In top-k mode (``k=N``), ``LIMIT n`` is optional; when present, ``n`` must + be ``≤ k``. +- In radial mode (``max_distance`` or ``min_score``), ``LIMIT`` is required. +- ``OFFSET`` is not supported on ``vectorSearch()``. Use ``LIMIT`` only. 
+ +Limitations +=========== + +The following are not supported on ``vectorSearch()``: + +- ``GROUP BY`` and aggregations directly over a ``vectorSearch()`` + relation are not supported and return an error. +- Operators wrapped around a ``vectorSearch()`` subquery are rejected + when they would run after ``vectorSearch()`` has already produced a + finite result set, because they can silently yield zero, skipped, or + incorrectly ordered rows. Specifically, an outer ``WHERE``, + ``ORDER BY``, ``OFFSET`` (non-zero), ``GROUP BY``, aggregation, or + ``DISTINCT`` applied to a ``vectorSearch()`` subquery returns an + error. Place ``WHERE`` predicates inside the subquery, directly on + the ``vectorSearch()`` alias, so that they participate in ``WHERE`` + pushdown. A plain outer ``LIMIT`` (without ``OFFSET``) wrapping a + ``vectorSearch()`` subquery is allowed and caps the returned rows. +- ``JOIN`` between a ``vectorSearch()`` relation and another relation is + not supported. +- ``UNION`` / ``INTERSECT`` / ``EXCEPT`` combining a ``vectorSearch()`` + relation with another relation is not supported. +- Multiple ``vectorSearch()`` calls in the same query are not supported. +- The query vector must be supplied as a literal. Parameterized vectors + (for example, values bound from another column) are not supported. +- Indexes that define a user field named ``_score`` cannot be queried + with ``vectorSearch()`` because ``_score`` is reserved for the + synthetic vector score exposed on the alias. Rename the field or query + the index with a plain ``SELECT``. diff --git a/docs/user/general/identifiers.rst b/docs/user/general/identifiers.rst index f4d455deb5c..49921d1fd79 100644 --- a/docs/user/general/identifiers.rst +++ b/docs/user/general/identifiers.rst @@ -150,7 +150,7 @@ Description To query multiple indices, you could 1. Include ``*`` in index name, this is an index pattern for wildcard match. -2. Delimited multiple indices and seperated them by ``,``. 
Note: no space allowed between each index. +2. Delimit multiple indices with ``,`` and enclose the entire comma-separated list in backticks. Note: no space allowed between each index. Examples diff --git a/docs/user/index.rst b/docs/user/index.rst index bb4b6399198..32ce39ed93d 100644 --- a/docs/user/index.rst +++ b/docs/user/index.rst @@ -43,6 +43,8 @@ OpenSearch SQL enables you to extract insights out of OpenSearch using the famil - `Window Functions `_ + - `Vector Search `_ + * **Beyond SQL** - `PartiQL (JSON) Support `_ diff --git a/docs/user/interfaces/endpoint.rst b/docs/user/interfaces/endpoint.rst index 26dca94d228..1deab285103 100644 --- a/docs/user/interfaces/endpoint.rst +++ b/docs/user/interfaces/endpoint.rst @@ -88,8 +88,7 @@ Explain:: "includes" : [ "firstname", "lastname" - ], - "excludes" : [ ] + ] } } diff --git a/docs/user/interfaces/protocol.rst b/docs/user/interfaces/protocol.rst index 625eed0f544..29a5b185c46 100644 --- a/docs/user/interfaces/protocol.rst +++ b/docs/user/interfaces/protocol.rst @@ -80,8 +80,7 @@ Explain:: "firstname", "lastname", "balance" - ], - "excludes" : [ ] + ] } } diff --git a/docs/user/optimization/optimization.rst b/docs/user/optimization/optimization.rst index 0af19e9f9ed..d0313d04bd3 100644 --- a/docs/user/optimization/optimization.rst +++ b/docs/user/optimization/optimization.rst @@ -44,7 +44,7 @@ The consecutive Filter operator will be merged as one Filter operator:: { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}})" + "request": 
"OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"]}})" }, "children": [] } @@ -71,7 +71,7 @@ The Filter operator should be push down under Sort operator:: { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]})" + "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]})" }, "children": [] } @@ -102,7 +102,7 @@ The Project list will push down to Query DSL to `filter the source () [AS ] [, () [AS ]]... +convert [timeformat=] () [AS ] [, () [AS ]]... ``` ## Parameters The `convert` command supports the following parameters. -| Parameter | Required/Optional | Description | -| --- | --- | --- | -| `` | Required | One of the conversion functions: `auto()`, `num()`, `rmcomma()`, `rmunit()`, `memk()`, or `none()`. | -| `` | Required | Single field name to convert. | -| `AS ` | Optional | Create new field with converted value, preserving original field. 
| +| Parameter | Required/Optional | Description | Default | +| --- | --- | --- | --- | +| `` | Required | One of the conversion functions: `auto()`, `ctime()`, `dur2sec()`, `memk()`, `mktime()`, `mstime()`, `none()`, `num()`, `rmcomma()`, or `rmunit()`. | N/A | +| `` | Required | Single field name to convert. | N/A | +| `AS ` | Optional | Create new field with converted value, preserving original field. | N/A | +| `timeformat=` | Optional | A strftime format string used by `ctime()` and `mktime()`. | `%m/%d/%Y %H:%M:%S`. | ## Conversion Functions | Function | Description | | --- | --- | | `auto(field)` | Automatically converts fields to numbers using intelligent conversion. Handles memory sizes (k/m/g), commas, units, and scientific notation. Returns `null` for non-convertible values. | +| `ctime(field)` | Converts a UNIX epoch timestamp to a human-readable time string. Uses the `timeformat` parameter if specified, otherwise defaults to `%m/%d/%Y %H:%M:%S`. All timestamps are interpreted in UTC timezone. | +| `dur2sec(field)` | Converts a duration string in `HH:MM:SS` format to total seconds. Hours must be less than 24. Returns `null` for invalid formats. | +| `memk(field)` | Converts memory size strings to kilobytes. Accepts numbers with optional k/m/g suffix (case-insensitive). Default unit is kilobytes. Returns `null` for invalid formats. | +| `mktime(field)` | Converts a human-readable time string to a UNIX epoch timestamp. Uses the `timeformat` parameter if specified, otherwise defaults to `%m/%d/%Y %H:%M:%S`. Input strings are interpreted as UTC timezone. | +| `mstime(field)` | Converts a time string in `[MM:]SS.SSS` format to total seconds. The minutes portion is optional. Returns `null` for invalid formats. | +| `none(field)` | No-op function that preserves the original field value. | | `num(field)` | Extracts leading numbers from strings. For strings without letters: removes commas as thousands separators. 
For strings with letters: extracts leading number, stops at letters or commas. Returns `null` for non-convertible values. | | `rmcomma(field)` | Removes commas from field values and converts to a number. Returns `null` if the value contains letters. | | `rmunit(field)` | Extracts leading numeric values from strings. Stops at the first non-numeric character (including commas). Returns `null` for non-convertible values. | -| `memk(field)` | Converts memory size strings to kilobytes. Accepts numbers with optional k/m/g suffix (case-insensitive). Default unit is kilobytes. Returns `null` for invalid formats. | -| `none(field)` | No-op function that preserves the original field value. Used for excluding specific fields from wildcard conversions. | ## Example 1: Basic auto() conversion @@ -241,6 +246,128 @@ fetched rows / total rows = 3/3 **Note:** The `none()` function is particularly useful when wildcard support is implemented, allowing you to exclude specific fields from bulk conversions. +## Example 9: Convert epoch timestamp to time string with ctime() + +```ppl +source=accounts +| eval timestamp = 1066507633 +| convert ctime(timestamp) +| fields timestamp +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++---------------------+ +| timestamp | +|---------------------| +| 10/18/2003 20:07:13 | ++---------------------+ +``` + +## Example 10: Convert time string to epoch with mktime() + +```ppl +source=accounts +| eval date_str = '10/18/2003 20:07:13' +| convert mktime(date_str) +| fields date_str +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++--------------+ +| date_str | +|--------------| +| 1.066507633E9| ++--------------+ +``` + +## Example 11: Using timeformat with ctime() and mktime() + +The `timeformat` parameter specifies a strftime format string for `ctime()` and `mktime()`: + +```ppl +source=accounts +| eval timestamp = 1066507633 +| convert timeformat="%Y-%m-%d 
%H:%M:%S" ctime(timestamp) +| fields timestamp +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++---------------------+ +| timestamp | +|---------------------| +| 2003-10-18 20:07:13 | ++---------------------+ +``` + +Similarly, you can use `timeformat` with `mktime()` to parse dates in custom formats: + +```ppl +source=accounts +| eval date_str = '2000-01-01 00:00:00' +| convert timeformat="%Y-%m-%d %H:%M:%S" mktime(date_str) +| fields date_str +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++------------+ +| date_str | +|------------| +| 9.466848E8 | ++------------+ +``` + +## Example 12: Convert duration to seconds with dur2sec() + +```ppl +source=accounts +| eval duration = '01:23:45' +| convert dur2sec(duration) +| fields duration +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++----------+ +| duration | +|----------| +| 5025.0 | ++----------+ +``` + +## Example 13: Convert minutes and seconds with mstime() + +```ppl +source=accounts +| eval time_str = '03:45.5' +| convert mstime(time_str) +| fields time_str +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++----------+ +| time_str | +|----------| +| 225.5 | ++----------+ +``` + ## Notes - All conversion functions return `null` for values that cannot be converted to a number diff --git a/docs/user/ppl/cmd/graphlookup.md b/docs/user/ppl/cmd/graphlookup.md index 00754263c8c..94e0cf3968d 100644 --- a/docs/user/ppl/cmd/graphlookup.md +++ b/docs/user/ppl/cmd/graphlookup.md @@ -5,10 +5,14 @@ The `graphLookup` command performs recursive graph traversal on a collection usi ## Syntax -The `graphLookup` command has the following syntax: +```syntax +source = | graphLookup start= edge= [maxDepth=] [depthField=] [supportArray=(true | false)] [batchMode=(true | false)] [usePIT=(true | false)] [filter=()] as +``` + +`graphLookup` can be used as 
the first command (without `source`): ```syntax -graphLookup start= edge= [maxDepth=] [depthField=] [supportArray=(true | false)] [batchMode=(true | false)] [usePIT=(true | false)] [filter=()] as +graphLookup start= edge= [maxDepth=] [depthField=] [usePIT=(true | false)] [filter=()] as ``` The following are examples of the `graphLookup` command syntax: @@ -21,24 +25,27 @@ source = employees | graphLookup employees start=reportsTo edge=reportsTo<->name source = travelers | graphLookup airports start=nearestAirport edge=connects-->airport supportArray=true as reachableAirports source = airports | graphLookup airports start=airport edge=connects-->airport supportArray=true as reachableAirports source = employees | graphLookup employees start=reportsTo edge=reportsTo-->name filter=(status = 'active' AND age > 18) as reportingHierarchy +graphLookup employees start='Eliot' edge=reportsTo-->name as reportingHierarchy +graphLookup employees start='Eliot', 'Andrew' edge=reportsTo-->name as reportingHierarchy +graphLookup employees start='Eliot' edge=reportsTo-->name maxDepth=1 depthField=level as reportingHierarchy ``` ## Parameters The `graphLookup` command supports the following parameters. -| Parameter | Required/Optional | Description | -|---|---|---| -| `` | Required | The name of the index to perform the graph traversal on. Can be the same as the source index for self-referential graphs. | -| `start=` | Required | The field in the source documents whose value is used to initiate the recursive search. The value of this field is matched against `toField` in the lookup index. Supports both single values and array values as starting points. | -| `edge=` | Required | Defines the traversal path between nodes, specifying the connection fields and the direction of traversal. See [Edge Sub-parameters](#edge-sub-parameters) below. | -| `maxDepth=` | Optional | The maximum recursion depth (number of hops). Default is `0`. 
A value of `0` returns only direct connections to the start values. A value of `1` returns the initial matches plus one additional recursive step, and so on. | -| `depthField=` | Optional | The name of the field added to each traversed document to indicate its recursion depth. If not specified, no depth field is added. Depth starts at `0` for the first level of matches. | -| `supportArray=(true \| false)` | Optional | When `true`, disables early visited-node filter pushdown to OpenSearch. Default is `false`. Set to `true` when `fromField` or `toField` contains array values to ensure correct traversal behavior. See [Array Field Handling](#array-field-handling) for details. | -| `batchMode=(true \| false)` | Optional | When `true`, collects all start values from all source rows and performs a single unified BFS traversal. Default is `false`. The output changes to two arrays: `[Array, Array]`. See [Batch Mode](#batch-mode) for details. | -| `usePIT=(true \| false)` | Optional | When `true`, enables Point In Time (PIT) search for the lookup index, allowing paginated retrieval of complete results without the `max_result_window` size limit. Default is `false`. See [PIT Search](#pit-search) for details. | -| `filter=()` | Optional | A filter condition that restricts which lookup index documents participate in the graph traversal. Only documents matching the condition are considered as candidates during BFS. Parentheses around the condition are required. Example: `filter=(status = 'active' AND age > 18)`. | -| `as ` | Required | The name of the output array field that will contain all documents discovered during the graph traversal. 
| +| Parameter | Required/Optional | Description | +|---|---|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `` | Required | The name of the index to perform the graph traversal on. Can be the same as the source index for self-referential graphs. | +| `start=` | Required | The starting point for the BFS traversal. The `startExpression` can be a **field reference** (e.g., `start=reportsTo`) from the previous pipe, a **literal value** (e.g., `start='Eliot'`), or a **literal list** (e.g., `start='Eliot', 'Andrew'`). When a field reference is used, the value of that field in each source row initiates the traversal. When literal values are used, they seed the BFS directly. The start value is matched against `toField` in the lookup index. | +| `edge=` | Required | Defines the traversal path between nodes, specifying the connection fields and the direction of traversal. See [Edge Sub-parameters](#edge-sub-parameters) below. | +| `maxDepth=` | Optional | The maximum recursion depth (number of hops). Default is `0`. A value of `0` returns only direct connections to the start values. A value of `1` returns the initial matches plus one additional recursive step, and so on. | +| `depthField=` | Optional | The name of the field added to each traversed document to indicate its recursion depth. If not specified, no depth field is added. Depth starts at `0` for the first level of matches. | +| `supportArray=(true \| false)` | Optional | When `true`, disables early visited-node filter pushdown to OpenSearch. Default is `false`. 
Set to `true` when `fromField` or `toField` contains array values to ensure correct traversal behavior. See [Array Field Handling](#array-field-handling) for details. | +| `batchMode=(true \| false)` | Optional | When `true`, collects all start values from all source rows and performs a single unified BFS traversal. Default is `false`. The output changes to two arrays: `[Array, Array]`. See [Batch Mode](#batch-mode) for details. | +| `usePIT=(true \| false)` | Optional | When `true`, enables Point In Time (PIT) search for the lookup index, allowing paginated retrieval of complete results without the `max_result_window` size limit. Default is `false`. See [PIT Search](#pit-search) for details. | +| `filter=()` | Optional | A filter condition that restricts which lookup index documents participate in the graph traversal. Only documents matching the condition are considered as candidates during BFS. Parentheses around the condition are required. Example: `filter=(status = 'active' AND age > 18)`. | +| `as ` | Required | The name of the output array field that will contain all documents discovered during the graph traversal. | ### Edge Sub-parameters @@ -354,6 +361,55 @@ source = employees The filter is applied at the OpenSearch query level, so it combines efficiently with the BFS traversal queries. At each BFS level, the query sent to OpenSearch is effectively: `bool { filter: [user_filter, bfs_terms_query] }`. +### When to Use as First Command + +When the starting points for graph traversal are known in advance, `graphLookup` can be used as the first command in a pipeline without `source`. In this case, `start` accepts literal values instead of a field reference. 
+ +This is useful when: +- You want to explore the graph from specific known nodes +- You don't need source document fields in the output +- You want a quick lookup without creating a source query first + +**Single start value:** + +```ppl ignore +graphLookup employees + start='Eliot' + edge=reportsTo-->name + as reportingHierarchy +``` + +The query returns a single row containing the BFS results: + +```text ++---------------------------------------------------------------+ +| reportingHierarchy | ++---------------------------------------------------------------+ +| [{name:Eliot, reportsTo:Ron, id:2}, {name:Ron, ...}, ...] | ++---------------------------------------------------------------+ +``` + +**Multiple start values:** + +```ppl ignore +graphLookup employees + start='Eliot', 'Andrew' + edge=reportsTo-->name + as reportingHierarchy +``` + +All literal start values are combined into a single BFS traversal. The output is a single row with all discovered nodes. + +**With depth tracking:** + +```ppl ignore +graphLookup employees + start='Eliot' + edge=reportsTo-->name + depthField=level + as reportingHierarchy +``` + ## Limitations - The source input, which provides the starting point for the traversal, has a limitation of 100 documents to avoid performance issues. 
diff --git a/docs/user/ppl/cmd/mvcombine.md b/docs/user/ppl/cmd/mvcombine.md index 4ccad724ca7..8951b0d7fed 100644 --- a/docs/user/ppl/cmd/mvcombine.md +++ b/docs/user/ppl/cmd/mvcombine.md @@ -124,6 +124,6 @@ source=mvcombine_data Expected output: ```text -{'reason': 'Invalid Query', 'details': 'Field [does_not_exist] not found.', 'type': 'IllegalArgumentException'} +{'context': {'stage': 'analyzing', 'stage_description': 'Parsing and validating the query'}, 'reason': 'Field [does_not_exist] not found.', 'details': 'Field [does_not_exist] not found.', 'location': ['while preparing and validating the query plan'], 'code': 'FIELD_NOT_FOUND', 'type': 'IllegalArgumentException'} Error: Query returned no data ``` \ No newline at end of file diff --git a/docs/user/ppl/cmd/mvexpand.md b/docs/user/ppl/cmd/mvexpand.md index 6fdd9bca365..2bb01f2d0e0 100644 --- a/docs/user/ppl/cmd/mvexpand.md +++ b/docs/user/ppl/cmd/mvexpand.md @@ -132,6 +132,6 @@ source=people Expected output: ```text -{'reason': 'Invalid Query', 'details': "Field 'tags' not found in the schema", 'type': 'SemanticCheckException'} +{'context': {'stage': 'analyzing', 'stage_description': 'Parsing and validating the query', 'command': 'mvexpand'}, 'reason': "Field 'tags' not found in the schema", 'details': "Field 'tags' not found in the schema", 'location': ['while preparing and validating the query plan', 'while evaluating the input field for mvexpand'], 'code': 'FIELD_NOT_FOUND', 'type': 'SemanticCheckException'} Error: Query returned no data ``` \ No newline at end of file diff --git a/docs/user/ppl/cmd/reverse.md b/docs/user/ppl/cmd/reverse.md index 9505abad93b..dcd4b3dc559 100644 --- a/docs/user/ppl/cmd/reverse.md +++ b/docs/user/ppl/cmd/reverse.md @@ -5,6 +5,16 @@ The `reverse` command reverses the display order of the search results. It retur > **Note**: The `reverse` command processes the entire dataset. 
If applied directly to millions of records, it consumes significant coordinating node memory resources. Only apply the `reverse` command to smaller datasets, typically after aggregation operations. +## Performance optimization + +The `reverse` command uses an optimized implementation that intelligently reverses existing sort collations instead of using a `ROW_NUMBER()` approach. The behavior depends on the context: + +1. **Existing sort collation**: If a preceding `sort` command is detected, `reverse` flips the sort direction of each field (e.g., ASC becomes DESC and vice versa). This leverages database-native sort reversal for significantly better performance. +2. **`@timestamp` field**: If no explicit sort exists but the data source has an `@timestamp` field, `reverse` sorts by `@timestamp` in descending order. +3. **No sort or `@timestamp`**: If neither an explicit sort nor an `@timestamp` field is found, `reverse` is a no-op (ignored). + +The optimization also supports **backtracking** through non-blocking operators like `where`, `eval`, and `fields` to find an upstream sort. However, blocking operators such as `stats` (aggregation), `join`, and set operations destroy the collation, so `reverse` after these operators is a no-op unless a new `sort` is added after them. 
+ ## Syntax The `reverse` command has the following syntax: @@ -116,19 +126,19 @@ fetched rows / total rows = 4/4 ``` -## Example 5: Use the reverse command with a complex pipeline +## Example 5: Use the reverse command with a complex pipeline The following query uses the `reverse` command with filtering and field selection: - + ```ppl source=accounts | where age > 30 | fields account_number, age | reverse ``` - + The query returns the following results: - + ```text fetched rows / total rows = 3/3 +----------------+-----+ @@ -140,3 +150,205 @@ fetched rows / total rows = 3/3 +----------------+-----+ ``` +## Example 6: Reverse with descending sort + +The following query reverses a descending sort, effectively producing ascending order: + +```ppl +source=accounts +| sort - account_number +| fields account_number +| reverse +``` + +The query returns the following results: + +```text +fetched rows / total rows = 7/7 ++----------------+ +| account_number | +|----------------| +| 1 | +| 6 | +| 13 | +| 18 | +| 20 | +| 25 | +| 32 | ++----------------+ +``` + +## Example 7: Reverse with mixed sort directions + +The following query reverses a multi-field sort with mixed directions. 
Each field's sort direction is individually flipped: + +```ppl +source=accounts +| sort - account_number, + firstname +| fields account_number, firstname +| reverse +``` + +The query returns the following results: + +```text +fetched rows / total rows = 7/7 ++----------------+--------------+ +| account_number | firstname | +|----------------+--------------| +| 1 | Amber JOHnny | +| 6 | Hattie | +| 13 | Nanette | +| 18 | Dale | +| 20 | Elinor | +| 25 | Virginia | +| 32 | Dillard | ++----------------+--------------+ +``` + +## Example 8: Reverse with @timestamp field + +When no explicit sort exists but the data source has an `@timestamp` field, `reverse` sorts by `@timestamp` in descending order: + +```ppl +source=time_test_data +| fields value, category, `@timestamp` +| reverse +| head 5 +``` + +The query returns the following results: + +```text +fetched rows / total rows = 5/5 ++-------+----------+---------------------+ +| value | category | @timestamp | +|-------+----------+---------------------| +| 8762 | A | 2025-08-01 03:47:41 | +| 7348 | C | 2025-08-01 02:00:56 | +| 9015 | B | 2025-08-01 01:14:11 | +| 6489 | D | 2025-08-01 00:27:26 | +| 8676 | A | 2025-07-31 23:40:33 | ++-------+----------+---------------------+ +``` + +## Example 9: Reverse is ignored without sort or @timestamp + +When there is no explicit sort and the data source has no `@timestamp` field, `reverse` is a no-op and data remains in its natural order: + +```ppl +source=accounts +| fields account_number +| reverse +| head 3 +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++----------------+ +| account_number | +|----------------| +| 1 | +| 6 | +| 13 | ++----------------+ +``` + +## Example 10: Reverse backtracks through filter and eval + +The `reverse` command can detect sort collations through non-blocking operators like `where` and `eval`: + +```ppl +source=accounts +| sort account_number +| where balance > 30000 +| fields account_number, balance 
+| reverse +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++----------------+---------+ +| account_number | balance | +|----------------+---------| +| 32 | 48086 | +| 25 | 40540 | +| 13 | 32838 | +| 1 | 39225 | ++----------------+---------+ +``` + +## Example 11: Reverse is a no-op after aggregation + +Aggregation (`stats`) destroys input ordering, so `reverse` after aggregation without a subsequent `sort` is a no-op: + +```ppl +source=accounts +| stats count() as c by gender +| reverse +``` + +The query returns the following results (order not guaranteed): + +```text +fetched rows / total rows = 2/2 ++---+--------+ +| c | gender | +|---+--------| +| 4 | M | +| 3 | F | ++---+--------+ +``` + +## Example 12: Reverse works with sort after aggregation + +Adding a `sort` after aggregation restores collation, allowing `reverse` to work: + +```ppl +source=accounts +| stats count() as c by gender +| sort gender +| reverse +``` + +The query returns the following results: + +```text +fetched rows / total rows = 2/2 ++---+--------+ +| c | gender | +|---+--------| +| 4 | M | +| 3 | F | ++---+--------+ +``` + +## Example 13: Reverse with timechart + +The `timechart` command adds a sort on the time field, so `reverse` flips it to return results in reverse chronological order: + +```ppl +source=events +| timechart span=1m count() +| reverse +``` + +The query returns the following results: + +```text +fetched rows / total rows = 5/5 ++---------------------+----------+ +| @timestamp | count() | +|---------------------+----------| +| 2024-07-01 00:04:00 | 1 | +| 2024-07-01 00:03:00 | 1 | +| 2024-07-01 00:02:00 | 1 | +| 2024-07-01 00:01:00 | 1 | +| 2024-07-01 00:00:00 | 1 | ++---------------------+----------+ +``` + diff --git a/docs/user/ppl/cmd/rex.md b/docs/user/ppl/cmd/rex.md index b4fe706f489..6d50a875dd6 100644 --- a/docs/user/ppl/cmd/rex.md +++ b/docs/user/ppl/cmd/rex.md @@ -228,7 +228,7 @@ source=accounts The query returns the 
following results: ```text -{'reason': 'Invalid Query', 'details': "Invalid capture group name 'user_name'. Java regex group names must start with a letter and contain only letters and digits.", 'type': 'IllegalArgumentException'} +{'reason': "Invalid capture group name 'user_name'.", 'code': 'SYNTAX_ERROR', 'suggestion': 'Java Regex capture groups must be alphanumeric and start with a letter. Update the capture group to be alphanumeric.', 'context': {'stage': 'analyzing', 'stage_description': 'Parsing and validating the query', 'command': 'rex'}, 'details': "Invalid capture group name 'user_name'.", 'location': ['while preparing and validating the query plan', 'while processing the rex command', 'while validating the capture groups for the pattern'], 'type': 'IllegalArgumentException'} Error: Query returned no data ``` diff --git a/docs/user/ppl/cmd/union.md b/docs/user/ppl/cmd/union.md new file mode 100644 index 00000000000..8c148b998c2 --- /dev/null +++ b/docs/user/ppl/cmd/union.md @@ -0,0 +1,197 @@ + +# union + +The `union` command combines results from multiple datasets using UNION ALL semantics. It merges rows from two or more sources into a single result set, preserving all rows including duplicates. You can optionally apply subsequent processing, such as aggregation or sorting, to the combined results. Each dataset can be a subsearch with different filtering criteria, data transformations, and field selections, or a direct index reference. + +Union is particularly useful for combining data from multiple sources, creating comprehensive datasets from different criteria, and consolidating results while handling schema differences through automatic type coercion. + +Use union for: + +* **Multi-source data combination**: Merge data from different indexes or apply different filters to the same source. +* **Dataset consolidation**: Combine results from different queries while preserving all rows including duplicates. 
+* **Flexible dataset patterns**: Use subsearches or direct index references with optional maxout control. +* **Schema unification**: Automatically handle different schemas with type coercion for conflicting field types and NULL-fill for missing fields. + +## Syntax + +The `union` command has the following syntax: + +```syntax +union [maxout=<limit>] <dataset> [<dataset> ...] +``` + +Each dataset can be: +- **Direct index reference**: `index_name`, `index_pattern*`, `index_alias` +- **Subsearch**: `[search source=index | <commands>]` + +The following are examples of the `union` command syntax: + +```syntax +| union logs-*, security-logs +| union [search source=accounts | where age > 30], [search source=accounts | where age < 30] +| union maxout=100 [search source=logs | fields user, action], [search source=events | fields user, action] +| union [search source=accounts | where status="active"], [search source=accounts | where status="pending"] +``` + +## Parameters + +The `union` command supports the following parameters. + +| Parameter | Required/Optional | Description | +| --- | --- | --- | +| `maxout` | Optional | Maximum number of results to return from the union operation. Default: unlimited (0). | +| `<dataset>` | Required | At least two datasets are required. Each dataset can be either a subsearch enclosed in square brackets (`[search source=index | <commands>]`) or a direct index reference (for example, `accounts`, `logs-*`). All PPL commands are supported within subsearches. | +| `<subsequent-commands>` | Optional | Commands applied to the merged results after the union operation (for example, `stats`, `sort`, or `head`). | + +## Example 1: Combining age groups for demographic analysis + +This example demonstrates how to merge customers from different age segments into a unified dataset. 
It combines `young` and `adult` customers into a single result set and adds categorization labels for further analysis: + +```ppl +| union [search source=accounts +| where age < 30 +| eval age_group = "young" +| fields firstname, age, age_group] [search source=accounts +| where age >= 30 +| eval age_group = "adult" +| fields firstname, age, age_group] +| sort age +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----------+-----+-----------+ +| firstname | age | age_group | +|-----------+-----+-----------| +| Nanette | 28 | young | +| Amber | 32 | adult | +| Dale | 33 | adult | +| Hattie | 36 | adult | ++-----------+-----+-----------+ +``` + + +## Example 2: Combining filtered subsets from the same index + +This example demonstrates how to combine multiple filtered subsets from the same index using union: + +```ppl +| union [search source=accounts | where balance > 30000] [search source=accounts | where age < 30] +| fields firstname, age, balance +| sort balance desc +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++-----------+-----+---------+ +| firstname | age | balance | +|-----------+-----+---------| +| Amber | 32 | 39225 | +| Nanette | 28 | 32838 | +| Nanette | 28 | 32838 | ++-----------+-----+---------+ +``` + +Note: Nanette appears twice because she meets both conditions (balance > 30000 AND age < 30), demonstrating UNION ALL semantics which preserve all rows including duplicates. 
+ + +## Example 3: Mid-pipeline union (implicit first dataset) + +This example demonstrates using union mid-pipeline where the upstream result is implicitly included as the first dataset: + +```ppl +search source=accounts | where age > 30 | union [search source=accounts | where age < 30] +| fields firstname, age +| sort age +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----------+-----+ +| firstname | age | +|-----------+-----| +| Nanette | 28 | +| Amber | 32 | +| Dale | 33 | +| Hattie | 36 | ++-----------+-----+ +``` + +Note: The upstream result `where age > 30` is automatically the first dataset, then unioned with `where age < 30`. + + +## Example 4: Using maxout option to limit results + +This example demonstrates how to limit the total number of results returned from a union operation using the `maxout` option. Note that UNION ALL semantics preserve duplicate rows: + +```ppl +| union maxout=3 [search source=accounts +| where balance > 20000] [search source=accounts +| where age > 30] +| fields firstname, age, balance +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++-----------+-----+---------+ +| firstname | age | balance | +|-----------+-----+---------| +| Amber | 32 | 39225 | +| Nanette | 28 | 32838 | +| Amber | 32 | 39225 | ++-----------+-----+---------+ +``` + +Note: Amber appears twice because she meets both conditions (balance > 20000 AND age > 30), demonstrating UNION ALL semantics which preserve all rows including duplicates. + + +## Example 5: Segmenting accounts by balance tier + +This example demonstrates how to create account segments based on balance thresholds for comparative analysis. 
It separates `high_balance` accounts from `regular` accounts and labels them for easy comparison: + +```ppl +| union [search source=accounts +| where balance > 20000 +| eval query_type = "high_balance" +| fields firstname, balance, query_type] [search source=accounts +| where balance > 0 AND balance <= 20000 +| eval query_type = "regular" +| fields firstname, balance, query_type] +| sort balance desc +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----------+---------+--------------+ +| firstname | balance | query_type | +|-----------+---------+--------------| +| Amber | 39225 | high_balance | +| Nanette | 32838 | high_balance | +| Hattie | 5686 | regular | +| Dale | 4180 | regular | ++-----------+---------+--------------+ +``` + + +## Limitations + +The `union` command has the following limitations: + +* At least two datasets must be specified. +* When fields with the same name exist across datasets but have different types, the system automatically performs type coercion to find a common supertype: + * **Compatible numeric types** → wider numeric type (for example, `INTEGER` and `BIGINT` coerce to `BIGINT`; `INTEGER` and `FLOAT` coerce to `FLOAT`) + * **String types** → `VARCHAR` (for example, `CHAR` and `VARCHAR` coerce to `VARCHAR`) + * **Temporal types** → wider temporal type (for example, `DATE` and `TIMESTAMP` coerce to `TIMESTAMP`) + * **Incompatible types** (different type families) → `VARCHAR` fallback (for example, `INTEGER` and `VARCHAR` coerce to `VARCHAR`) +* Missing fields across datasets are automatically filled with `NULL` values to unify schemas. +* Direct index references must be valid index names, patterns, or aliases (for example, `accounts`, `logs-*`, `security-alias`). 
diff --git a/docs/user/ppl/functions/aggregations.md b/docs/user/ppl/functions/aggregations.md index c4a84e31149..ce55b6a19d2 100644 --- a/docs/user/ppl/functions/aggregations.md +++ b/docs/user/ppl/functions/aggregations.md @@ -1,37 +1,46 @@ -# Aggregation Functions +# Aggregation functions -## Description +Aggregation functions perform calculations across multiple rows to return a single result value. These functions are used with the `stats`, `eventstats`, and `streamstats` commands to analyze and summarize data. -Aggregation functions perform calculations across multiple rows to return a single result value. These functions are used with `stats`, `eventstats` and `streamstats` commands to analyze and summarize data. -The following table shows how NULL/MISSING values are handled by aggregation functions: +The following table shows how `NULL` and missing values are handled by aggregation functions. -| Function | NULL | MISSING | +| Function | null | Missing | | --- | --- | --- | -| COUNT | Not counted | Not counted | -| SUM | Ignore | Ignore | -| AVG | Ignore | Ignore | -| MAX | Ignore | Ignore | -| MIN | Ignore | Ignore | -| FIRST | Ignore | Ignore | -| LAST | Ignore | Ignore | -| LIST | Ignore | Ignore | -| VALUES | Ignore | Ignore | - -## Functions +| `COUNT` | Not counted | Not counted | +| `SUM` | Ignored | Ignored | +| `AVG` | Ignored | Ignored | +| `MAX` | Ignored | Ignored | +| `MIN` | Ignored | Ignored | +| `FIRST` | Ignored | Ignored | +| `LAST` | Ignored | Ignored | +| `LIST` | Ignored | Ignored | +| `VALUES` | Ignored | Ignored | + +## Functions + +The following aggregation functions are available in PPL for data analysis and summarization. ### COUNT -#### Description +**Usage**: `COUNT(expr)`, `C(expr)`, `c(expr)`, `count(expr)` + +Counts the number of `expr` values in the retrieved rows. `C()`, `c()`, and `count()` are available as abbreviations for `COUNT()`. For filtered counting, use an `eval` expression to specify the filtering condition. 
+ +**Parameters**: + +- `expr` (Optional): The expression whose values are to be counted. -Usage: Returns a count of the number of expr in the rows retrieved. The `C()` function, `c`, and `count` can be used as abbreviations for `COUNT()`. To perform a filtered counting, wrap the condition to satisfy in an `eval` expression. -### Example +**Return type**: `LONG` + +#### Example ```ppl source=accounts | stats count(), c(), count, c ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -42,14 +51,15 @@ fetched rows / total rows = 1/1 +---------+-----+-------+---+ ``` -Example of filtered counting - +The following example counts only records that match a specific condition: + ```ppl source=accounts | stats count(eval(age > 30)) as mature_users ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -62,17 +72,25 @@ fetched rows / total rows = 1/1 ### SUM -#### Description +**Usage**: `SUM(expr)` + +Returns the sum of `expr` values. + +**Parameters**: + +- `expr` (Required): The expression whose values are to be summed. -Usage: `SUM(expr)`. Returns the sum of expr. -### Example +**Return type**: Same as input type (`INTEGER`, `LONG`, `FLOAT`, or `DOUBLE`) + +#### Example ```ppl source=accounts | stats sum(age) by gender ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -86,17 +104,25 @@ fetched rows / total rows = 2/2 ### AVG -#### Description +**Usage**: `AVG(expr)` + +Returns the average value of `expr`. + +**Parameters**: -Usage: `AVG(expr)`. Returns the average value of expr. -### Example +- `expr` (Required): The expression whose values are to be averaged. 
+ +**Return type**: `DOUBLE` for numeric inputs; same as input type for `DATE`, `TIME`, or `TIMESTAMP` inputs + +#### Example ```ppl source=accounts | stats avg(age) by gender ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -110,18 +136,25 @@ fetched rows / total rows = 2/2 ### MAX -#### Description +**Usage**: `MAX(expr)` + +Returns the maximum value of `expr`. For non-numeric fields, this function returns the value that comes last in alphabetical order. + +**Parameters**: -Usage: `MAX(expr)`. Returns the maximum value of expr. -For non-numeric fields, values are sorted lexicographically. -### Example +- `expr` (Required): The expression for which to find the maximum value. + +**Return type**: Same as input type + +#### Example ```ppl source=accounts | stats max(age) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -132,14 +165,15 @@ fetched rows / total rows = 1/1 +----------+ ``` -Example with text field - +The following example returns the value from the `firstname` text field that comes last in alphabetical order: + ```ppl source=accounts | stats max(firstname) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -152,18 +186,25 @@ fetched rows / total rows = 1/1 ### MIN -#### Description +**Usage**: `MIN(expr)` + +Returns the minimum value of `expr`. For non-numeric fields, this function returns the value that comes first in alphabetical order. + +**Parameters**: -Usage: `MIN(expr)`. Returns the minimum value of expr. -For non-numeric fields, values are sorted lexicographically. -### Example +- `expr` (Required): The expression for which to find the minimum value. 
+ +**Return type**: Same as input type + +#### Example ```ppl source=accounts | stats min(age) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -174,14 +215,15 @@ fetched rows / total rows = 1/1 +----------+ ``` -Example with text field - +The following example returns the value from the `firstname` text field that comes first in alphabetical order: + ```ppl source=accounts | stats min(firstname) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -194,17 +236,25 @@ fetched rows / total rows = 1/1 ### VAR_SAMP -#### Description +**Usage**: `VAR_SAMP(expr)` + +Returns the sample variance of `expr`. + +**Parameters**: + +- `expr` (Required): The expression for which to calculate the sample variance. -Usage: `VAR_SAMP(expr)`. Returns the sample variance of expr. -### Example +**Return type**: `DOUBLE` + +#### Example ```ppl source=accounts | stats var_samp(age) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -217,17 +267,25 @@ fetched rows / total rows = 1/1 ### VAR_POP -#### Description +**Usage**: `VAR_POP(expr)` + +Returns the population variance of `expr`. + +**Parameters**: -Usage: `VAR_POP(expr)`. Returns the population standard variance of expr. -### Example +- `expr` (Required): The expression for which to calculate the population variance. + +**Return type**: `DOUBLE` + +#### Example ```ppl source=accounts | stats var_pop(age) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -240,17 +298,25 @@ fetched rows / total rows = 1/1 ### STDDEV_SAMP -#### Description +**Usage**: `STDDEV_SAMP(expr)` + +Returns the sample standard deviation of `expr`. -Usage: `STDDEV_SAMP(expr)`. Return the sample standard deviation of expr. 
-### Example +**Parameters**: + +- `expr` (Required): The expression for which to calculate the sample standard deviation. + +**Return type**: `DOUBLE` + +#### Example ```ppl source=accounts | stats stddev_samp(age) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -263,17 +329,25 @@ fetched rows / total rows = 1/1 ### STDDEV_POP -#### Description +**Usage**: `STDDEV_POP(expr)` -Usage: `STDDEV_POP(expr)`. Return the population standard deviation of expr. -### Example +Returns the population standard deviation of `expr`. + +**Parameters**: + +- `expr` (Required): The expression for which to calculate the population standard deviation. + +**Return type**: `DOUBLE` + +#### Example ```ppl source=accounts | stats stddev_pop(age) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -284,20 +358,27 @@ fetched rows / total rows = 1/1 +--------------------+ ``` -### DISTINCT_COUNT, DC +### DISTINCT_COUNT, DC + +**Usage**: `DISTINCT_COUNT(expr)`, `DC(expr)` + +Returns the approximate number of distinct values using the `HyperLogLog++` algorithm. Both functions are equivalent. For more information about algorithm accuracy and precision control, see [Controlling precision](https://docs.opensearch.org/latest/aggregations/metric/cardinality/#controlling-precision). + +**Parameters**: -#### Description +- `expr` (Required): The expression for which to count distinct values. -Usage: `DISTINCT_COUNT(expr)`, `DC(expr)`. Returns the approximate number of distinct values using the HyperLogLog++ algorithm. Both functions are equivalent. -For details on algorithm accuracy and precision control, see the [OpenSearch Cardinality Aggregation documentation](https://docs.opensearch.org/latest/aggregations/metric/cardinality/#controlling-precision). 
-### Example +**Return type**: `LONG` + +#### Example ```ppl source=accounts | stats dc(state) as distinct_states, distinct_count(state) as dc_states_alt by gender ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -311,17 +392,25 @@ fetched rows / total rows = 2/2 ### DISTINCT_COUNT_APPROX -#### Description +**Usage**: `DISTINCT_COUNT_APPROX(expr)` + +Returns the approximate count of distinct values in `expr` using the `HyperLogLog++` algorithm. + +**Parameters**: -Usage: `DISTINCT_COUNT_APPROX(expr)`. Return the approximate distinct count value of the expr, using the hyperloglog++ algorithm. -### Example +- `expr` (Required): The expression for which to count approximate distinct values. + +**Return type**: `LONG` + +#### Example ```ppl source=accounts | stats distinct_count_approx(gender) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -334,21 +423,27 @@ fetched rows / total rows = 1/1 ### EARLIEST -#### Description +**Usage**: `EARLIEST(field [, time_field])` -Usage: `EARLIEST(field [, time_field])`. Return the earliest value of a field based on timestamp ordering. -* `field`: mandatory. The field to return the earliest value for. -* `time_field`: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. - -### Example +Returns the earliest value of a `field` based on timestamp ordering. + +**Parameters**: + +- `field` (Required): The field for which to return the earliest value. +- `time_field` (Optional): The field to use for time-based ordering. Defaults to `@timestamp` if not specified. 
+ +**Return type**: Same as input field type + +#### Example ```ppl source=events | stats earliest(message) by host | sort host ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -360,15 +455,16 @@ fetched rows / total rows = 2/2 +-------------------+---------+ ``` -Example with custom time field - +The following example uses a custom time field instead of the default `@timestamp` field for ordering: + ```ppl source=events | stats earliest(status, event_time) by category | sort category ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -382,21 +478,27 @@ fetched rows / total rows = 2/2 ### LATEST -#### Description +**Usage**: `LATEST(field [, time_field])` -Usage: `LATEST(field [, time_field])`. Return the latest value of a field based on timestamp ordering. -* `field`: mandatory. The field to return the latest value for. -* `time_field`: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. - -### Example +Returns the latest value of a `field` based on timestamp ordering. + +**Parameters**: + +- `field` (Required): The field for which to return the latest value. +- `time_field` (Optional): The field to use for time-based ordering. Defaults to `@timestamp` if not specified. 
+ +**Return type**: Same as input field type + +#### Example ```ppl source=events | stats latest(message) by host | sort host ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -408,15 +510,16 @@ fetched rows / total rows = 2/2 +------------------+---------+ ``` -Example with custom time field - +The following example uses a custom time field instead of the default `@timestamp` field for ordering: + ```ppl source=events | stats latest(status, event_time) by category | sort category ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -430,20 +533,26 @@ fetched rows / total rows = 2/2 ### TAKE -#### Description +**Usage**: `TAKE(field [, size])` -Usage: `TAKE(field [, size])`. Return original values of a field. It does not guarantee on the order of values. -* `field`: mandatory. The field must be a text field. -* `size`: optional integer. The number of values should be returned. Default is 10. - -### Example +Returns the original values from a field. This function does not guarantee the order of the returned values. + +**Parameters**: + +- `field` (Required): A text field from which to extract values. +- `size` (Optional): The number of values to return. Defaults to `10`. + +**Return type**: `ARRAY` + +#### Example ```ppl source=accounts | stats take(firstname) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -454,22 +563,31 @@ fetched rows / total rows = 1/1 +-----------------------------+ ``` -### PERCENTILE or PERCENTILE_APPROX +### PERCENTILE, PERCENTILE_APPROX -#### Description +**Usage**: `PERCENTILE(expr, percent)`, `PERCENTILE_APPROX(expr, percent)` -Usage: `PERCENTILE(expr, percent)` or `PERCENTILE_APPROX(expr, percent)`. Return the approximate percentile value of expr at the specified percentage. -* `percent`: The number must be a constant between 0 and 100. 
- -Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref [issue link](https://github.com/opensearch-project/OpenSearch/issues/18122). -### Example +Returns the approximate percentile value of `expr` at the specified percentage. + +**Parameters**: + +- `expr` (Required): The expression for which to calculate the percentile. +- `percent` (Required): A constant number between `0` and `100`. + +**Return type**: Same as input type + +Starting in version 3.1.0, the percentile implementation switched from `AVLTreeDigest` to `MergingDigest`. For more information, see the [corresponding issue](https://github.com/opensearch-project/OpenSearch/issues/18122). +{: .note} + +#### Example ```ppl source=accounts | stats percentile(age, 90) by gender ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -481,20 +599,21 @@ fetched rows / total rows = 2/2 +---------------------+--------+ ``` -#### Percentile Shortcut Functions +#### Percentile shortcut functions For convenience, OpenSearch PPL provides shortcut functions for common percentiles: -- `PERC<percent>(expr)` - Equivalent to `PERCENTILE(expr, <percent>)` -- `P<percent>(expr)` - Equivalent to `PERCENTILE(expr, <percent>)` - -Both integer and decimal percentiles from 0 to 100 are supported (e.g., `PERC95`, `P99.5`). +- `PERC<percent>(expr)` - Equivalent to `PERCENTILE(expr, <percent>)`. +- `P<percent>(expr)` - Equivalent to `PERCENTILE(expr, <percent>)`. 
+ +Both integer and decimal percentiles from `0` to `100` are supported (for example, `PERC95`, `P99.5`): ```ppl source=accounts | stats perc99.5(age); ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -509,8 +628,9 @@ fetched rows / total rows = 1/1 source=accounts | stats p50(age); ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -523,17 +643,25 @@ fetched rows / total rows = 1/1 ### MEDIAN -#### Description +**Usage**: `MEDIAN(expr)` -Usage: `MEDIAN(expr)`. Returns the median (50th percentile) value of `expr`. This is equivalent to `PERCENTILE(expr, 50)`. -### Example +Returns the median (50th percentile) value of `expr`. This is equivalent to `PERCENTILE(expr, 50)`. + +**Parameters**: + +- `expr` (Required): The expression for which to calculate the median. + +**Return type**: Same as input type + +#### Example ```ppl source=accounts | stats median(age) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -546,19 +674,25 @@ fetched rows / total rows = 1/1 ### FIRST -#### Description +**Usage**: `FIRST(field)` -Usage: `FIRST(field)`. Return the first non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field. -* `field`: mandatory. The field to return the first value for. - -### Example +Returns the first non-null value of a `field` based on natural document order. Returns `NULL` if no records exist or if all records have `NULL` values for the `field`. + +**Parameters**: + +- `field` (Required): The field for which to return the first value. 
+ +**Return type**: Same as input field type + +#### Example ```ppl source=accounts | stats first(firstname) by gender ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -572,19 +706,25 @@ fetched rows / total rows = 2/2 ### LAST -#### Description +**Usage**: `LAST(field)` -Usage: `LAST(field)`. Return the last non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field. -* `field`: mandatory. The field to return the last value for. - -### Example +Returns the last non-null value of a `field` based on natural document order. Returns `NULL` if no records exist or if all records have `NULL` values for the `field`. + +**Parameters**: + +- `field` (Required): The field for which to return the last value. + +**Return type**: Same as input field type + +#### Example ```ppl source=accounts | stats last(firstname) by gender ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -598,21 +738,30 @@ fetched rows / total rows = 2/2 ### LIST -#### Description +**Usage**: `LIST(expr)` -Usage: `LIST(expr)`. Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved. -The function returns up to 100 values with no guaranteed ordering. -* `expr`: The field expression to collect values from. -* This aggregation function doesn't support Array, Struct, Object field types. - -Example with string fields +Collects all values from the specified expression into an array. Values are converted to strings, `NULL` values are filtered out, and duplicates are preserved. This function returns up to `100` values without a guaranteed order. + +**Parameters**: + +- `expr` (Required): The field expression from which to collect values. 
+ +**Return type**: `ARRAY` + +This aggregation function does not support array, struct, or object field types. +{: .note} + +#### Example + +The following example collects all values from a string field into an array: ```ppl source=accounts | stats list(firstname) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -625,22 +774,32 @@ fetched rows / total rows = 1/1 ### VALUES -#### Description +**Usage**: `VALUES(expr)` -Usage: `VALUES(expr)`. Collects all unique values from the specified expression into a sorted array. Values are converted to strings, nulls are filtered, and duplicates are removed. -The maximum number of unique values returned is controlled by the `plugins.ppl.values.max.limit` setting: -* Default value is 0, which means unlimited values are returned -* Can be configured to any positive integer to limit the number of unique values -* See the [PPL Settings](../admin/settings.md#plugins-ppl-values-max-limit) documentation for more details - -Example with string fields +Collects all unique values from the specified expression into a sorted array. Values are converted to strings, `NULL` values are filtered out, and duplicates are removed. + +**Parameters**: + +- `expr` (Required): The expression from which to collect unique values. + +**Return type**: `ARRAY` + +> The `plugins.ppl.values.max.limit` setting controls the maximum number of unique values returned: +> - The default value is 0, which returns an unlimited number of values. +> - Setting this to any positive integer limits the number of unique values. 
+> - See the [PPL Settings](../admin/settings.md#plugins-ppl-values-max-limit) documentation for more details + +#### Example + +The following example collects unique values from a string field into a sorted array: ```ppl source=accounts | stats values(firstname) ``` + -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 diff --git a/docs/user/ppl/functions/collection.md b/docs/user/ppl/functions/collection.md index ca9f7015c1a..006a49ef6d5 100644 --- a/docs/user/ppl/functions/collection.md +++ b/docs/user/ppl/functions/collection.md @@ -1,13 +1,25 @@ -# PPL Collection Functions +# Collection functions -## ARRAY +Collection functions create, manipulate, and analyze arrays and multivalue fields in data. These functions are essential for working with complex data structures and performing operations such as filtering, transforming, and analyzing array elements. -### Description +The following collection functions are supported in PPL. -Usage: `array(value1, value2, value3...)` create an array with input values. Currently we don't allow mixture types. We will infer a least restricted type, for example `array(1, "demo")` -> ["1", "demo"] -**Argument type:** `value1: ANY, value2: ANY, ...` -**Return type:** `ARRAY` -### Example +## ARRAY + +**Usage**: `array(value1, value2, value3...)` + +Creates an array containing the input values. Mixed types are automatically converted to the least restrictive type. For example, `array(1, "demo")` returns `["1", "demo"]` where the integer is converted to a string. + +**Parameters**: + +- `value1` (Required): A value of any type to include in the array. +- `value2`, `value3` (Optional): Additional values of any type to include in the array. 
+ +**Return type**: `ARRAY` + +#### Example + +The following example creates an array with numeric values: ```ppl source=people @@ -16,7 +28,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -26,6 +38,8 @@ fetched rows / total rows = 1/1 | [1,2,3] | +---------+ ``` + +The following example demonstrates mixed-type conversion: ```ppl source=people @@ -34,7 +48,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -45,15 +59,20 @@ fetched rows / total rows = 1/1 +----------+ ``` -## ARRAY_LENGTH +## ARRAY_LENGTH -### Description +**Usage**: `array_length(array)` + +Returns the length of the input `array`. + +**Parameters**: + +- `array` (Required): The array for which to return the length. + +**Return type**: `INTEGER` + +#### Example -Usage: `array_length(array)` returns the length of input array. -**Argument type:** `array:ARRAY` -**Return type:** `INTEGER` -### Example - ```ppl source=people | eval array = array(1, 2, 3) @@ -62,7 +81,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -73,15 +92,21 @@ fetched rows / total rows = 1/1 +--------+ ``` -## FORALL +## FORALL -### Description +**Usage**: `forall(array, function)` + +Checks whether all elements in the array satisfy the lambda function condition. The lambda function must accept a single input parameter and return a Boolean value. + +**Parameters**: + +- `array` (Required): The array to check. +- `function` (Required): A lambda function that returns a Boolean value and accepts a single input parameter. + +**Return type**: `BOOLEAN` + +#### Example -Usage: `forall(array, function)` check whether all element inside array can meet the lambda function. The function should also return boolean. The lambda function accepts one single input. 
-**Argument type:** `array:ARRAY, function:LAMBDA` -**Return type:** `BOOLEAN` -### Example - ```ppl source=people | eval array = array(1, 2, 3), result = forall(array, x -> x > 0) @@ -89,7 +114,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -100,15 +125,21 @@ fetched rows / total rows = 1/1 +--------+ ``` -## EXISTS +## EXISTS -### Description +**Usage**: `exists(array, function)` + +Checks whether at least one element in the array satisfies the lambda function condition. The lambda function must accept a single input parameter and return a Boolean value. + +**Parameters**: + +- `array` (Required): The array to check. +- `function` (Required): A lambda function that returns a Boolean value and accepts a single input parameter. + +**Return type**: `BOOLEAN` + +#### Example -Usage: `exists(array, function)` check whether existing one of element inside array can meet the lambda function. The function should also return boolean. The lambda function accepts one single input. -**Argument type:** `array:ARRAY, function:LAMBDA` -**Return type:** `BOOLEAN` -### Example - ```ppl source=people | eval array = array(-1, -2, 3), result = exists(array, x -> x > 0) @@ -116,7 +147,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -127,15 +158,21 @@ fetched rows / total rows = 1/1 +--------+ ``` -## FILTER +## FILTER -### Description +**Usage**: `filter(array, function)` + +Filters the elements in the array using a lambda function. The lambda function must accept a single input parameter and return a Boolean value. + +**Parameters**: + +- `array` (Required): The array to filter. +- `function` (Required): A lambda function that returns a Boolean value and accepts a single input parameter. 
+ +**Return type**: `ARRAY` + +#### Example -Usage: `filter(array, function)` filter the element in the array by the lambda function. The function should return boolean. The lambda function accepts one single input. -**Argument type:** `array:ARRAY, function:LAMBDA` -**Return type:** `ARRAY` -### Example - ```ppl source=people | eval array = array(1, -2, 3), result = filter(array, x -> x > 0) @@ -143,7 +180,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -154,15 +191,23 @@ fetched rows / total rows = 1/1 +--------+ ``` -## TRANSFORM +## TRANSFORM -### Description +**Usage**: `transform(array, function)` + +Transforms the elements of the `array` one by one using a lambda function. The lambda function can accept one or two inputs. If the lambda function accepts two parameters, the second parameter is the index of the element in the `array`. + +**Parameters**: + +- `array` (Required): The array to transform. +- `function` (Required): A lambda function that accepts one or two input parameters and returns a transformed value. + +**Return type**: `ARRAY` + +#### Example + +The following example transforms each element by adding 2: -Usage: `transform(array, function)` transform the element of array one by one using lambda. The lambda function can accept one single input or two input. If the lambda accepts two argument, the second one is the index of element in array. 
-**Argument type:** `array:ARRAY, function:LAMBDA` -**Return type:** `ARRAY` -### Example - ```ppl source=people | eval array = array(1, -2, 3), result = transform(array, x -> x + 2) @@ -170,7 +215,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -180,6 +225,8 @@ fetched rows / total rows = 1/1 | [3,0,5] | +---------+ ``` + +The following example uses both element value and index in the transformation: ```ppl source=people @@ -188,7 +235,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -199,15 +246,25 @@ fetched rows / total rows = 1/1 +----------+ ``` -## REDUCE +## REDUCE -### Description +**Usage**: `reduce(array, acc_base, function, <reduce_function>)` + +Uses a lambda function to iterate through all elements and interact with the accumulator base value. The lambda function accepts two parameters: the accumulator and the array element. When an optional `reduce_function` is provided, it is applied to the final accumulator value. The reduce function accepts the accumulator as a single parameter. + +**Parameters**: + +- `array` (Required): The array to reduce. +- `acc_base` (Required): The initial accumulator value. +- `function` (Required): A lambda function that accepts accumulator and array element as parameters. +- `reduce_function` (Optional): A lambda function to apply to the final accumulator value. + +**Return type**: Same as accumulator type (determined by `acc_base` and `reduce_function`) + +#### Example + +The following example reduces an array by summing all elements with an initial value: -Usage: `reduce(array, acc_base, function, <reduce_function>)` use lambda function to go through all element and interact with acc_base. The lambda function accept two argument accumulator and array element. If add one more reduce_function, will apply reduce_function to accumulator finally. 
The reduce function accept accumulator as the one argument. -**Argument type:** `array:ARRAY, acc_base:ANY, function:LAMBDA, reduce_function:LAMBDA` -**Return type:** `ANY` -### Example - ```ppl source=people | eval array = array(1, -2, 3), result = reduce(array, 10, (acc, x) -> acc + x) @@ -215,7 +272,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -225,6 +282,8 @@ fetched rows / total rows = 1/1 | 12 | +--------+ ``` + +The following example uses an additional reduce function to transform the final result: ```ppl source=people @@ -233,7 +292,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -244,15 +303,23 @@ fetched rows / total rows = 1/1 +--------+ ``` -## MVJOIN +## MVJOIN -### Description +**Usage**: `mvjoin(array, delimiter)` + +Joins string array elements into a single string, separated by the specified delimiter. `NULL` elements are excluded from the output. Only string arrays are supported. + +**Parameters**: + +- `array` (Required): An array of strings to join. +- `delimiter` (Required): The string to use as a separator between array elements. + +**Return type**: `STRING` + +#### Example + +The following example joins an array of strings with a comma delimiter: -Usage: `mvjoin(array, delimiter)` joins string array elements into a single string, separated by the specified delimiter. NULL elements are excluded from the output. Only string arrays are supported. 
-**Argument type:** `array: ARRAY of STRING, delimiter: STRING` -**Return type:** `STRING` -### Example - ```ppl source=people | eval result = mvjoin(array('a', 'b', 'c'), ',') @@ -260,7 +327,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -270,6 +337,8 @@ fetched rows / total rows = 1/1 | a,b,c | +--------+ ``` + +The following example joins field values into a single string: ```ppl source=accounts @@ -279,7 +348,7 @@ source=accounts | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -290,15 +359,24 @@ fetched rows / total rows = 1/1 +-------------+ ``` -## MVAPPEND +## MVAPPEND -### Description +**Usage**: `mvappend(value1, value2, value3...)` + +Appends all elements from parameters to create an array. Flattens array parameters and collects all individual elements. Always returns an array or `NULL` for consistent type behavior. + +**Parameters**: + +- `value1` (Required): A value of any type to append to the array. +- `value2` (Optional): Additional values of any type to append to the array. +- `...` (Optional): Any number of additional values. + +**Return type**: `ARRAY` + +#### Example + +The following example appends multiple values to create an array: -Usage: `mvappend(value1, value2, value3...)` appends all elements from arguments to create an array. Flattens array arguments and collects all individual elements. Always returns an array or null for consistent type behavior. 
-**Argument type:** `value1: ANY, value2: ANY, ...` -**Return type:** `ARRAY` -### Example - ```ppl source=people | eval result = mvappend(1, 1, 3) @@ -306,7 +384,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -316,6 +394,8 @@ fetched rows / total rows = 1/1 | [1,1,3] | +---------+ ``` + +The following example demonstrates array flattening: ```ppl source=people @@ -324,7 +404,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -334,6 +414,8 @@ fetched rows / total rows = 1/1 | [1,2,3] | +---------+ ``` + +The following example shows nested `mvappend` calls: ```ppl source=people @@ -342,7 +424,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -352,6 +434,8 @@ fetched rows / total rows = 1/1 | [1,2,3] | +---------+ ``` + +The following example creates an array from a single value: ```ppl source=people @@ -360,7 +444,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -370,6 +454,8 @@ fetched rows / total rows = 1/1 | [42] | +--------+ ``` + +The following example demonstrates `NULL` value filtering: ```ppl source=people @@ -378,7 +464,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -388,6 +474,8 @@ fetched rows / total rows = 1/1 | [2] | +--------+ ``` + +The following example shows behavior with only `NULL` values: ```ppl source=people @@ -396,7 +484,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -406,6 +494,8 @@ fetched rows / total rows = 1/1 | null | +--------+ ``` + +The following example concatenates multiple arrays: ```ppl source=people @@ -414,7 +504,7 @@ 
source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -424,6 +514,8 @@ fetched rows / total rows = 1/1 | [1,2,3,4] | +-----------+ ``` + +The following example appends field values: ```ppl source=accounts @@ -432,7 +524,7 @@ source=accounts | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -442,6 +534,8 @@ fetched rows / total rows = 1/1 | [Amber,Duke] | +--------------+ ``` + +The following example demonstrates mixed data types: ```ppl source=people @@ -450,7 +544,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -461,17 +555,22 @@ fetched rows / total rows = 1/1 +--------------+ ``` -## SPLIT +## SPLIT + +**Usage**: `split(str, delimiter)` -### Description +Splits the string values on the delimiter and returns the string values as a multivalue field (array). Use an empty string (`""`) to split the original string into one value per character. If the delimiter is not found, the function returns an array containing the original string. If the input string is empty, the function returns an empty array. -Usage: `split(str, delimiter)` splits the string values on the delimiter and returns the string values as a multivalue field (array). Use an empty string ("") to split the original string into one value per character. If the delimiter is not found, returns an array containing the original string. If the input string is empty, returns an empty array. +**Parameters**: -**Argument type:** `str: STRING, delimiter: STRING` +- `str` (Required): The string to split. +- `delimiter` (Required): The string to use as a delimiter for splitting. 
-**Return type:** `ARRAY of STRING` +**Return type**: `ARRAY` -### Example +#### Example + +The following example splits a string using a semicolon delimiter: ```ppl source=people @@ -480,7 +579,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -491,6 +590,8 @@ fetched rows / total rows = 1/1 +------------------------------------+ ``` +The following example uses a multi-character delimiter: + ```ppl source=people | eval test = '1a2b3c4def567890', result = split(test, 'def') @@ -498,7 +599,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -509,6 +610,8 @@ fetched rows / total rows = 1/1 +------------------+ ``` +The following example splits a string into individual characters using an empty delimiter: + ```ppl source=people | eval test = 'abcd', result = split(test, '') @@ -516,7 +619,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -527,6 +630,8 @@ fetched rows / total rows = 1/1 +-----------+ ``` +The following example splits using a double-colon delimiter: + ```ppl source=people | eval test = 'name::value', result = split(test, '::') @@ -534,7 +639,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -545,6 +650,8 @@ fetched rows / total rows = 1/1 +--------------+ ``` +The following example shows behavior when the delimiter is not found: + ```ppl source=people | eval test = 'hello', result = split(test, ',') @@ -552,7 +659,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -563,15 +670,22 @@ fetched rows / total rows = 1/1 +---------+ ``` -## MVDEDUP +## MVDEDUP -### Description +**Usage**: `mvdedup(array)` + +Removes duplicate values from a 
multivalue array while preserving the order of the first occurrence. `NULL` elements are filtered out. Returns a deduplicated array, or `NULL` if the input is `NULL`. + +**Parameters**: + +- `array` (Required): The array from which to remove duplicates. + +**Return type**: `ARRAY` + +#### Example + +The following example removes duplicate numbers while preserving order: -Usage: `mvdedup(array)` removes duplicate values from a multivalue array while preserving the order of first occurrence. NULL elements are filtered out. Returns an array with duplicates removed, or null if the input is null. -**Argument type:** `array: ARRAY` -**Return type:** `ARRAY` -### Example - ```ppl source=people | eval array = array(1, 2, 2, 3, 1, 4), result = mvdedup(array) @@ -579,7 +693,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -589,6 +703,8 @@ fetched rows / total rows = 1/1 | [1,2,3,4] | +-----------+ ``` + +The following example deduplicates string values: ```ppl source=people @@ -597,7 +713,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -607,6 +723,8 @@ fetched rows / total rows = 1/1 | [z,a,b,c] | +-----------+ ``` + +The following example shows behavior with an empty array: ```ppl source=people @@ -615,7 +733,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -628,12 +746,20 @@ fetched rows / total rows = 1/1 ## MVFIND -### Description +**Usage**: `mvfind(array, regex)` + +Searches a multivalue array and returns the `0`-based index of the first element that matches the regular expression. Returns `NULL` if no match is found. + +**Parameters**: + +- `array` (Required): The array to search. +- `regex` (Required): The regular expression pattern to match against array elements. 
+ +**Return type**: `INTEGER` (or `NULL` if no match found) -Usage: mvfind(array, regex) searches a multivalue array and returns the 0-based index of the first element that matches the regular expression. Returns NULL if no match is found. -Argument type: array: ARRAY, regex: STRING -Return type: INTEGER (nullable) -Example +#### Example + +The following example searches for the first element that matches a regular expression: ```ppl source=people @@ -642,7 +768,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -653,6 +779,8 @@ fetched rows / total rows = 1/1 +--------+ ``` +The following example shows behavior when no match is found: + ```ppl source=people | eval array = array('cat', 'dog', 'bird'), result = mvfind(array, 'fish') @@ -660,7 +788,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -671,6 +799,8 @@ fetched rows / total rows = 1/1 +--------+ ``` +The following example uses a regex pattern with character classes: + ```ppl source=people | eval array = array('error123', 'info', 'error456'), result = mvfind(array, 'error[0-9]+') @@ -678,7 +808,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -689,6 +819,8 @@ fetched rows / total rows = 1/1 +--------+ ``` +The following example demonstrates case-insensitive matching: + ```ppl source=people | eval array = array('Apple', 'Banana', 'Cherry'), result = mvfind(array, '(?i)banana') @@ -696,7 +828,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -707,15 +839,24 @@ fetched rows / total rows = 1/1 +--------+ ``` -## MVINDEX +## MVINDEX -### Description +**Usage**: `mvindex(array, start, [end])` + +Returns a subset of the multivalue array using the start and optional end index 
values. Indexes are `0`-based (the first element is at index `0`). Supports negative indexing where `-1` refers to the last element. When only start is provided, the function returns a single element. When both start and end are provided, the function returns an array of elements from start to end (inclusive). + +**Parameters**: + +- `array` (Required): The array from which to extract elements. +- `start` (Required): The starting index (`0`-based). +- `end` (Optional): The ending index (`0`-based, inclusive). + +**Return type**: Single element type when only `start` is provided; `ARRAY` when both `start` and `end` are provided + +#### Example + +The following example gets a single element at index 1: -Usage: `mvindex(array, start, [end])` returns a subset of the multivalue array using the start and optional end index values. Indexes are 0-based (first element is at index 0). Supports negative indexing where -1 refers to the last element. When only start is provided, returns a single element. When both start and end are provided, returns an array of elements from start to end (inclusive). 
-**Argument type:** `array: ARRAY, start: INTEGER, end: INTEGER (optional)` -**Return type:** `ANY (single element) or ARRAY (range)` -### Example - ```ppl source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, 1) @@ -723,7 +864,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -733,6 +874,8 @@ fetched rows / total rows = 1/1 | b | +--------+ ``` + +The following example uses negative indexing to get the last element: ```ppl source=people @@ -741,7 +884,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -751,6 +894,8 @@ fetched rows / total rows = 1/1 | e | +--------+ ``` + +The following example extracts a range of elements: ```ppl source=people @@ -759,7 +904,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -769,6 +914,8 @@ fetched rows / total rows = 1/1 | [2,3,4] | +---------+ ``` + +The following example uses negative indexing for a range: ```ppl source=people @@ -777,7 +924,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -787,6 +934,8 @@ fetched rows / total rows = 1/1 | [3,4,5] | +---------+ ``` + +The following example extracts elements from the beginning of an array: ```ppl source=people @@ -795,7 +944,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -808,12 +957,20 @@ fetched rows / total rows = 1/1 ## MVMAP -### Description +**Usage**: `mvmap(array, expression)` -Usage: mvmap(array, expression) iterates over each element of a multivalue array, applies the expression to each element, and returns a multivalue array with the transformed results. 
The field name in the expression is implicitly bound to each element value. -Argument type: array: ARRAY, expression: EXPRESSION -Return type: ARRAY -Example +Iterates over each element of a multivalue array, applies the expression to each element, and returns a multivalue array containing the transformed results. The field name in the expression is implicitly bound to each element value. + +**Parameters**: + +- `array` (Required): The array to map over. +- `expression` (Required): The expression to apply to each element. + +**Return type**: `ARRAY` + +#### Example + +The following example applies a mathematical operation to each element of an array: ```ppl source=people @@ -822,7 +979,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -833,6 +990,8 @@ fetched rows / total rows = 1/1 +------------+ ``` +The following example applies a different mathematical operation: + ```ppl source=people | eval array = array(1, 2, 3), result = mvmap(array, array + 5) @@ -840,7 +999,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -851,9 +1010,9 @@ fetched rows / total rows = 1/1 +---------+ ``` -Note: For nested expressions like ``mvmap(mvindex(arr, 1, 3), arr * 2)``, the field name (``arr``) is extracted from the first argument and must match the field referenced in the expression. +> **Note**: For nested expressions such as `mvmap(mvindex(arr, 1, 3), arr * 2)`, the field name (`arr`) is extracted from the first argument and must match the field referenced in the expression. 
-The expression can also reference other single-value fields: +The following example shows how the expression can reference other single-value fields: ```ppl source=people @@ -862,7 +1021,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -876,19 +1035,27 @@ fetched rows / total rows = 1/1 ## MVZIP -### Description +**Usage**: `mvzip(mv_left, mv_right, [delim])` + +Combines the values in two multivalue arrays by pairing corresponding elements and joining them into strings. The delimiter specifies the character or string used to join the two values. This is similar to the Python zip command. -Usage: `mvzip(mv_left, mv_right, [delim])` combines the values in two multivalue arrays by pairing corresponding elements and joining them into strings. The delimiter is used to specify a delimiting character to join the two values. This is similar to the Python zip command. +The values are combined by pairing the first value of `mv_left` with the first value of `mv_right`, then the second with the second, and so on. Each pair is concatenated into a string using the delimiter. The function stops at the length of the shorter array. -The values are stitched together combining the first value of mv_left with the first value of mv_right, then the second with the second, and so on. Each pair is concatenated into a string using the delimiter. The function stops at the length of the shorter array. +The delimiter is optional. When specified, it must be enclosed in quotation marks. The default delimiter is a comma. -The delimiter is optional. When specified, it must be enclosed in quotation marks. The default delimiter is a comma ( , ). +Returns `NULL` if either input is `NULL`. Returns an empty array if either input array is empty. -Returns null if either input is null. Returns an empty array if either input array is empty. 
+**Parameters**: -**Argument type:** `mv_left: ARRAY, mv_right: ARRAY, delim: STRING (optional)` -**Return type:** `ARRAY of STRING` -### Example +- `mv_left` (Required): The first array to combine. +- `mv_right` (Required): The second array to combine. +- `delim` (Optional): The delimiter to use for joining pairs. Defaults to comma. + +**Return type**: `ARRAY` + +#### Example + +The following example combines host and port arrays with a colon delimiter: ```ppl source=people @@ -897,7 +1064,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -908,6 +1075,8 @@ fetched rows / total rows = 1/1 +----------------------+ ``` +The following example uses a pipe delimiter with equal-length arrays: + ```ppl source=people | eval arr1 = array('a', 'b', 'c'), arr2 = array('x', 'y', 'z'), result = mvzip(arr1, arr2, '|') @@ -915,7 +1084,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -926,6 +1095,8 @@ fetched rows / total rows = 1/1 +---------------+ ``` +The following example demonstrates behavior with arrays of different lengths: + ```ppl source=people | eval arr1 = array('1', '2', '3'), arr2 = array('a', 'b'), result = mvzip(arr1, arr2, '-') @@ -933,7 +1104,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -944,6 +1115,8 @@ fetched rows / total rows = 1/1 +-----------+ ``` +The following example shows nested mvzip calls: + ```ppl source=people | eval arr1 = array('a', 'b', 'c'), arr2 = array('x', 'y', 'z'), arr3 = array('1', '2', '3'), result = mvzip(mvzip(arr1, arr2, '-'), arr3, ':') @@ -951,7 +1124,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -962,6 +1135,8 @@ fetched rows / total rows = 1/1 +---------------------+ ``` +The following 
example shows behavior with an empty array: + ```ppl source=people | eval arr1 = array('a', 'b'), arr2 = array(), result = mvzip(arr1, arr2) @@ -969,7 +1144,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 diff --git a/docs/user/ppl/functions/condition.md b/docs/user/ppl/functions/condition.md index cb5dff9107e..95f805ade94 100644 --- a/docs/user/ppl/functions/condition.md +++ b/docs/user/ppl/functions/condition.md @@ -1,22 +1,26 @@ -# Condition Functions -PPL functions use the search capabilities of the OpenSearch engine. However, these functions don't execute directly within the OpenSearch plugin's memory. Instead, they facilitate the global filtering of query results based on specific conditions, such as a `WHERE` or `HAVING` clause. +# Conditional functions -The following sections describe the condition PPL functions. -## ISNULL +PPL conditional functions enable global filtering of query results based on specific conditions, such as `WHERE` or `HAVING` clauses. These functions use the search capabilities of the OpenSearch engine but don't execute directly within the OpenSearch plugin's memory. +## ISNULL -### Description +**Usage**: `isnull(field)` -Usage: `isnull(field)` returns TRUE if field is NULL, FALSE otherwise. +Returns `TRUE` if the field is `NULL`, `FALSE` otherwise. + +The `field IS NULL` predicate syntax is also supported as a synonym. The `isnull()` function is commonly used: -- In `eval` expressions to create conditional fields -- With the `if()` function to provide default values -- In `where` clauses to filter null records - -**Argument type:** All supported data types. -**Return type:** `BOOLEAN` +- In `eval` expressions to create conditional fields. +- With the `if()` function to provide default values. +- In `where` clauses to filter null records. -### Example +**Parameters**: + +- `field` (Required): The field to check for null values. 
+ +**Return type**: `BOOLEAN` + +#### Example ```ppl source=accounts @@ -24,7 +28,7 @@ source=accounts | fields result, employer, firstname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -38,7 +42,7 @@ fetched rows / total rows = 4/4 +--------+----------+-----------+ ``` -Using with if() to label records +The following example demonstrates using `isnull` with the `if` function to create conditional labels: ```ppl source=accounts @@ -46,7 +50,7 @@ source=accounts | fields firstname, employer, status ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -60,15 +64,23 @@ fetched rows / total rows = 4/4 +-----------+----------+------------+ ``` -Filtering with where clause - +The following example filters records using `isnull` in a `where` clause: + ```ppl source=accounts | where isnull(employer) | fields account_number, firstname, employer ``` + +The `IS NULL` predicate syntax can be used as an equivalent alternative: + +```ppl +source=accounts +| where employer IS NULL +| fields account_number, firstname, employer +``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -79,23 +91,29 @@ fetched rows / total rows = 1/1 +----------------+-----------+----------+ ``` -## ISNOTNULL +## ISNOTNULL + +**Usage**: `isnotnull(field)` -### Description +Returns `TRUE` if the field is NOT `NULL`, `FALSE` otherwise. -Usage: `isnotnull(field)` returns TRUE if field is NOT NULL, FALSE otherwise. The `isnotnull(field)` function is the opposite of `isnull(field)`. Instead of checking for null values, it checks a specific field and returns `true` if the field contains data, that is, it is not null. +The `field IS NOT NULL` predicate syntax is also supported as a synonym. 
The `isnotnull()` function is commonly used: -- In `eval` expressions to create boolean flags -- In `where` clauses to filter out null values -- With the `if()` function for conditional logic -- To validate data presence - -**Argument type:** All supported data types. -**Return type:** `BOOLEAN` -**Synonyms:** [ISPRESENT](#ispresent) +- In `eval` expressions to create Boolean flags. +- In `where` clauses to filter out null values. +- With the `if()` function for conditional logic. +- To validate data presence. -### Example +**Synonyms**: [ISPRESENT](#ispresent) + +**Parameters**: + +- `field` (Required): The field to check for non-null values. + +**Return type**: `BOOLEAN` + +#### Example ```ppl source=accounts @@ -103,7 +121,7 @@ source=accounts | fields firstname, employer, has_employer ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -117,15 +135,15 @@ fetched rows / total rows = 4/4 +-----------+----------+--------------+ ``` -Filtering with where clause - +The following example shows how to filter records using `isnotnull` in a `where` clause: + ```ppl source=accounts | where not isnotnull(employer) | fields account_number, employer ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -135,16 +153,37 @@ fetched rows / total rows = 1/1 | 18 | null | +----------------+----------+ ``` + +The `IS NOT NULL` predicate syntax is equivalent to `isnotnull()`: + +```ppl +source=accounts +| where employer IS NOT NULL +| fields account_number, employer +``` + +The query returns the following results: -Using with if() for validation messages +```text +fetched rows / total rows = 3/3 ++----------------+----------+ +| account_number | employer | +|----------------+----------| +| 1 | Pyrami | +| 6 | Netagy | +| 13 | Quility | ++----------------+----------+ +``` +The following example demonstrates using `isnotnull` with the `if` function to create validation 
messages: + ```ppl source=accounts | eval validation = if(isnotnull(employer), 'valid', 'missing employer') | fields firstname, employer, validation ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -158,10 +197,15 @@ fetched rows / total rows = 4/4 +-----------+----------+------------------+ ``` -## EXISTS +## EXISTS + +**Usage**: Use `isnull(field)` or `isnotnull(field)` to test field existence + +Since OpenSearch doesn't differentiate between null and missing values, functions like `ismissing`/`isnotmissing` are not available. Use `isnull`/`isnotnull` to test field existence instead. -[Since OpenSearch doesn't differentiate null and missing](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html), we can't provide functions like ismissing/isnotmissing to test if a field exists or not. But you can still use isnull/isnotnull for such purpose. -Example, the account 13 doesn't have email field +#### Example + +The following example shows account 13, which doesn't contain an `email` field: ```ppl source=accounts @@ -169,7 +213,7 @@ source=accounts | fields account_number, email ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -180,16 +224,20 @@ fetched rows / total rows = 1/1 +----------------+-------+ ``` -## IFNULL +## IFNULL -### Description +**Usage**: `ifnull(field1, field2)` -Usage: `ifnull(field1, field2)` returns field2 if field1 is null. +Returns `field2` if `field1` is `NULL`. -**Argument type:** All supported data types (NOTE: if two parameters have different types, you will fail semantic check). -**Return type:** `any` +**Parameters**: -### Example +- `field1` (Required): The field to check for `NULL` values. +- `field2` (Required): The value to return if `field1` is `NULL`. 
+ +**Return type**: Any (matches input types) + +#### Example ```ppl source=accounts @@ -197,7 +245,7 @@ source=accounts | fields result, employer, firstname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -211,11 +259,12 @@ fetched rows / total rows = 4/4 +---------+----------+-----------+ ``` -### Nested IFNULL Pattern +#### Nested ifnull pattern -For OpenSearch versions prior to 3.1, COALESCE-like functionality can be achieved using nested IFNULL statements. This pattern is particularly useful in observability use cases where field names may vary across different data sources. +For OpenSearch versions prior to 3.1, `coalesce`-like functionality can be achieved using nested `ifnull` statements. This pattern is particularly useful in observability use cases where field names may vary across different data sources. Usage: `ifnull(field1, ifnull(field2, ifnull(field3, default_value)))` -### Example + +#### Example ```ppl source=accounts @@ -223,7 +272,7 @@ source=accounts | fields result, employer, firstname, lastname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -237,16 +286,20 @@ fetched rows / total rows = 4/4 +---------+----------+-----------+----------+ ``` -## NULLIF +## NULLIF -### Description +**Usage**: `nullif(field1, field2)` -Usage: `nullif(field1, field2)` returns null if two parameters are same, otherwise returns field1. +Returns `NULL` if the two parameters are the same, otherwise returns `field1`. -**Argument type:** All supported data types (NOTE: if two parameters have different types, you will fail semantic check). -**Return type:** `any` +**Parameters**: -### Example +- `field1` (Required): The field to return if different from `field2`. +- `field2` (Required): The value to compare against `field1`. 
+ +**Return type**: Any (matches `field1` type) + +#### Example ```ppl source=accounts @@ -254,7 +307,7 @@ source=accounts | fields result, employer, firstname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -268,16 +321,23 @@ fetched rows / total rows = 4/4 +---------+----------+-----------+ ``` -## IF +## IF -### Description +**Usage**: `if(condition, expr1, expr2)` -Usage: `if(condition, expr1, expr2)` returns expr1 if condition is true, otherwise returns expr2. +Returns `expr1` if the condition is `true`, otherwise returns `expr2`. -**Argument type:** All supported data types (NOTE: if expr1 and expr2 are different types, you will fail semantic check). -**Return type:** `any` +**Parameters**: -### Example +- `condition` (Required): The Boolean expression to evaluate. +- `expr1` (Required): The value to return if the condition is `true`. +- `expr2` (Required): The value to return if the condition is `false`. + +**Return type**: Least restrictive common type of `expr1` and `expr2` + +#### Example + +The following example returns the first name when the condition is `true`: ```ppl source=accounts @@ -285,7 +345,7 @@ source=accounts | fields result, firstname, lastname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -298,14 +358,16 @@ fetched rows / total rows = 4/4 | Dale | Dale | Adams | +---------+-----------+----------+ ``` - + +The following example returns the last name when the condition is `false`: + ```ppl source=accounts | eval result = if(false, firstname, lastname) | fields result, firstname, lastname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -318,14 +380,17 @@ fetched rows / total rows = 4/4 | Adams | Dale | Adams | +--------+-----------+----------+ ``` - + +The following example uses a complex condition to determine VIP status: + ```ppl source=accounts | eval is_vip = 
if(age > 30 AND isnotnull(employer), true, false) | fields is_vip, firstname, lastname ``` - -Expected output: + + +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -339,30 +404,37 @@ fetched rows / total rows = 4/4 +--------+-----------+----------+ ``` -## CASE +## CASE -### Description +**Usage**: `case(condition1, expr1, condition2, expr2, ... conditionN, exprN else default)` -Usage: `case(condition1, expr1, condition2, expr2, ... conditionN, exprN else default)` returns expr1 if condition1 is true, or returns expr2 if condition2 is true, ... if no condition is true, then returns the value of ELSE clause. If the ELSE clause is not defined, returns NULL. +Returns `expr1` if `condition1` is `true`, `expr2` if `condition2` is `true`, and so on. If no condition is `true`, returns the value of the `else` clause. If the `else` clause is not defined, returns `NULL`. -**Argument type:** All supported data types (NOTE: there is no comma before "else"). -**Return type:** `any` +**Parameters**: -### Limitations +- `condition1, condition2, ..., conditionN` (Required): Boolean expressions to evaluate in sequence. +- `expr1, expr2, ..., exprN` (Required): Values to return when the corresponding condition is `true`. +- `default` (Optional): The value to return when no condition is `true`. If not specified, returns `NULL`. -When each condition is a field comparison with a numeric literal and each result expression is a string literal, the query will be optimized as [range aggregations](https://docs.opensearch.org/latest/aggregations/bucket/range) if pushdown optimization is enabled. 
However, this optimization has the following limitations: -- Null values will not be grouped into any bucket of a range aggregation and will be ignored -- The default ELSE clause will use the string literal `"null"` instead of actual NULL values - -### Example +**Return type**: Least restrictive common type of all result expressions + +#### Limitations + +When each condition is a field comparison against a numeric literal and each result expression is a string literal, the query is optimized as [range aggregations](https://docs.opensearch.org/latest/aggregations/bucket/range/) if pushdown optimization is enabled. However, this optimization has the following limitations: +- `NULL` values are not grouped into any bucket of a range aggregation and are ignored. +- The default `else` clause uses the string literal `"null"` instead of actual `NULL` values. #### Example + +The following example demonstrates a `case` statement with an `else` clause: + ```ppl source=accounts | eval result = case(age > 35, firstname, age < 30, lastname else employer) | fields result, firstname, lastname, age, employer ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -375,14 +447,16 @@ fetched rows / total rows = 4/4 | null | Dale | Adams | 33 | null | +--------+-----------+----------+-----+----------+ ``` - + +The following example demonstrates a `case` statement without an `else` clause: + ```ppl source=accounts | eval result = case(age > 35, firstname, age < 30, lastname) | fields result, firstname, lastname, age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -395,14 +469,16 @@ fetched rows / total rows = 4/4 | null | Dale | Adams | 33 | +--------+-----------+----------+-----+ ``` - + +The following example uses `case` in a `where` clause to filter records: + ```ppl source=accounts | where true = case(age > 35, false, age < 30, false else true) | fields firstname, lastname, age ``` 
-Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -414,32 +490,36 @@ fetched rows / total rows = 2/2 +-----------+----------+-----+ ``` -## COALESCE +## COALESCE -### Description +**Usage**: `coalesce(field1, field2, ...)` -Usage: `coalesce(field1, field2, ...)` returns the first non-null, non-missing value in the argument list. +Returns the first non-null, non-missing value in the parameter list. -**Argument type:** All supported data types. Supports mixed data types with automatic type coercion. -**Return type:** Determined by the least restrictive common type among all arguments, with fallback to string if no common type can be determined. -Behavior: -- Returns the first value that is not null and not missing (missing includes non-existent fields) -- Empty strings ("") and whitespace strings (" ") are considered valid values -- If all arguments are null or missing, returns null -- Automatic type coercion is applied to match the determined return type -- If type conversion fails, the value is converted to string representation -- For best results, use arguments of the same data type to avoid unexpected type conversions - -Performance Considerations: -- Optimized for multiple field evaluation, more efficient than nested IFNULL patterns -- Evaluates arguments sequentially, stopping at the first non-null value -- Consider field order based on likelihood of containing values to minimize evaluation overhead - -Limitations: -- Type coercion may result in unexpected string conversions for incompatible types -- Performance may degrade with very large numbers of arguments - -### Example +**Parameters**: + +- `field1, field2, ...` (Required): Fields or expressions to evaluate for non-null values. + +**Return type**: Least restrictive common type of all input parameters + +**Behavior**: +- Returns the first value that is not `NULL` and not missing (missing includes non-existent fields). 
+- Empty strings (`""`) and whitespace strings (`" "`) are considered valid values. +- If all parameters are `NULL` or missing, returns `NULL`. +- Automatic type coercion is applied to match the determined return type. +- If type conversion fails, the value is converted to string representation. +- For best results, use parameters of the same data type to avoid unexpected type conversions. + +**Performance considerations**: +- Optimized for multiple field evaluation, more efficient than nested `ifnull` patterns. +- Evaluates parameters sequentially, stopping at the first non-null value. +- Consider field order based on likelihood of containing values to minimize evaluation overhead. + +**Limitations**: +- Type coercion may result in unexpected string conversions for incompatible types. +- Performance may degrade when using large numbers of arguments. + +#### Example ```ppl source=accounts @@ -447,7 +527,7 @@ source=accounts | fields result, firstname, lastname, employer ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -461,7 +541,7 @@ fetched rows / total rows = 4/4 +---------+-----------+----------+----------+ ``` -Empty String Handling Examples +#### Empty String Handling Examples ```ppl source=accounts @@ -470,7 +550,7 @@ source=accounts | fields result, empty_field, firstname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -490,7 +570,7 @@ source=accounts | fields result, firstname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -504,7 +584,7 @@ fetched rows / total rows = 4/4 +--------+-----------+ ``` -Mixed Data Types with Auto Coercion +#### Mixed Data Types with Auto Coercion ```ppl source=accounts @@ -512,7 +592,7 @@ source=accounts | fields result, employer, balance ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -526,7 
+606,7 @@ fetched rows / total rows = 4/4 +---------+----------+---------+ ``` -Non-existent Field Handling +#### Non-existent Field Handling ```ppl source=accounts @@ -534,7 +614,7 @@ source=accounts | fields result, firstname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -548,17 +628,21 @@ fetched rows / total rows = 4/4 +---------+-----------+ ``` -## ISPRESENT +## ISPRESENT -### Description +**Usage**: `ispresent(field)` -Usage: `ispresent(field)` returns true if the field exists. +Returns `TRUE` if the field exists, `FALSE` otherwise. -**Argument type:** All supported data types. -**Return type:** `BOOLEAN` -**Synonyms:** [ISNOTNULL](#isnotnull) +**Parameters**: -### Example +- `field` (Required): The field to check for existence. + +**Return type**: `BOOLEAN` + +**Synonyms**: [ISNOTNULL](#isnotnull) + +#### Example ```ppl source=accounts @@ -566,7 +650,7 @@ source=accounts | fields employer, firstname ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 3/3 @@ -579,16 +663,19 @@ fetched rows / total rows = 3/3 +----------+-----------+ ``` -## ISBLANK +## ISBLANK -### Description +**Usage**: `isblank(field)` -Usage: `isblank(field)` returns true if the field is null, an empty string, or contains only white space. +Returns `TRUE` if the field is `NULL`, an empty string, or contains only white space. -**Argument type:** All supported data types. -**Return type:** `BOOLEAN` +**Parameters**: -### Example +- `field` (Required): The field to check for blank values. 
+ +**Return type**: `BOOLEAN` + +#### Example ```ppl source=accounts @@ -597,7 +684,7 @@ source=accounts | fields `isblank(temp)`, temp, `isblank(employer)`, employer ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -611,16 +698,19 @@ fetched rows / total rows = 4/4 +---------------+---------+-------------------+----------+ ``` -## ISEMPTY +## ISEMPTY -### Description +**Usage**: `isempty(field)` -Usage: `isempty(field)` returns true if the field is null or is an empty string. +Returns `TRUE` if the field is `NULL` or is an empty string. -**Argument type:** All supported data types. -**Return type:** `BOOLEAN` +**Parameters**: -### Example +- `field` (Required): The field to check for empty values. + +**Return type**: `BOOLEAN` + +#### Example ```ppl source=accounts @@ -629,7 +719,7 @@ source=accounts | fields `isempty(temp)`, temp, `isempty(employer)`, employer ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -643,37 +733,37 @@ fetched rows / total rows = 4/4 +---------------+---------+-------------------+----------+ ``` -## EARLIEST +## EARLIEST -### Description +**Usage**: `earliest(relative_string, field)` -Usage: `earliest(relative_string, field)` returns true if the value of field is after the timestamp derived from relative_string relative to the current time. Otherwise, returns false. -relative_string: -The relative string can be one of the following formats: -1. `"now"` or `"now()"`: - - Uses the current system time. -2. Absolute format (`MM/dd/yyyy:HH:mm:ss` or `yyyy-MM-dd HH:mm:ss`): - - Converts the string to a timestamp and compares it with the data. -3. Relative format: `(+|-)[+<...>]@` - - Steps to specify a relative time: - - **a. Time offset:** Indicate the offset from the current time using `+` or `-`. - - **b. Time amount:** Provide a numeric value followed by a time unit (`s`, `m`, `h`, `d`, `w`, `M`, `y`). - - **c. 
Snap to unit:** Optionally specify a snap unit with `@` to round the result down to the nearest unit (e.g., hour, day, month). - - **Examples** (assuming current time is `2025-05-28 14:28:34`): - - `-3d+2y` → `2027-05-25 14:28:34` - - `+1d@m` → `2025-05-29 14:28:00` - - `-3M+1y@M` → `2026-02-01 00:00:00` - -Read more details [here](https://github.com/opensearch-project/opensearch-spark/blob/main/docs/ppl-lang/functions/ppl-datetime.md#relative_timestamp) +Returns `TRUE` if the field value is after the timestamp derived from `relative_string` relative to the current time, `FALSE` otherwise. -**Argument type:** `relative_string`: `STRING`, `field`: `TIMESTAMP` -**Return type:** `BOOLEAN` +**Parameters**: -### Example +- `relative_string` (Required): The reference time specification in one of the supported formats. +- `field` (Required): The timestamp field to compare against the reference time. + +**Return type**: `BOOLEAN` + +**Relative string formats**: +1. `"now"` or `"now()"`: Uses the current system time. +2. Absolute format (`MM/dd/yyyy:HH:mm:ss` or `yyyy-MM-dd HH:mm:ss`): Converts the string to a timestamp and compares it against the field value. +3. Relative format: `(+|-)<time_integer><time_unit>[+<time_integer><time_unit>...]@<snap_time_unit>` + +**Steps to specify a relative time**: +- **Time offset**: Indicate the offset from the current time using `+` or `-`. +- **Time amount**: Provide a numeric value followed by a time unit (`s`, `m`, `h`, `d`, `w`, `M`, `y`). +- **Snap to unit**: Optionally, specify a snap unit using `@` to round the result down to the nearest unit (for example, hour, day, month). + +**Examples** (assuming current time is `2025-05-28 14:28:34`): +- `-3d+2y` → `2027-05-25 14:28:34`. +- `+1d@m` → `2025-05-29 14:28:00`. +- `-3M+1y@M` → `2026-02-01 00:00:00`. 
+ +#### Example + +The following example compares timestamps against current time and relative time: ```ppl source=accounts @@ -683,7 +773,7 @@ source=accounts | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -693,14 +783,16 @@ fetched rows / total rows = 1/1 | False | True | +-------+------+ ``` - + +The following example filters records using an absolute time format: + ```ppl source=nyc_taxi | where earliest('07/01/2014:00:30:00', timestamp) | stats COUNT() as cnt ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -711,16 +803,22 @@ fetched rows / total rows = 1/1 +-----+ ``` -## LATEST +## LATEST -### Description +**Usage**: `latest(relative_string, field)` -Usage: `latest(relative_string, field)` returns true if the value of field is before the timestamp derived from relative_string relative to the current time. Otherwise, returns false. +Returns `TRUE` if the field value is before the timestamp derived from `relative_string` relative to the current time, `FALSE` otherwise. -**Argument type:** `relative_string`: `STRING`, `field`: `TIMESTAMP` -**Return type:** `BOOLEAN` +**Parameters**: -### Example +- `relative_string` (Required): The reference time specification in one of the supported formats. +- `field` (Required): The timestamp field to compare against the reference time. 
+ +**Return type**: `BOOLEAN` + +#### Example + +The following example compares timestamps using the latest function: ```ppl source=accounts @@ -730,7 +828,7 @@ source=accounts | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -740,14 +838,16 @@ fetched rows / total rows = 1/1 | True | True | +------+------+ ``` - + +The following example filters records using latest with an absolute time format: + ```ppl source=nyc_taxi | where latest('07/21/2014:04:00:00', timestamp) | stats COUNT() as cnt ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -777,7 +877,7 @@ Syntax: ` contains ''` ### Example -Basic substring filter: +The following example filters accounts using a substring match to find names containing 'mbe': ```ppl source=accounts @@ -785,7 +885,7 @@ source=accounts | fields firstname, age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -796,7 +896,7 @@ fetched rows / total rows = 1/1 +-----------+-----+ ``` -Case-insensitive matching (all of the following are equivalent): +The following queries are all equivalent due to case-insensitive matching: ```ppl ignore source=accounts | where firstname contains 'mbe' @@ -804,7 +904,7 @@ source=accounts | where firstname CONTAINS 'MBE' source=accounts | where firstname Contains 'Mbe' ``` -Combining with other conditions: +The following example combines substring filtering with other conditions: ```ppl source=accounts @@ -812,7 +912,7 @@ source=accounts | fields firstname, employer, age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -823,73 +923,75 @@ fetched rows / total rows = 1/1 +-----------+----------+-----+ ``` -## REGEXP_MATCH +## REGEXP_MATCH -### Description +**Usage**: `regexp_match(string, pattern)` -Usage: `regexp_match(string, pattern)` returns true if the regular 
expression pattern finds a match against any substring of the string value, otherwise returns false. -The function uses Java regular expression syntax for the pattern. +Returns `TRUE` if the regular expression pattern finds a match against any substring of the string value, otherwise returns `FALSE`. The function uses Java regular expression syntax for the pattern. -**Argument type:** `STRING`, `STRING` -**Return type:** `BOOLEAN` +**Parameters**: -### Example +- `string` (Required): The string to search within. +- `pattern` (Required): The regular expression pattern to match against. + +**Return type**: `BOOLEAN` + +#### Example + +The following example filters log messages using a regex pattern: -``` ppl ignore -source=logs | where regexp_match(message, 'ERROR|WARN|FATAL') | fields timestamp, message +```ppl +source=logs +| where regexp_match(message, 'ERROR|WARN|FATAL') +| fields timestamp, message ``` + -```text -fetched rows / total rows = 3/100 -+---------------------+------------------------------------------+ -| timestamp | message | -|---------------------+------------------------------------------| -| 2024-01-15 10:23:45 | ERROR: Connection timeout to database | -| 2024-01-15 10:24:12 | WARN: High memory usage detected | -| 2024-01-15 10:25:33 | FATAL: System crashed unexpectedly | -+---------------------+------------------------------------------+ -``` +| timestamp | message | +| --- | --- | +| 2024-01-15 10:23:45 | ERROR: Connection timeout to database | +| 2024-01-15 10:24:12 | WARN: High memory usage detected | +| 2024-01-15 10:25:33 | FATAL: System crashed unexpectedly | + +The following example uses regex to validate email addresses: -``` ppl ignore -source=users | where regexp_match(email, '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') | fields name, email +```ppl +source=users +| where regexp_match(email, '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') +| fields name, email ``` + -```text -fetched rows / total rows = 2/3 
-+-------+----------------------+ -| name | email | -|-------+----------------------| -| John | john@example.com | -| Alice | alice@company.org | -+-------+----------------------+ -``` +| name | email | +| --- | --- | +| John | john@example.com | +| Alice | alice@company.org | + +The following example filters for valid public IP addresses using regex: -```ppl ignore -source=network | where regexp_match(ip_address, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') AND NOT regexp_match(ip_address, '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)') | fields ip_address, status +```ppl +source=network +| where regexp_match(ip_address, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') AND NOT regexp_match(ip_address, '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)') +| fields ip_address, status ``` + -```text -fetched rows / total rows = 2/10 -+---------------+--------+ -| ip_address | status | -|---------------+--------| -| 8.8.8.8 | active | -| 1.1.1.1 | active | -+---------------+--------+ -``` +| ip_address | status | +| --- | --- | +| 8.8.8.8 | active | +| 1.1.1.1 | active | + +The following example uses regex for product categorization with case-insensitive matching: -```ppl ignore -source=products | eval category = if(regexp_match(name, '(?i)(laptop|computer|desktop)'), 'Computing', if(regexp_match(name, '(?i)(phone|tablet|mobile)'), 'Mobile', 'Other')) | fields name, category +```ppl +source=products +| eval category = if(regexp_match(name, '(?i)(laptop|computer|desktop)'), 'Computing', if(regexp_match(name, '(?i)(phone|tablet|mobile)'), 'Mobile', 'Other')) +| fields name, category ``` + -```text -fetched rows / total rows = 4/4 -+------------------------+----------+ -| name | category | -|------------------------+----------| -| Dell Laptop XPS | Computing| -| iPhone 15 Pro | Mobile | -| Wireless Mouse | Other | -| Desktop Computer Tower | Computing| -+------------------------+----------+ -``` \ No newline at end of file +| name | category | +| --- | --- | +| Dell Laptop XPS | 
Computing | +| iPhone 15 Pro | Mobile | +| Wireless Mouse | Other | \ No newline at end of file diff --git a/docs/user/ppl/functions/conversion.md b/docs/user/ppl/functions/conversion.md index 99efe161033..7654932129c 100644 --- a/docs/user/ppl/functions/conversion.md +++ b/docs/user/ppl/functions/conversion.md @@ -1,10 +1,21 @@ -# Type Conversion Functions +# Type conversion functions -## CAST +The following type conversion functions are supported in PPL. -### Description +## CAST -Usage: `cast(expr as dateType)` cast the expr to dataType. return the value of dataType. The following conversion rules are used: +**Usage**: `cast(expr as dataType)` + +Casts the expression to the specified data type and returns the converted value. + +**Parameters**: + +- `expr` (Required): The expression to cast to a different data type. +- `dataType` (Required): The target data type for the cast operation. + +**Return type**: Specified by data type + +The following table shows the conversion rules used for casting between data types: | Src/Target | STRING | NUMBER | BOOLEAN | TIMESTAMP | DATE | TIME | IP | | --- | --- | --- | --- | --- | --- | --- | --- | @@ -16,11 +27,12 @@ Usage: `cast(expr as dateType)` cast the expr to dataType. return the value of d | TIME | Note1 | N/A | N/A | N/A | N/A | | N/A | | IP | Note2 | N/A | N/A | N/A | N/A | N/A | | -Note1: the conversion follow the JDK specification. -Note2: IP will be converted to its canonical representation. Canonical representation -for IPv6 is described in [RFC 5952](https://datatracker.ietf.org/doc/html/rfc5952). +Note1: The conversion follows the JDK specification. +Note2: IP addresses are converted to their canonical representation. The canonical representation for IPv6 is described in [RFC 5952](https://datatracker.ietf.org/doc/html/rfc5952). 
+ +#### Example -### Example: Cast to string +The following example casts different data types to string: ```ppl source=people @@ -28,7 +40,7 @@ source=people | fields `cbool`, `cint`, `cdate` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -39,7 +51,7 @@ fetched rows / total rows = 1/1 +-------+------+------------+ ``` -### Example: Cast to number +The following example casts values to integer type: ```ppl source=people @@ -47,7 +59,7 @@ source=people | fields `cbool`, `cstring` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -58,7 +70,7 @@ fetched rows / total rows = 1/1 +-------+---------+ ``` -### Example: Cast to date +The following example casts strings to date, time, and timestamp types: ```ppl source=people @@ -66,7 +78,7 @@ source=people | fields `cdate`, `ctime`, `ctimestamp` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -77,7 +89,7 @@ fetched rows / total rows = 1/1 +------------+----------+---------------------+ ``` -### Example: Cast function can be chained +The following example demonstrates chaining cast functions: ```ppl source=people @@ -85,7 +97,7 @@ source=people | fields `cbool` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -96,20 +108,19 @@ fetched rows / total rows = 1/1 +-------+ ``` -## IMPLICIT (AUTO) TYPE CONVERSION +## Implicit type conversion + +Implicit conversion is automatic casting. When a function does not have an exact match for the input types, the engine looks for another signature that can safely handle the values. It selects the option that requires the least conversion of the original types, so you can mix literals and fields without adding explicit `cast` functions. -Implicit conversion is automatic casting. 
When a function does not have an exact match for the -input types, the engine looks for another signature that can safely work with the values. It picks -the option that requires the least stretching of the original types, so you can mix literals and -fields without adding `CAST` everywhere. +### String to numeric type conversion -### String to numeric +When a string is used where a numeric value is expected, the engine attempts to parse the string as a number: +- The string must represent a valid numeric value, such as `"3.14"` or `"42"`. Any other value causes the query to fail. +- If a string is used alongside numeric arguments, the engine treats it as a `DOUBLE` so that the numeric overload of the function can be applied. -When a string stands in for a number we simply parse the text: -- The value must be something like `"3.14"` or `"42"`. Anything else causes the query to fail. -- If a string appears next to numeric arguments, it is treated as a `DOUBLE` so the numeric overload of the function can run. 
+#### Example -### Example: Use string in arithmetic operator +The following example demonstrates using strings in arithmetic operations: ```ppl source=people @@ -117,7 +128,7 @@ source=people | fields divide, multiply, add, minus, concat ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -128,7 +139,7 @@ fetched rows / total rows = 1/1 +--------+----------+------+-------+--------+ ``` -### Example: Use string in comparison operator +The following example demonstrates using strings in comparison operations: ```ppl source=people @@ -136,7 +147,7 @@ source=people | fields e, en, ed, edn, l, ld, i ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -147,34 +158,30 @@ fetched rows / total rows = 1/1 +------+-------+------+-------+------+------+------+ ``` -## TOSTRING +## TOSTRING -### Description +**Usage**: `tostring(value[, format])` -The following usage options are available, depending on the parameter types and the number of parameters. +Converts the value to a string representation. If a format is provided, converts numbers to the specified format type. For Boolean values, converts to `TRUE` or `FALSE`. -Usage with format type: `tostring(ANY, [format])`: Converts the value in first argument to provided format type string in second argument. If second argument is not provided, then it converts to default string representation. +**Parameters**: -**Return type:** `STRING` +- `value` (Required): The value to convert to string (any data type). +- `format` (Optional): The format type for number conversion. This parameter is only used when `value` is a number. If `value` is a Boolean, this parameter is ignored. -Usage for boolean parameter without format type `tostring(boolean)`: Converts the string to 'TRUE' or 'FALSE'. - -**Return type:** `STRING` +Format types: -You can use this function with the eval commands and as part of eval expressions. 
If first argument can be any valid type, second argument is optional and if provided, it needs to be format name to convert to where first argument contains only numbers. If first argument is boolean, then second argument is not used even if its provided. +- `binary`: Converts a number to a binary value. +- `hex`: Converts the number to a hexadecimal value. +- `commas`: Formats the number using commas. If the number includes a decimal, the function rounds the number to the nearest two decimal places. +- `duration`: Converts the value in seconds to the readable time format `HH:MM:SS`. +- `duration_millis`: Converts the value in milliseconds to the readable time format `HH:MM:SS`. -Format types: -1. "binary" Converts a number to a binary value. -2. "hex" Converts the number to a hexadecimal value. -3. "commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to nearest two decimal places. -4. "duration" Converts the value in seconds to the readable time format HH:MM:SS. -5. "duration_millis" Converts the value in milliseconds to the readable time format HH:MM:SS. - -The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats. +**Return type**: `STRING` -### Example: Convert number to binary string +#### Example -You can use this function to convert a number to a string of its binary representation. +The following example converts a number to its binary string representation: ```ppl source=accounts @@ -183,7 +190,7 @@ source=accounts | fields firstname, balance_binary, balance ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -194,9 +201,7 @@ fetched rows / total rows = 1/1 +-----------+------------------+---------+ ``` -### Example: Convert number to hex string - -You can use this function to convert a number to a string of its hex representation. 
+The following example converts a number to its hexadecimal string representation: ```ppl source=accounts @@ -205,7 +210,7 @@ source=accounts | fields firstname, balance_hex, balance ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -216,9 +221,7 @@ fetched rows / total rows = 1/1 +-----------+-------------+---------+ ``` -### Example: Format number with commas - -The following example formats the column totalSales to display values with commas. +The following example formats numbers with comma separators: ```ppl source=accounts @@ -227,7 +230,7 @@ source=accounts | fields firstname, balance_commas, balance ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -237,10 +240,10 @@ fetched rows / total rows = 1/1 | Amber | 39,225 | 39225 | +-----------+----------------+---------+ ``` - -### Example: Convert seconds to duration format -The following example converts number of seconds to HH:MM:SS format representing hours, minutes and seconds. +### Example: Convert seconds to duration format + +The following example converts the number of seconds to the `HH:MM:SS` format representing hours, minutes, and seconds: ```ppl source=accounts @@ -249,7 +252,7 @@ source=accounts | fields firstname, duration ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -260,9 +263,7 @@ fetched rows / total rows = 1/1 +-----------+----------+ ``` -### Example: Convert boolean to string - -The following example converts boolean parameter to string. 
+The following example converts a Boolean value to string: ```ppl source=accounts @@ -271,7 +272,7 @@ source=accounts | fields `boolean_str` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -284,22 +285,35 @@ fetched rows / total rows = 1/1 ## TONUMBER -### Description +**Usage**: `tonumber(string[, base])` + +Converts the string value to a number. The optional `base` parameter specifies the base of the input string. If not provided, the function assumes base `10`. + +**Parameters**: -Usage: `tonumber(string, [base])` converts the value in first argument. -The second argument describes the base of first argument. If second argument is not provided, then it converts to base 10 number representation. +- `string` (Required): The string representation of the number to convert. +- `base` (Optional): The base of the input string (between `2` and `36`). Defaults to `10`. -**Return type:** `NUMBER` +**Return type**: `NUMBER` -You can use this function with the eval commands and as part of eval expressions. Base values can be between 2 and 36. The maximum value supported for base 10 is +(2-2^-52)·2^1023 and minimum is -(2-2^-52)·2^1023. The maximum for other supported bases is 2^63-1 (or 7FFFFFFFFFFFFFFF) and minimum is -2^63 (or -7FFFFFFFFFFFFFFF). If the tonumber function cannot parse a field value to a number, the function returns NULL. You can use this function to convert a string representation of a binary number to return the corresponding number in base 10. +You can use this function with `eval` commands and as part of `eval` expressions. Base values can be between `2` and `36`. -### Example: Convert binary string to number +**Value limits**: +- Base 10: Maximum is +(2-2^-52)·2^1023 and minimum is -(2-2^-52)·2^1023. +- Other bases: Maximum is 2^63-1 (or 7FFFFFFFFFFFFFFF) and minimum is -2^63 (or -7FFFFFFFFFFFFFFF). 
+ +If the `tonumber` function cannot parse a field value to a number, the function returns `NULL`. You can use this function to convert string representations of numbers in various bases to their corresponding base 10 values. + +#### Example: Convert a binary string to a number ```ppl -source=people | eval int_value = tonumber('010101',2) | fields int_value | head 1 +source=people +| eval int_value = tonumber('010101',2) +| fields int_value +| head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -310,13 +324,16 @@ fetched rows / total rows = 1/1 +-----------+ ``` -### Example: Convert hex string to number +#### Example: Convert a hexadecimal string to a number ```ppl -source=people | eval int_value = tonumber('FA34',16) | fields int_value | head 1 +source=people +| eval int_value = tonumber('FA34',16) +| fields int_value +| head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -327,13 +344,16 @@ fetched rows / total rows = 1/1 +-----------+ ``` -### Example: Convert decimal string to number +#### Example: Convert a decimal string without a decimal part to a number ```ppl -source=people | eval int_value = tonumber('4598') | fields int_value | head 1 +source=people +| eval int_value = tonumber('4598') +| fields int_value +| head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -344,13 +364,16 @@ fetched rows / total rows = 1/1 +-----------+ ``` -### Example: Convert decimal string with fraction to number +#### Example: Convert a decimal string with a decimal part to a number ```ppl -source=people | eval double_value = tonumber('4598.678') | fields double_value | head 1 +source=people +| eval double_value = tonumber('4598.678') +| fields double_value +| head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 diff --git 
a/docs/user/ppl/functions/cryptographic.md b/docs/user/ppl/functions/cryptographic.md index 1ea1ca50f5c..25c38df2d33 100644 --- a/docs/user/ppl/functions/cryptographic.md +++ b/docs/user/ppl/functions/cryptographic.md @@ -1,16 +1,20 @@ -# PPL Cryptographic Functions +# Cryptographic functions -## MD5 +The following cryptographic functions are supported in PPL. -### Description +## MD5 -Version: 3.1.0 -Usage: `md5(str)` calculates the MD5 digest and returns the value as a 32-character hex string. +**Usage**: `MD5(str)` -**Argument type:** `STRING` -**Return type:** `STRING` +Calculates the MD5 digest and returns the value as a 32-character hex string. -### Example +**Parameters**: + +- `str` (Required): The string for which to calculate the MD5 digest. + +**Return type**: `STRING` + +#### Example ```ppl source=people @@ -18,7 +22,7 @@ source=people | fields `MD5('hello')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -29,17 +33,19 @@ fetched rows / total rows = 1/1 +----------------------------------+ ``` -## SHA1 +## SHA1 + +**Usage**: `SHA1(str)` -### Description +Returns the SHA-1 hash as a hex string. -Version: 3.1.0 -Usage: `sha1(str)` returns the hex string result of SHA-1. +**Parameters**: -**Argument type:** `STRING` -**Return type:** `STRING` +- `str` (Required): The string for which to calculate the SHA-1 hash. -### Example +**Return type**: `STRING` + +#### Example ```ppl source=people @@ -47,7 +53,7 @@ source=people | fields `SHA1('hello')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -58,18 +64,20 @@ fetched rows / total rows = 1/1 +------------------------------------------+ ``` -## SHA2 +## SHA2 + +**Usage**: `SHA2(str, numBits)` -### Description +Returns the result of SHA-2 family hash functions (SHA-224, SHA-256, SHA-384, and SHA-512) as a hex string. 
-Version: 3.1.0 -Usage: `sha2(str, numBits)` returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). -The numBits indicates the desired bit length of the result, which must have a value of 224, 256, 384, or 512. +**Parameters**: -**Argument type:** `STRING`, `INTEGER` -**Return type:** `STRING` +- `str` (Required): The string for which to calculate the SHA-2 hash. +- `numBits` (Required): The desired bit length of the result, which must be `224`, `256`, `384`, or `512`. -### Example +**Return type**: `STRING` + +#### Example: SHA-256 hash ```ppl source=people @@ -77,7 +85,7 @@ source=people | fields `SHA2('hello',256)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -87,6 +95,8 @@ fetched rows / total rows = 1/1 | 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 | +------------------------------------------------------------------+ ``` + +#### Example: SHA-512 hash ```ppl source=people @@ -94,7 +104,7 @@ source=people | fields `SHA2('hello',512)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 diff --git a/docs/user/ppl/functions/datetime.md b/docs/user/ppl/functions/datetime.md index 9ed105ea91a..3f53679b39c 100644 --- a/docs/user/ppl/functions/datetime.md +++ b/docs/user/ppl/functions/datetime.md @@ -1,22 +1,25 @@ -# Date and Time Functions +# Date and time functions - All PPL date and time functions use the UTC time zone. Both input and output values are interpreted as UTC. - For instance, an input timestamp literal like '2020-08-26 01:01:01' is assumed to be in UTC, and the now() - function also returns the current date and time in UTC. +All PPL date and time functions use the UTC time zone. Both input and output values are interpreted as UTC. 
For example, an input timestamp literal such as `'2020-08-26 01:01:01'` is assumed to be in UTC, and the `now()` function also returns the current date and time in UTC. -## ADDDATE +The following date and time functions are supported in PPL. -### Description +## ADDDATE + +**Usage**: `ADDDATE(date, INTERVAL expr unit)`, `ADDDATE(date, days)` + +Adds the interval or number of days to the date. The first form adds an interval to the date, the second form adds the specified number of days as an integer to the date. If the first argument is `TIME`, today's date is used. If the first argument is `DATE`, the time at midnight is used. + +**Parameters**: + +- `date` (Required): The date, timestamp, or time value to modify. +- `INTERVAL expr unit` (Required in first form): The interval to add to the date. +- `days` (Required in second form): The number of days to add as an integer. + +**Return type**: `TIMESTAMP` for the interval form, `DATE` for the integer days form when the input is `DATE`, `TIMESTAMP` when the input is `TIMESTAMP` or `TIME`. + +**Synonyms**: [`DATE_ADD`](#date_add) (when used in interval form) -Usage: `adddate(date, INTERVAL expr unit)` / adddate(date, days) adds the interval of second argument to date; adddate(date, days) adds the second argument as integer number of days to date. -If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. -**Argument type:** `DATE/TIMESTAMP/TIME, INTERVAL/LONG` -Return type map: -(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP -(DATE, LONG) -> DATE -(TIMESTAMP/TIME, LONG) -> TIMESTAMP -Synonyms: [DATE_ADD](#date_add) when invoked with the INTERVAL form of the second argument. 
-Antonyms: [SUBDATE](#subdate) ### Example ```ppl @@ -25,7 +28,7 @@ source=people | fields `'2020-08-26' + 1h`, `'2020-08-26' + 1`, `ts '2020-08-26 01:01:01' + 1` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -36,25 +39,30 @@ fetched rows / total rows = 1/1 +---------------------+------------------+------------------------------+ ``` -## ADDTIME +## ADDTIME -### Description +**Usage**: `ADDTIME(expr1, expr2)` + +Adds the second expression to the first expression and returns the result. If an argument is `TIME`, today's date is used. If an argument is `DATE`, the time at midnight is used. + +**Parameters**: + +- `expr1` (Required): The base date, timestamp, or time value. +- `expr2` (Required): The date, timestamp, or time value to add to the first expression. + +**Return type**: `TIMESTAMP` when the first argument is `DATE` or `TIMESTAMP`, `TIME` when the first argument is `TIME`. + +#### Examples + +The following example shows adding two DATE values: -Usage: `addtime(expr1, expr2)` adds expr2 to expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. 
-**Argument type:** `DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME` -Return type map: -(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP -(TIME, DATE/TIMESTAMP/TIME) -> TIME -Antonyms: [SUBTIME](#subtime) -### Example - ```ppl source=people | eval `'2008-12-12' + 0` = ADDTIME(DATE('2008-12-12'), DATE('2008-11-15')) | fields `'2008-12-12' + 0` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -64,6 +72,8 @@ fetched rows / total rows = 1/1 | 2008-12-12 00:00:00 | +---------------------+ ``` + +The following example shows adding TIME and DATE values: ```ppl source=people @@ -71,7 +81,7 @@ source=people | fields `'23:59:59' + 0` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -81,6 +91,8 @@ fetched rows / total rows = 1/1 | 23:59:59 | +----------------+ ``` + +The following example shows combining DATE and TIME into a timestamp: ```ppl source=people @@ -88,7 +100,7 @@ source=people | fields `'2004-01-01' + '23:59:59'` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -98,6 +110,8 @@ fetched rows / total rows = 1/1 | 2004-01-01 23:59:59 | +---------------------------+ ``` + +The following example shows adding two TIME values: ```ppl source=people @@ -105,7 +119,7 @@ source=people | fields `'10:20:30' + '00:05:42'` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -115,6 +129,8 @@ fetched rows / total rows = 1/1 | 10:26:12 | +-------------------------+ ``` + +The following example shows adding two TIMESTAMP values: ```ppl source=people @@ -122,7 +138,7 @@ source=people | fields `'2007-02-28 10:20:30' + '20:40:50'` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -133,23 +149,29 @@ fetched rows / total rows = 1/1 +------------------------------------+ ``` -## CONVERT_TZ +## CONVERT_TZ -### 
Description +**Usage**: `CONVERT_TZ(timestamp, from_timezone, to_timezone)` + +Constructs a local timestamp converted from the source time zone to the target time zone. Returns `NULL` when any of the three function arguments is invalid: the timestamp is not in the format `yyyy-MM-dd HH:mm:ss`, a time zone is not in `(+/-)HH:mm` format, dates are invalid (such as February 30th), or time zones are outside the valid range of -13:59 to +14:00. + +**Parameters**: + +- `timestamp` (Required): The timestamp or string to convert in `yyyy-MM-dd HH:mm:ss` format. +- `from_timezone` (Required): The source time zone in `(+/-)HH:mm` format. +- `to_timezone` (Required): The target time zone in `(+/-)HH:mm` format. + +**Return type**: `TIMESTAMP` + +#### Examples -Usage: `convert_tz(timestamp, from_timezone, to_timezone)` constructs a local timestamp converted from the from_timezone to the to_timezone. CONVERT_TZ returns null when any of the three function arguments are invalid, i.e. timestamp is not in the format yyyy-MM-dd HH:mm:ss or the timezone is not in (+/-)HH:mm. It also is invalid for invalid dates, such as February 30th and invalid timezones, which are ones outside of -13:59 and +14:00. -**Argument type:** `TIMESTAMP/STRING, STRING, STRING` -**Return type:** `TIMESTAMP` -Conversion from +00:00 timezone to +10:00 timezone. Returns the timestamp argument converted from +00:00 to +10:00 -### Example - ```ppl source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | fields `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -160,8 +182,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +15:00 in this example will return null. 
-### Example +The valid time zone range for `convert_tz` is [-13:59, +14:00] inclusive. Time zones outside of the range, such as +15:00 in this example, return `NULL`: ```ppl source=people @@ -169,7 +190,7 @@ source=people | fields `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -180,8 +201,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -Conversion from a positive timezone to a negative timezone that goes over date line. -### Example +The following example shows conversion from a positive time zone to a negative time zone that crosses the date line: ```ppl source=people @@ -189,7 +209,7 @@ source=people | fields `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -200,8 +220,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -Valid dates are required in convert_tz, invalid dates such as April 31st (not a date in the Gregorian calendar) will result in null. -### Example +Valid dates are required in `convert_tz`. For invalid dates such as April 31st (not a date in the Gregorian calendar), `NULL` is returned: ```ppl source=people @@ -209,7 +228,7 @@ source=people | fields `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -220,8 +239,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -Valid dates are required in convert_tz, invalid dates such as February 30th (not a date in the Gregorian calendar) will result in null. 
-### Example +The following example shows that February 30th also returns `NULL`: ```ppl source=people @@ -229,7 +247,7 @@ source=people | fields `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -240,8 +258,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -February 29th 2008 is a valid date because it is a leap year. -### Example +February 29th 2008 is a valid date because it is a leap year: ```ppl source=people @@ -249,7 +266,7 @@ source=people | fields `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -260,8 +277,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -Valid dates are required in convert_tz, invalid dates such as February 29th 2007 (2007 is not a leap year) will result in null. -### Example +The following example shows that February 29th 2007 returns `NULL` because 2007 is not a leap year: ```ppl source=people @@ -269,7 +285,7 @@ source=people | fields `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -280,8 +296,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:01 in this example will return null. -### Example +The valid time zone range for `convert_tz` is [-13:59, +14:00] inclusive. 
Time zones outside of the range, such as +14:01 in this example, return `NULL`: ```ppl source=people @@ -289,7 +304,7 @@ source=people | fields `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -300,8 +315,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:00 in this example will return a correctly converted date time object. -### Example +The valid time zone range for `convert_tz` is [-13:59, +14:00] inclusive. Time zones within the range, such as +14:00 in this example, return a correctly converted date time object: ```ppl source=people @@ -309,7 +323,7 @@ source=people | fields `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -320,8 +334,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as -14:00 will result in null -### Example +The following example shows that -14:00 (outside the valid range) returns `NULL`: ```ppl source=people @@ -329,7 +342,7 @@ source=people | fields `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -340,8 +353,7 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. This timezone is within range so it is valid and will convert the time. -### Example +The valid time zone range for `convert_tz` is [-13:59, +14:00] inclusive. 
Time zones at the lower boundary of the range, such as -13:59, are valid and return converted results: ```ppl source=people @@ -349,7 +361,7 @@ source=people | fields `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -360,14 +372,16 @@ fetched rows / total rows = 1/1 +-----------------------------------------------------+ ``` -## CURDATE +## CURDATE + +**Usage**: `CURDATE()` -### Description +Returns the current date as a value in `YYYY-MM-DD` format. The function returns the current date in UTC at the time when the statement is executed. + +**Parameters**: None + +**Return type**: `DATE` -Returns the current date as a value in 'YYYY-MM-DD' format. -CURDATE() returns the current date in UTC at the time the statement is executed. -**Return type:** `DATE` -Specification: CURDATE() -> DATE ### Example ```ppl ignore @@ -376,7 +390,7 @@ source=people | fields `CURDATE()` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -387,11 +401,16 @@ fetched rows / total rows = 1/1 +------------+ ``` -## CURRENT_DATE +## CURRENT_DATE + +**Usage**: `CURRENT_DATE()` -### Description +A synonym for `CURDATE()`. + +**Parameters**: None + +**Return type**: `DATE` -`CURRENT_DATE()` is a synonym for [CURDATE()](#curdate). ### Example ```ppl ignore @@ -400,7 +419,7 @@ source=people | fields `CURRENT_DATE()` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -411,11 +430,16 @@ fetched rows / total rows = 1/1 +------------------+ ``` -## CURRENT_TIME +## CURRENT_TIME + +**Usage**: `CURRENT_TIME()` + +A synonym for `CURTIME()`. + +**Parameters**: None -### Description +**Return type**: `TIME` -`CURRENT_TIME()` is a synonym for [CURTIME()](#curtime). 
### Example ```ppl ignore @@ -424,7 +448,7 @@ source=people | fields `CURRENT_TIME()` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -435,11 +459,16 @@ fetched rows / total rows = 1/1 +------------------+ ``` -## CURRENT_TIMESTAMP +## CURRENT_TIMESTAMP -### Description +**Usage**: `CURRENT_TIMESTAMP()` + +A synonym for `NOW()`. + +**Parameters**: None + +**Return type**: `TIMESTAMP` -`CURRENT_TIMESTAMP()` is a synonym for [NOW()](#now). ### Example ```ppl ignore @@ -448,7 +477,7 @@ source=people | fields `CURRENT_TIMESTAMP()` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -459,23 +488,25 @@ fetched rows / total rows = 1/1 +-----------------------+ ``` -## CURTIME +## CURTIME -### Description +**Usage**: `CURTIME()` + +Returns the current time as a value in the `hh:mm:ss` format in the UTC time zone. `CURTIME()` returns the time at which the statement began to execute, as [`NOW()`](#now) does. + +**Parameters**: None + +**Return type**: `TIME` + +#### Example -Returns the current time as a value in 'hh:mm:ss' format in the UTC time zone. -CURTIME() returns the time at which the statement began to execute as [NOW()](#now) does. -**Return type:** `TIME` -Specification: CURTIME() -> TIME -### Example - ```ppl ignore source=people | eval `value_1` = CURTIME(), `value_2` = CURTIME() | fields `value_1`, `value_2` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -486,22 +517,28 @@ fetched rows / total rows = 1/1 +----------+----------+ ``` -## DATE +## DATE -### Description +**Usage**: `DATE(expr)` + +Constructs a date type from the input string `expr`. If the argument is a date or timestamp, it extracts the date value part from the expression. + +**Parameters**: +- `expr` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. 
+ +**Return type**: `DATE` + +#### Examples + +The following example extracts a date from a string: -Usage: `date(expr)` constructs a date type with the input string expr as a date. If the argument is of date/timestamp, it extracts the date value part from the expression. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `DATE` -### Example - ```ppl source=people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -511,6 +548,8 @@ fetched rows / total rows = 1/1 | 2020-08-26 | +--------------------+ ``` + +The following example extracts the date from a timestamp: ```ppl source=people @@ -518,7 +557,7 @@ source=people | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -528,6 +567,8 @@ fetched rows / total rows = 1/1 | 2020-08-26 | +----------------------------------------+ ``` + +The following example extracts the date from a string containing both date and time: ```ppl source=people @@ -535,24 +576,7 @@ source=people | fields `DATE('2020-08-26 13:49')` ``` -Expected output: - -```text -fetched rows / total rows = 1/1 -+--------------------------+ -| DATE('2020-08-26 13:49') | -|--------------------------| -| 2020-08-26 | -+--------------------------+ -``` - -```ppl -source=people -| eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') -| fields `DATE('2020-08-26 13:49')` -``` - -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -563,16 +587,22 @@ fetched rows / total rows = 1/1 +--------------------------+ ``` -## DATE_ADD +## DATE_ADD -### Description +**Usage**: `DATE_ADD(date, INTERVAL expr unit)` -Usage: `date_add(date, INTERVAL expr unit)` adds the interval expr to date. 
If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. -**Argument type:** `DATE/TIMESTAMP/TIME, INTERVAL` -**Return type:** `TIMESTAMP` -Synonyms: [ADDDATE](#adddate) -Antonyms: [DATE_SUB](#date_sub) -### Example +Adds the interval `expr` to `date`. If the first argument is `TIME`, today's date is used. If the first argument is `DATE`, the time at midnight is used. + +**Parameters**: +- `date` (Required): A `DATE`, `TIMESTAMP`, or `TIME` value. +- `INTERVAL expr unit` (Required): An `INTERVAL` expression. + +**Return type**: `TIMESTAMP` + +Synonyms: [`ADDDATE`](#adddate) +Antonyms: [`DATE_SUB`](#date_sub) + +#### Example ```ppl source=people @@ -580,7 +610,7 @@ source=people | fields `'2020-08-26' + 1h`, `ts '2020-08-26 01:01:01' + 1d` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -591,56 +621,57 @@ fetched rows / total rows = 1/1 +---------------------+-------------------------------+ ``` -## DATE_FORMAT +## DATE_FORMAT -### Description +**Usage**: `DATE_FORMAT(date, format)` -Usage: `date_format(date, format)` formats the date argument using the specifiers in the format argument. -If an argument of type TIME is provided, the local date is used. -The following table describes the available specifier arguments. +Formats the `date` argument using the specifiers in the `format` argument. If an argument of type `TIME` is provided, the local date is used. +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, `TIME`, or `TIMESTAMP` value. +- `format` (Required): A `STRING` containing format specifiers. + +**Return type**: `STRING` + +The following table describes the available format specifiers. | Specifier | Description | | --- | --- | -| %a | Abbreviated weekday name (Sun..Sat) | -| %b | Abbreviated month name (Jan..Dec) | -| %c | Month, numeric (0..12) | -| %D | Day of the month with English suffix (0th, 1st, 2nd, 3rd, ...) 
| -| %d | Day of the month, numeric (00..31) | -| %e | Day of the month, numeric (0..31) | -| %f | Microseconds (000000..999999) | -| %H | Hour (00..23) | -| %h | Hour (01..12) | -| %I | Hour (01..12) | -| %i | Minutes, numeric (00..59) | -| %j | Day of year (001..366) | -| %k | Hour (0..23) | -| %l | Hour (1..12) | -| %M | Month name (January..December) | -| %m | Month, numeric (00..12) | -| %p | AM or PM | -| %r | Time, 12-hour (hh:mm:ss followed by AM or PM) | -| %S | Seconds (00..59) | -| %s | Seconds (00..59) | -| %T | Time, 24-hour (hh:mm:ss) | -| %U | Week (00..53), where Sunday is the first day of the week; WEEK() mode 0 | -| %u | Week (00..53), where Monday is the first day of the week; WEEK() mode 1 | -| %V | Week (01..53), where Sunday is the first day of the week; WEEK() mode 2; used with %X | -| %v | Week (01..53), where Monday is the first day of the week; WEEK() mode 3; used with %x | -| %W | Weekday name (Sunday..Saturday) | -| %w | Day of the week (0=Sunday..6=Saturday) | -| %X | Year for the week where Sunday is the first day of the week, numeric, four digits; used with %V | -| %x | Year for the week, where Monday is the first day of the week, numeric, four digits; used with %v | -| %Y | Year, numeric, four digits | -| %y | Year, numeric (two digits) | -| %% | A literal % character | -| %x | x, for any “x” not listed above | -| x | x, for any smallcase/uppercase alphabet except [aydmshiHIMYDSEL] | - - -**Argument type:** `STRING/DATE/TIME/TIMESTAMP, STRING` -**Return type:** `STRING` -### Example +| `%a` | Abbreviated weekday name (Sun..Sat) | +| `%b` | Abbreviated month name (Jan..Dec) | +| `%c` | Month, numeric (0..12) | +| `%D` | Day of the month with English suffix (0th, 1st, 2nd, 3rd, ...) 
| +| `%d` | Day of the month, numeric (00..31) | +| `%e` | Day of the month, numeric (0..31) | +| `%f` | Microseconds (000000..999999) | +| `%H` | Hour (00..23) | +| `%h` | Hour (01..12) | +| `%I` | Hour (01..12) | +| `%i` | Minutes, numeric (00..59) | +| `%j` | Day of year (001..366) | +| `%k` | Hour (0..23) | +| `%l` | Hour (1..12) | +| `%M` | Month name (January..December) | +| `%m` | Month, numeric (00..12) | +| `%p` | AM or PM | +| `%r` | Time, 12-hour (hh:mm:ss followed by AM or PM) | +| `%S` | Seconds (00..59) | +| `%s` | Seconds (00..59) | +| `%T` | Time, 24-hour (hh:mm:ss) | +| `%U` | Week (00..53), where Sunday is the first day of the week; WEEK() mode 0 | +| `%u` | Week (00..53), where Monday is the first day of the week; WEEK() mode 1 | +| `%V` | Week (01..53), where Sunday is the first day of the week; WEEK() mode 2; used with `%X` | +| `%v` | Week (01..53), where Monday is the first day of the week; WEEK() mode 3; used with `%x` | +| `%W` | Weekday name (Sunday..Saturday) | +| `%w` | Day of the week (0=Sunday..6=Saturday) | +| `%X` | Year for the week where Sunday is the first day of the week, numeric, four digits; used with `%V` | +| `%x` | Year for the week, where Monday is the first day of the week, numeric, four digits; used with `%v` | +| `%Y` | Year, numeric, four digits | +| `%y` | Year, numeric (two digits) | +| `%%` | A literal % character | +| `x` | x, for any lowercase/uppercase alphabet except [aydmshiHIMYDSEL] | + +#### Example ```ppl source=people @@ -648,7 +679,7 @@ source=people | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -659,25 +690,29 @@ fetched rows / total rows = 1/1 +----------------------------------------------------+---------------------------------------------------------------------+ ``` -## DATETIME +## DATETIME -### Description 
+**Usage**: `DATETIME(timestamp)` or `DATETIME(date, to_timezone)` + +Converts the datetime to a new time zone. + +**Parameters**: +- `timestamp` (Required): A `TIMESTAMP` or `STRING` value. +- `to_timezone` (Optional): A `STRING` time zone value. + +**Return type**: `TIMESTAMP` + +#### Examples + +The following example converts a datetime to a different time zone: -Usage: `DATETIME(timestamp)`/ DATETIME(date, to_timezone) Converts the datetime to a new timezone -**Argument type:** `timestamp/STRING` -Return type map: -(TIMESTAMP, STRING) -> TIMESTAMP -(TIMESTAMP) -> TIMESTAMP -Converting timestamp with timezone to the second argument timezone. -### Example - ```ppl source=people | eval `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` = DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | fields `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -688,8 +723,7 @@ fetched rows / total rows = 1/1 +-------------------------------------------------+ ``` -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null. -### Example +The valid time zone range is (-13:59, +14:00) inclusive. The following example shows that time zones outside of this range return `NULL`: ```ppl source=people @@ -697,7 +731,7 @@ source=people | fields `DATETIME('2008-01-01 02:00:00', '-14:00')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -708,16 +742,22 @@ fetched rows / total rows = 1/1 +-------------------------------------------+ ``` -## DATE_SUB +## DATE_SUB -### Description +**Usage**: `DATE_SUB(date, INTERVAL expr unit)` -Usage: `date_sub(date, INTERVAL expr unit)` subtracts the interval expr from date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. 
-**Argument type:** `DATE/TIMESTAMP/TIME, INTERVAL` -**Return type:** `TIMESTAMP` -Synonyms: [SUBDATE](#subdate) -Antonyms: [DATE_ADD](#date_add) -### Example +Subtracts the interval `expr` from `date`. If the first argument is `TIME`, today's date is used. If the first argument is `DATE`, the time at midnight is used. + +**Parameters**: +- `date` (Required): A `DATE`, `TIMESTAMP`, or `TIME` value. +- `INTERVAL expr unit` (Required): An `INTERVAL` expression. + +**Return type**: `TIMESTAMP` + +Synonyms: [`SUBDATE`](#subdate) +Antonyms: [`DATE_ADD`](#date_add) + +#### Example ```ppl source=people @@ -725,7 +765,7 @@ source=people | fields `'2008-01-02' - 31d`, `ts '2020-08-26 01:01:01' + 1h` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -736,12 +776,18 @@ fetched rows / total rows = 1/1 +---------------------+-------------------------------+ ``` -## DATEDIFF +## DATEDIFF -Usage: Calculates the difference of date parts of given values. If the first argument is time, today's date is used. -**Argument type:** `DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME` -**Return type:** `LONG` -### Example +**Usage**: `DATEDIFF(date1, date2)` +Calculates the difference of the date parts of given values. If the first argument is `TIME`, today's date is used. + +**Parameters**: +- `date1` (Required): A `DATE`, `TIMESTAMP`, or `TIME` value. +- `date2` (Required): A `DATE`, `TIMESTAMP`, or `TIME` value. 
+ +**Return type**: `LONG` + +#### Example ```ppl source=people @@ -749,7 +795,7 @@ source=people | fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`, `today - today` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -760,15 +806,20 @@ fetched rows / total rows = 1/1 +-----------------------------+-----------------------------+---------------+ ``` -## DAY +## DAY -### Description +**Usage**: `DAY(date)` -Usage: `day(date)` extracts the day of the month for date, in the range 1 to 31. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [DAYOFMONTH](#dayofmonth), [DAY_OF_MONTH](#day_of_month) -### Example +Extracts the day of the month for `date`, in the range 1 to 31. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`DAYOFMONTH`](#dayofmonth), [`DAY_OF_MONTH`](#day_of_month) + +#### Example ```ppl source=people @@ -776,7 +827,7 @@ source=people | fields `DAY(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -787,14 +838,18 @@ fetched rows / total rows = 1/1 +-------------------------+ ``` -## DAYNAME +## DAYNAME -### Description +**Usage**: `DAYNAME(date)` -Usage: `dayname(date)` returns the name of the weekday for date, including Monday, Tuesday, Wednesday, Thursday, Friday, Saturday and Sunday. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `STRING` -### Example +Returns the name of the weekday for `date`. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. 
+ +**Return type**: `STRING` + +#### Example ```ppl source=people @@ -802,7 +857,7 @@ source=people | fields `DAYNAME(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -813,15 +868,20 @@ fetched rows / total rows = 1/1 +-----------------------------+ ``` -## DAYOFMONTH +## DAYOFMONTH -### Description +**Usage**: `DAYOFMONTH(date)` -Usage: `dayofmonth(date)` extracts the day of the month for date, in the range 1 to 31. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [DAY](#day), [DAY_OF_MONTH](#day_of_month) -### Example +Extracts the day of the month for `date`, in the range 1 to 31. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`DAY`](#day), [`DAY_OF_MONTH`](#day_of_month) + +#### Example ```ppl source=people @@ -829,7 +889,7 @@ source=people | fields `DAYOFMONTH(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -840,15 +900,20 @@ fetched rows / total rows = 1/1 +--------------------------------+ ``` -## DAY_OF_MONTH +## DAY_OF_MONTH -### Description +**Usage**: `DAY_OF_MONTH(date)` -Usage: `day_of_month(date)` extracts the day of the month for date, in the range 1 to 31. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [DAY](#day), [DAYOFMONTH](#dayofmonth) -### Example +Extracts the day of the month for `date`, in the range 1 to 31. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. 
+ +**Return type**: `INTEGER` + +Synonyms: [`DAY`](#day), [`DAYOFMONTH`](#dayofmonth) + +#### Example ```ppl source=people @@ -856,7 +921,7 @@ source=people | fields `DAY_OF_MONTH(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -867,15 +932,20 @@ fetched rows / total rows = 1/1 +----------------------------------+ ``` -## DAYOFWEEK +## DAYOFWEEK -### Description +**Usage**: `DAYOFWEEK(date)` -Usage: `dayofweek(date)` returns the weekday index for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [DAY_OF_WEEK](#day_of_week) -### Example +Returns the weekday index for `date` (1 = Sunday, 2 = Monday, ..., 7 = Saturday). + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`DAY_OF_WEEK`](#day_of_week) + +#### Example ```ppl source=people @@ -883,7 +953,7 @@ source=people | fields `DAYOFWEEK(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -894,15 +964,20 @@ fetched rows / total rows = 1/1 +-------------------------------+ ``` -## DAY_OF_WEEK +## DAY_OF_WEEK -### Description +**Usage**: `DAY_OF_WEEK(date)` -Usage: `day_of_week(date)` returns the weekday index for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [DAYOFWEEK](#dayofweek) -### Example +Returns the weekday index for `date` (1 = Sunday, 2 = Monday, ..., 7 = Saturday). + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. 
+ +**Return type**: `INTEGER` + +Synonyms: [`DAYOFWEEK`](#dayofweek) + +#### Example ```ppl source=people @@ -910,7 +985,7 @@ source=people | fields `DAY_OF_WEEK(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -921,15 +996,20 @@ fetched rows / total rows = 1/1 +---------------------------------+ ``` -## DAYOFYEAR +## DAYOFYEAR -### Description +**Usage**: `DAYOFYEAR(date)` -Usage: dayofyear(date) returns the day of the year for date, in the range 1 to 366. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [DAY_OF_YEAR](#day_of_year) -### Example +Returns the day of the year for `date`, in the range 1 to 366. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`DAY_OF_YEAR`](#day_of_year) + +#### Example ```ppl source=people @@ -937,7 +1017,7 @@ source=people | fields `DAYOFYEAR(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -948,15 +1028,20 @@ fetched rows / total rows = 1/1 +-------------------------------+ ``` -## DAY_OF_YEAR +## DAY_OF_YEAR -### Description +**Usage**: `DAY_OF_YEAR(date)` -Usage: day_of_year(date) returns the day of the year for date, in the range 1 to 366. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [DAYOFYEAR](#dayofyear) -### Example +Returns the day of the year for `date`, in the range 1 to 366. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. 
+ +**Return type**: `INTEGER` + +Synonyms: [`DAYOFYEAR`](#dayofyear) + +#### Example ```ppl source=people @@ -964,7 +1049,7 @@ source=people | fields `DAY_OF_YEAR(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -975,42 +1060,44 @@ fetched rows / total rows = 1/1 +---------------------------------+ ``` -## EXTRACT +## EXTRACT + +**Usage**: `EXTRACT(part FROM date)` -### Description +Returns a `LONG` containing digits in order according to the given `part` argument. The specific format of the returned `LONG` is determined by the following table. -Usage: `extract(part FROM date)` returns a LONG with digits in order according to the given 'part' arguments. -The specific format of the returned long is determined by the table below. -**Argument type:** `PART, where PART is one of the following tokens in the table below.` -The format specifiers found in this table are the same as those found in the [DATE_FORMAT](#date_format) function. -The following table describes the mapping of a 'part' to a particular format. +**Parameters**: +- `part` (Required): A part token (see following table). +- `date` (Required): A `STRING`, `DATE`, `TIME`, or `TIMESTAMP` value. +**Return type**: `LONG` -| Part | Format | +The format specifiers found in this table are the same as those found in the [`DATE_FORMAT`](#date_format) function. The following table describes the mapping of a `part` to a particular format. 
+ + +| `part` | Format | | --- | --- | -| MICROSECOND | %f | -| SECOND | %s | -| MINUTE | %i | -| HOUR | %H | -| DAY | %d | -| WEEK | %X | -| MONTH | %m | -| YEAR | %V | -| SECOND_MICROSECOND | %s%f | -| MINUTE_MICROSECOND | %i%s%f | -| MINUTE_SECOND | %i%s | -| HOUR_MICROSECOND | %H%i%s%f | -| HOUR_SECOND | %H%i%s | -| HOUR_MINUTE | %H%i | -| DAY_MICROSECOND | %d%H%i%s%f | -| DAY_SECOND | %d%H%i%s | -| DAY_MINUTE | %d%H%i | -| DAY_HOUR | %d%H% | -| YEAR_MONTH | %V%m | - - -**Return type:** `LONG` -### Example +| `MICROSECOND` | `%f` | +| `SECOND` | `%s` | +| `MINUTE` | `%i` | +| `HOUR` | `%H` | +| `DAY` | `%d` | +| `WEEK` | `%X` | +| `MONTH` | `%m` | +| `YEAR` | `%V` | +| `SECOND_MICROSECOND` | `%s%f` | +| `MINUTE_MICROSECOND` | `%i%s%f` | +| `MINUTE_SECOND` | `%i%s` | +| `HOUR_MICROSECOND` | `%H%i%s%f` | +| `HOUR_SECOND` | `%H%i%s` | +| `HOUR_MINUTE` | `%H%i` | +| `DAY_MICROSECOND` | `%d%H%i%s%f` | +| `DAY_SECOND` | `%d%H%i%s` | +| `DAY_MINUTE` | `%d%H%i` | +| `DAY_HOUR` | `%d%H%` | +| `YEAR_MONTH` | `%V%m` | + +#### Example ```ppl source=people @@ -1018,7 +1105,7 @@ source=people | fields `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1029,14 +1116,18 @@ fetched rows / total rows = 1/1 +------------------------------------------------+ ``` -## FROM_DAYS +## FROM_DAYS -### Description +**Usage**: `FROM_DAYS(N)` -Usage: `from_days(N)` returns the date value given the day number N. -**Argument type:** `INTEGER/LONG` -**Return type:** `DATE` -### Example +Returns the date value given the day number `N`. + +**Parameters**: +- `N` (Required): An `INTEGER` or `LONG` value. 
+ +**Return type**: `DATE` + +#### Example ```ppl source=people @@ -1044,7 +1135,7 @@ source=people | fields `FROM_DAYS(733687)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1055,18 +1146,19 @@ fetched rows / total rows = 1/1 +-------------------+ ``` -## FROM_UNIXTIME +## FROM_UNIXTIME + +**Usage**: `FROM_UNIXTIME(timestamp)` or `FROM_UNIXTIME(timestamp, format)` + +Returns a representation of the argument as a timestamp or character string value. Performs the reverse conversion for the [`UNIX_TIMESTAMP`](#unix_timestamp) function. If the second argument is provided, it is used to format the result in the same way as the format string used for the [`DATE_FORMAT`](#date_format) function. If the timestamp is outside the range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), the function returns `NULL`. -### Description +**Parameters**: +- `timestamp` (Required): A `DOUBLE` value representing Unix timestamp. +- `format` (Optional): A `STRING` format specifier. -Usage: Returns a representation of the argument given as a timestamp or character string value. Perform reverse conversion for [UNIX_TIMESTAMP](#unix_timestamp) function. -If second argument is provided, it is used to format the result in the same way as the format string used for the [DATE_FORMAT](#date_format) function. -If timestamp is outside of range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), function returns NULL. 
-**Argument type:** `DOUBLE, STRING` -Return type map: -DOUBLE -> TIMESTAMP -DOUBLE, STRING -> STRING -Examples +**Return type**: `TIMESTAMP` (without format), `STRING` (with format) + +**Examples** ```ppl source=people @@ -1074,7 +1166,7 @@ source=people | fields `FROM_UNIXTIME(1220249547)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1091,7 +1183,7 @@ source=people | fields `FROM_UNIXTIME(1220249547, '%T')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1102,14 +1194,19 @@ fetched rows / total rows = 1/1 +---------------------------------+ ``` -## GET_FORMAT +## GET_FORMAT + +**Usage**: `GET_FORMAT(type, format)` -### Description +Returns a string value containing string format specifiers based on the input arguments. -Usage: Returns a string value containing string format specifiers based on the input arguments. -**Argument type:** `TYPE, STRING, where TYPE must be one of the following tokens: [DATE, TIME, TIMESTAMP], and` -STRING must be one of the following tokens: ["USA", "JIS", "ISO", "EUR", "INTERNAL"] (" can be replaced by '). -Examples +**Parameters**: +- `type` (Required): One of the following tokens: `DATE`, `TIME`, `TIMESTAMP`. +- `format` (Required): A `STRING` that must be one of: `USA`, `JIS`, `ISO`, `EUR`, `INTERNAL`. + +**Return type**: `STRING` + +**Examples** ```ppl source=people @@ -1117,7 +1214,7 @@ source=people | fields `GET_FORMAT(DATE, 'USA')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1128,15 +1225,20 @@ fetched rows / total rows = 1/1 +-------------------------+ ``` -## HOUR +## HOUR -### Description +**Usage**: `HOUR(time)` -Usage: `hour(time)` extracts the hour value for time. Different from the time of day value, the time value has a large range and can be greater than 23, so the return value of hour(time) can be also greater than 23. 
-**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [HOUR_OF_DAY](#hour_of_day) -### Example +Extracts the hour value for `time`. Different from a time of day value, the time value has a large range and can be greater than 23, so the return value of `HOUR(time)` can also be greater than 23. + +**Parameters**: +- `time` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`HOUR_OF_DAY`](#hour_of_day) + +#### Example ```ppl source=people @@ -1144,7 +1246,7 @@ source=people | fields `HOUR(TIME('01:02:03'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1155,15 +1257,20 @@ fetched rows / total rows = 1/1 +------------------------+ ``` -## HOUR_OF_DAY +## HOUR_OF_DAY -### Description +**Usage**: `HOUR_OF_DAY(time)` -Usage: `hour_of_day(time)` extracts the hour value for time. Different from the time of day value, the time value has a large range and can be greater than 23, so the return value of hour_of_day(time) can be also greater than 23. -**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [HOUR](#hour) -### Example +Extracts the hour value for `time`. Different from a time of day value, the time value has a large range and can be greater than 23, so the return value of `HOUR_OF_DAY(time)` can also be greater than 23. + +**Parameters**: +- `time` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`HOUR`](#hour) + +#### Example ```ppl source=people @@ -1171,7 +1278,7 @@ source=people | fields `HOUR_OF_DAY(TIME('01:02:03'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1184,10 +1291,16 @@ fetched rows / total rows = 1/1 ## LAST_DAY -Usage: Returns the last day of the month as a DATE for a valid argument. 
-**Argument type:** `DATE/STRING/TIMESTAMP/TIME` -**Return type:** `DATE` -### Example +**Usage**: `LAST_DAY(date)` + +Returns the last day of the month as a `DATE` for a valid argument. + +**Parameters**: +- `date` (Required): A `DATE`, `STRING`, `TIMESTAMP`, or `TIME` value. + +**Return type**: `DATE` + +#### Example ```ppl source=people @@ -1195,7 +1308,7 @@ source=people | fields `last_day('2023-02-06')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1206,12 +1319,17 @@ fetched rows / total rows = 1/1 +------------------------+ ``` -## LOCALTIMESTAMP +## LOCALTIMESTAMP -### Description +**Usage**: `LOCALTIMESTAMP()` -`LOCALTIMESTAMP()` are synonyms for [NOW()](#now). -### Example +`LOCALTIMESTAMP()` is a synonym for [`NOW()`](#now). + +**Parameters**: None + +**Return type**: `TIMESTAMP` + +#### Example ```ppl ignore source=people @@ -1219,7 +1337,7 @@ source=people | fields `LOCALTIMESTAMP()` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1230,12 +1348,17 @@ fetched rows / total rows = 1/1 +---------------------+ ``` -## LOCALTIME +## LOCALTIME -### Description +**Usage**: `LOCALTIME()` -`LOCALTIME()` are synonyms for [NOW()](#now). -### Example +`LOCALTIME()` is a synonym for [`NOW()`](#now). + +**Parameters**: None + +**Return type**: `TIMESTAMP` + +#### Example ```ppl ignore source=people @@ -1243,7 +1366,7 @@ source=people | fields `LOCALTIME()` ``` -Expected output: +The query returns the following results: ```text ignore fetched rows / total rows = 1/1 @@ -1254,24 +1377,25 @@ fetched rows / total rows = 1/1 +---------------------+ ``` -## MAKEDATE +## MAKEDATE + +**Usage**: `MAKEDATE(year, dayofyear)` + +Returns a date, given `year` and `day-of-year` values. `dayofyear` must be greater than 0, otherwise the result is `NULL`. The result is also `NULL` if either argument is `NULL`. Arguments are rounded to an integer. 
-### Description +**Parameters**: +- `year` (Required): A `DOUBLE` value for the year. +- `dayofyear` (Required): A `DOUBLE` value for the day of year. + +**Return type**: `DATE` -Returns a date, given `year` and `day-of-year` values. `dayofyear` must be greater than 0 or the result is `NULL`. The result is also `NULL` if either argument is `NULL`. -Arguments are rounded to an integer. Limitations: -- Zero `year` interpreted as 2000; -- Negative `year` is not accepted; -- `day-of-year` should be greater than zero; -- `day-of-year` could be greater than 365/366, calculation switches to the next year(s) (see example). - -Specifications: -1. MAKEDATE(DOUBLE, DOUBLE) -> DATE - -**Argument type:** `DOUBLE` -**Return type:** `DATE` -### Example +- A zero `year` is interpreted as 2000 +- A negative `year` is not accepted +- `day-of-year` should be greater than zero +- `day-of-year` can be greater than 365/366, and the calculation switches to the next year(s) (see example) + +#### Example ```ppl source=people @@ -1279,7 +1403,7 @@ source=people | fields `MAKEDATE(1945, 5.9)`, `MAKEDATE(1984, 1984)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1290,22 +1414,24 @@ fetched rows / total rows = 1/1 +---------------------+----------------------+ ``` -## MAKETIME +## MAKETIME -### Description +**Usage**: `MAKETIME(hour, minute, second)` + +Returns a time value calculated from the hour, minute, and second arguments. Returns `NULL` if any of its arguments are `NULL`. The second argument can have a fractional part, and the rest of the arguments are rounded to an integer. + +**Parameters**: +- `hour` (Required): A `DOUBLE` value for the hour. +- `minute` (Required): A `DOUBLE` value for the minute. +- `second` (Required): A `DOUBLE` value for the second. + +**Return type**: `TIME` -Returns a time value calculated from the hour, minute, and second arguments. Returns `NULL` if any of its arguments are `NULL`. 
-The second argument can have a fractional part, rest arguments are rounded to an integer. Limitations: -- 24-hour clock is used, available time range is [00:00:00.0 - 23:59:59.(9)]; -- Up to 9 digits of second fraction part is taken (nanosecond precision). - -Specifications: -1. MAKETIME(DOUBLE, DOUBLE, DOUBLE) -> TIME - -**Argument type:** `DOUBLE` -**Return type:** `TIME` -### Example +- A 24-hour clock is used, and the available time range is [00:00:00.0 - 23:59:59.(9)] +- Up to 9 digits of the second fraction part are taken (nanosecond precision) + +#### Example ```ppl source=people @@ -1313,7 +1439,7 @@ source=people | fields `MAKETIME(20, 30, 40)`, `MAKETIME(20.2, 49.5, 42.100502)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1324,14 +1450,18 @@ fetched rows / total rows = 1/1 +----------------------+---------------------------------+ ``` -## MICROSECOND +## MICROSECOND -### Description +**Usage**: `MICROSECOND(expr)` -Usage: `microsecond(expr)` returns the microseconds from the time or timestamp expression expr as a number in the range from 0 to 999999. -**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `INTEGER` -### Example +Returns the microseconds from the time or timestamp expression `expr` as a number in the range from 0 to 999999. + +**Parameters**: +- `expr` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +#### Example ```ppl source=people @@ -1339,7 +1469,7 @@ source=people | fields `MICROSECOND(TIME('01:02:03.123456'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1350,15 +1480,20 @@ fetched rows / total rows = 1/1 +--------------------------------------+ ``` -## MINUTE +## MINUTE -### Description +**Usage**: `MINUTE(time)` -Usage: `minute(time)` returns the minute for time, in the range 0 to 59. 
-**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [MINUTE_OF_HOUR](#minute_of_hour) -### Example +Returns the minute for `time`, in the range 0 to 59. + +**Parameters**: +- `time` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`MINUTE_OF_HOUR`](#minute_of_hour) + +#### Example ```ppl source=people @@ -1366,7 +1501,7 @@ source=people | fields `MINUTE(TIME('01:02:03'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1377,14 +1512,18 @@ fetched rows / total rows = 1/1 +--------------------------+ ``` -## MINUTE_OF_DAY +## MINUTE_OF_DAY -### Description +**Usage**: `MINUTE_OF_DAY(time)` -Usage: `minute(time)` returns the amount of minutes in the day, in the range of 0 to 1439. -**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `INTEGER` -### Example +Returns the amount of minutes in the day, in the range of 0 to 1439. + +**Parameters**: +- `time` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +#### Example ```ppl source=people @@ -1392,7 +1531,7 @@ source=people | fields `MINUTE_OF_DAY(TIME('01:02:03'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1403,15 +1542,20 @@ fetched rows / total rows = 1/1 +---------------------------------+ ``` -## MINUTE_OF_HOUR +## MINUTE_OF_HOUR -### Description +**Usage**: `MINUTE_OF_HOUR(time)` -Usage: `minute(time)` returns the minute for time, in the range 0 to 59. -**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [MINUTE](#minute) -### Example +Returns the minute for `time`, in the range 0 to 59. + +**Parameters**: +- `time` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. 
+ +**Return type**: `INTEGER` + +Synonyms: [`MINUTE`](#minute) + +#### Example ```ppl source=people @@ -1419,7 +1563,7 @@ source=people | fields `MINUTE_OF_HOUR(TIME('01:02:03'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1430,15 +1574,20 @@ fetched rows / total rows = 1/1 +----------------------------------+ ``` -## MONTH +## MONTH -### Description +**Usage**: `MONTH(date)` -Usage: `month(date)` returns the month for date, in the range 1 to 12 for January to December. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [MONTH_OF_YEAR](#month_of_year) -### Example +Returns the month for `date`, in the range 1 to 12 for January to December. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`MONTH_OF_YEAR`](#month_of_year) + +#### Example ```ppl source=people @@ -1446,7 +1595,7 @@ source=people | fields `MONTH(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1457,15 +1606,20 @@ fetched rows / total rows = 1/1 +---------------------------+ ``` -## MONTH_OF_YEAR +## MONTH_OF_YEAR -### Description +**Usage**: `MONTH_OF_YEAR(date)` -Usage: `month_of_year(date)` returns the month for date, in the range 1 to 12 for January to December. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [MONTH](#month) -### Example +Returns the month for `date`, in the range 1 to 12 for January to December. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. 
+ +**Return type**: `INTEGER` + +Synonyms: [`MONTH`](#month) + +#### Example ```ppl source=people @@ -1473,7 +1627,7 @@ source=people | fields `MONTH_OF_YEAR(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1484,14 +1638,18 @@ fetched rows / total rows = 1/1 +-----------------------------------+ ``` -## MONTHNAME +## MONTHNAME -### Description +**Usage**: `MONTHNAME(date)` -Usage: `monthname(date)` returns the full name of the month for date. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `STRING` -### Example +Returns the full name of the month for `date`. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `STRING` + +#### Example ```ppl source=people @@ -1499,7 +1657,7 @@ source=people | fields `MONTHNAME(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1510,15 +1668,17 @@ fetched rows / total rows = 1/1 +-------------------------------+ ``` -## NOW +## NOW -### Description +**Usage**: `NOW()` -Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss' format. The value is expressed in the UTC time zone. -`NOW()` returns a constant time that indicates the time at which the statement began to execute. This differs from the behavior for [SYSDATE()](#sysdate), which returns the exact time at which it executes. -**Return type:** `TIMESTAMP` -Specification: NOW() -> TIMESTAMP -### Example +Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss' format. The value is expressed in the UTC time zone. `NOW()` returns a constant time that indicates the time at which the statement began to execute. This differs from the behavior for [`SYSDATE()`](#sysdate), which returns the exact time at which it executes. 
+ +**Parameters**: None + +**Return type**: `TIMESTAMP` + +#### Example ```ppl ignore source=people @@ -1526,7 +1686,7 @@ source=people | fields `value_1`, `value_2` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1537,14 +1697,19 @@ fetched rows / total rows = 1/1 +---------------------+---------------------+ ``` -## PERIOD_ADD +## PERIOD_ADD -### Description +**Usage**: `PERIOD_ADD(P, N)` -Usage: `period_add(P, N)` add N months to period P (in the format YYMM or YYYYMM). Returns a value in the format YYYYMM. -**Argument type:** `INTEGER, INTEGER` -**Return type:** `INTEGER` -### Example +Adds `N` months to period `P` (in the format YYMM or YYYYMM). Returns a value in the format YYYYMM. + +**Parameters**: +- `P` (Required): An `INTEGER` value representing a period in YYMM or YYYYMM format. +- `N` (Required): An `INTEGER` number of months to add. + +**Return type**: `INTEGER` + +#### Example ```ppl source=people @@ -1552,7 +1717,7 @@ source=people | fields `PERIOD_ADD(200801, 2)`, `PERIOD_ADD(200801, -12)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1563,14 +1728,19 @@ fetched rows / total rows = 1/1 +-----------------------+-------------------------+ ``` -## PERIOD_DIFF +## PERIOD_DIFF -### Description +**Usage**: `PERIOD_DIFF(P1, P2)` -Usage: `period_diff(P1, P2)` returns the number of months between periods P1 and P2 given in the format YYMM or YYYYMM. -**Argument type:** `INTEGER, INTEGER` -**Return type:** `INTEGER` -### Example +Returns the number of months between periods `P1` and `P2` given in the format YYMM or YYYYMM. + +**Parameters**: +- `P1` (Required): An `INTEGER` value representing a period in YYMM or YYYYMM format. +- `P2` (Required): An `INTEGER` value representing a period in YYMM or YYYYMM format. 
+ +**Return type**: `INTEGER` + +#### Example ```ppl source=people @@ -1578,7 +1748,7 @@ source=people | fields `PERIOD_DIFF(200802, 200703)`, `PERIOD_DIFF(200802, 201003)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1589,14 +1759,18 @@ fetched rows / total rows = 1/1 +-----------------------------+-----------------------------+ ``` -## QUARTER +## QUARTER -### Description +**Usage**: `QUARTER(date)` -Usage: `quarter(date)` returns the quarter of the year for date, in the range 1 to 4. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -### Example +Returns the quarter of the year for `date`, in the range 1 to 4. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +#### Example ```ppl source=people @@ -1604,7 +1778,7 @@ source=people | fields `QUARTER(DATE('2020-08-26'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1615,17 +1789,18 @@ fetched rows / total rows = 1/1 +-----------------------------+ ``` -## SEC_TO_TIME +## SEC_TO_TIME -### Description +**Usage**: `SEC_TO_TIME(number)` -Usage: `sec_to_time(number)` returns the time in HH:mm:ssss[.nnnnnn] format. -Note that the function returns a time between 00:00:00 and 23:59:59. -If an input value is too large (greater than 86399), the function will wrap around and begin returning outputs starting from 00:00:00. -If an input value is too small (less than 0), the function will wrap around and begin returning outputs counting down from 23:59:59. -**Argument type:** `INTEGER, LONG, DOUBLE, FLOAT` -**Return type:** `TIME` -### Example +Returns the time in HH:mm:ss[.nnnnnn] format. Note that the function returns a time between 00:00:00 and 23:59:59. If the input value is too large (greater than 86399), the function will wrap around and begin returning outputs starting from 00:00:00. 
If the input value is too small (less than 0), the function will wrap around and begin returning outputs counting down from 23:59:59. + +**Parameters**: +- `number` (Required): An `INTEGER`, `LONG`, `DOUBLE`, or `FLOAT` value. + +**Return type**: `TIME` + +#### Example ```ppl source=people @@ -1634,7 +1809,7 @@ source=people | fields `SEC_TO_TIME(3601)`, `SEC_TO_TIME(1234.123)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1645,15 +1820,20 @@ fetched rows / total rows = 1/1 +-------------------+-----------------------+ ``` -## SECOND +## SECOND -### Description +**Usage**: `SECOND(time)` -Usage: `second(time)` returns the second for time, in the range 0 to 59. -**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [SECOND_OF_MINUTE](#second_of_minute) -### Example +Returns the second for `time`, in the range 0 to 59. + +**Parameters**: +- `time` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +Synonyms: [`SECOND_OF_MINUTE`](#second_of_minute) + +#### Example ```ppl source=people @@ -1661,7 +1841,7 @@ source=people | fields `SECOND(TIME('01:02:03'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1672,15 +1852,20 @@ fetched rows / total rows = 1/1 +--------------------------+ ``` -## SECOND_OF_MINUTE +## SECOND_OF_MINUTE -### Description +**Usage**: `SECOND_OF_MINUTE(time)` -Usage: `second_of_minute(time)` returns the second for time, in the range 0 to 59. -**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `INTEGER` -Synonyms: [SECOND](#second) -### Example +Returns the second for `time`, in the range 0 to 59. + +**Parameters**: +- `time` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. 
+ +**Return type**: `INTEGER` + +Synonyms: [`SECOND`](#second) + +#### Example ```ppl source=people @@ -1688,7 +1873,7 @@ source=people | fields `SECOND_OF_MINUTE(TIME('01:02:03'))` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1699,68 +1884,71 @@ fetched rows / total rows = 1/1 +------------------------------------+ ``` -## STRFTIME +## STRFTIME + +**Usage**: `STRFTIME(time, format)` + +Takes a UNIX timestamp (in seconds) and renders it as a string using the format specified. For numeric inputs, the UNIX time must be in seconds. Values greater than 100000000000 are automatically treated as milliseconds and converted to seconds. You can use time format variables with the strftime function. This function performs the reverse operation of [`UNIX_TIMESTAMP`](#unix_timestamp) and is similar to [`FROM_UNIXTIME`](#from_unixtime) but with POSIX-style format specifiers. -**Version: 3.3.0** -### Description +**Parameters**: +- `time` (Required): An `INTEGER`, `LONG`, `DOUBLE`, or `TIMESTAMP` value. +- `format` (Required): A `STRING` format specifier. -Usage: `strftime(time, format)` takes a UNIX timestamp (in seconds) and renders it as a string using the format specified. For numeric inputs, the UNIX time must be in seconds. Values greater than 100000000000 are automatically treated as milliseconds and converted to seconds. -You can use time format variables with the strftime function. This function performs the reverse operation of [UNIX_TIMESTAMP](#unix_timestamp) and is similar to [FROM_UNIXTIME](#from_unixtime) but with POSIX-style format specifiers. 
- - **Available only when Calcite engine is enabled** +**Return type**: `STRING` + +**Notes**: +- Available only when Calcite engine is enabled - All timestamps are interpreted as UTC timezone - Text formatting uses language-neutral Locale.ROOT (weekday and month names appear in abbreviated form) - String inputs are NOT supported - use `unix_timestamp()` to convert strings first - Functions that return date/time values (like `date()`, `now()`, `timestamp()`) are supported -**Argument type:** `INTEGER/LONG/DOUBLE/TIMESTAMP, STRING` -**Return type:** `STRING` -Format specifiers: -The following table describes the available specifier arguments. +The following table describes the available specifier arguments: | Specifier | Description | | --- | --- | -| %a | Abbreviated weekday name (Mon..Sun) | -| %A | Weekday name (Mon..Sun) - Note: Locale.ROOT uses abbreviated form | -| %b | Abbreviated month name (Jan..Dec) | -| %B | Month name (Jan..Dec) - Note: Locale.ROOT uses abbreviated form | -| %c | Date and time (e.g., Mon Jul 18 09:30:00 2019) | -| %C | Century as 2-digit decimal number | -| %d | Day of the month, zero-padded (01..31) | -| %e | Day of the month, space-padded ( 1..31) | -| %Ez | Timezone offset in minutes from UTC (e.g., +0 for UTC, +330 for IST, -300 for EST) | -| %f | Microseconds as decimal number (000000..999999) | -| %F | ISO 8601 date format (%Y-%m-%d) | -| %g | ISO 8601 year without century (00..99) | -| %G | ISO 8601 year with century | -| %H | Hour (24-hour clock) (00..23) | -| %I | Hour (12-hour clock) (01..12) | -| %j | Day of year (001..366) | -| %k | Hour (24-hour clock), space-padded ( 0..23) | -| %m | Month as decimal number (01..12) | -| %M | Minute (00..59) | -| %N | Subsecond digits (default %9N = nanoseconds). Accepts any precision value from 1-9 (e.g., %3N = 3 digits, %5N = 5 digits, %9N = 9 digits). The precision directly controls the number of digits displayed | -| %p | AM or PM | -| %Q | Subsecond component (default milliseconds). 
Can specify precision: %3Q = milliseconds, %6Q = microseconds, %9Q = nanoseconds. Other precision values (e.g., %5Q) default to %3Q | -| %s | UNIX Epoch timestamp in seconds | -| %S | Second (00..59) | -| %T | Time in 24-hour notation (%H:%M:%S) | -| %U | Week of year starting from 0 (00..53) | -| %V | ISO week number (01..53) | -| %w | Weekday as decimal (0=Sunday..6=Saturday) | -| %x | Date in MM/dd/yyyy format (e.g., 07/13/2019) | -| %X | Time in HH:mm:ss format (e.g., 09:30:00) | -| %y | Year without century (00..99) | -| %Y | Year with century | -| %z | Timezone offset (+hhmm or -hhmm) | -| %:z | Timezone offset with colon (+hh:mm or -hh:mm) | -| %::z | Timezone offset with colons (+hh:mm:ss) | -| %:::z | Timezone offset hour only (+hh or -hh) | -| %Z | Timezone abbreviation (e.g., EST, PDT) | -| %% | Literal % character | - - -Examples +| `%a` | Abbreviated weekday name (Mon..Sun) | +| `%A` | Weekday name (Mon..Sun) - Note: Locale.ROOT uses abbreviated form | +| `%b` | Abbreviated month name (Jan..Dec) | +| `%B` | Month name (Jan..Dec) - Note: Locale.ROOT uses abbreviated form | +| `%c` | Date and time (e.g., Mon Jul 18 09:30:00 2019) | +| `%C` | Century as 2-digit decimal number | +| `%d` | Day of the month, zero-padded (01..31) | +| `%e` | Day of the month, space-padded ( 1..31) | +| `%Ez` | Timezone offset in minutes from UTC (e.g., +0 for UTC, +330 for IST, -300 for EST) | +| `%f` | Microseconds as decimal number (000000..999999) | +| `%F` | ISO 8601 date format (`%Y-%m-%d`) | +| `%g` | ISO 8601 year without century (00..99) | +| `%G` | ISO 8601 year with century | +| `%H` | Hour (24-hour clock) (00..23) | +| `%I` | Hour (12-hour clock) (01..12) | +| `%j` | Day of year (001..366) | +| `%k` | Hour (24-hour clock), space-padded ( 0..23) | +| `%m` | Month as decimal number (01..12) | +| `%M` | Minute (00..59) | +| `%N` | Subsecond digits (default `%9N` = nanoseconds). 
Accepts any precision value from 1-9 (e.g., `%3N` = 3 digits, `%5N` = 5 digits, `%9N` = 9 digits). The precision directly controls the number of digits displayed | +| `%p` | AM or PM | +| `%Q` | Subsecond component (default milliseconds). Can specify precision: `%3Q` = milliseconds, `%6Q` = microseconds, `%9Q` = nanoseconds. Other precision values (e.g., `%5Q`) default to `%3Q` | +| `%s` | UNIX Epoch timestamp in seconds | +| `%S` | Second (00..59) | +| `%T` | Time in 24-hour notation (`%H:%M:%S`) | +| `%U` | Week of year starting from 0 (00..53) | +| `%V` | ISO week number (01..53) | +| `%w` | Weekday as decimal (0=Sunday..6=Saturday) | +| `%x` | Date in MM/dd/yyyy format (e.g., 07/13/2019) | +| `%X` | Time in HH:mm:ss format (e.g., 09:30:00) | +| `%y` | Year without century (00..99) | +| `%Y` | Year with century | +| `%z` | Timezone offset (+hhmm or -hhmm) | +| `%:z` | Timezone offset with colon (+hh:mm or -hh:mm) | +| `%::z` | Timezone offset with colons (+hh:mm:ss) | +| `%:::z` | Timezone offset hour only (+hh or -hh) | +| `%Z` | Timezone abbreviation (e.g., EST, PDT) | +| `%%` | Literal % character | + + +**Examples** ```ppl ignore source=people | eval `strftime(1521467703, "%Y-%m-%dT%H:%M:%S")` = strftime(1521467703, "%Y-%m-%dT%H:%M:%S") | fields `strftime(1521467703, "%Y-%m-%dT%H:%M:%S")` @@ -1861,15 +2049,17 @@ fetched rows / total rows = 1/1 ``` ## STR_TO_DATE -### Description +**Usage**: `STR_TO_DATE(string, format)` -Usage: `str_to_date(string, string)` is used to extract a TIMESTAMP from the first argument string using the formats specified in the second argument string. -The input argument must have enough information to be parsed as a DATE, TIMESTAMP, or TIME. -Acceptable string format specifiers are the same as those used in the [DATE_FORMAT](#date_format) function. -It returns NULL when a statement cannot be parsed due to an invalid pair of arguments, and when 0 is provided for any DATE field. 
Otherwise, it will return a TIMESTAMP with the parsed values (as well as default values for any field that was not parsed). -**Argument type:** `STRING, STRING` -**Return type:** `TIMESTAMP` -### Example +Extracts a `TIMESTAMP` from the first argument string using the formats specified in the second argument string. The input argument must have enough information to be parsed as a `DATE`, `TIMESTAMP`, or `TIME`. Acceptable string format specifiers are the same as those used in the [`DATE_FORMAT`](#date_format) function. Returns `NULL` when the statement cannot be parsed due to an invalid pair of arguments, or when 0 is provided for any `DATE` field. Otherwise, returns a `TIMESTAMP` with the parsed values (as well as default values for any field that was not parsed). + +**Parameters**: +- `string` (Required): A `STRING` value to parse. +- `format` (Required): A `STRING` format specifier. + +**Return type**: `TIMESTAMP` + +#### Example ```ppl @@ -1879,7 +2069,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -1895,18 +2085,20 @@ fetched rows / total rows = 1/1 ## SUBDATE -### Description - -Usage: `subdate(date, INTERVAL expr unit)` / subdate(date, days) subtracts the interval expr from date; subdate(date, days) subtracts the second argument as integer number of days from date. -If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. -**Argument type:** `DATE/TIMESTAMP/TIME, INTERVAL/LONG` -Return type map: -(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP -(DATE, LONG) -> DATE -(TIMESTAMP/TIME, LONG) -> TIMESTAMP -Synonyms: [DATE_SUB](#date_sub) when invoked with the INTERVAL form of the second argument. -Antonyms: [ADDDATE](#adddate) -### Example +**Usage**: `SUBDATE(date, INTERVAL expr unit)` or `SUBDATE(date, days)` + +Subtracts the interval `expr` from `date`, or subtracts the second argument as an integer number of days from `date`. 
If the first argument is `TIME`, today's date is used. If the first argument is `DATE`, the time at midnight is used. + +**Parameters**: +- `date` (Required): A `DATE`, `TIMESTAMP`, or `TIME` value. +- `expr` (Required): Either an `INTERVAL` expression or a `LONG` number of days. + +**Return type**: `TIMESTAMP` (with INTERVAL), `DATE` (DATE with LONG), `TIMESTAMP` (TIMESTAMP/TIME with LONG) + +Synonyms: [`DATE_SUB`](#date_sub) when invoked with the INTERVAL form of the second argument +Antonyms: [`ADDDATE`](#adddate) + +#### Example ```ppl @@ -1916,7 +2108,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -1932,15 +2124,19 @@ fetched rows / total rows = 1/1 ## SUBTIME -### Description +**Usage**: `SUBTIME(expr1, expr2)` -Usage: `subtime(expr1, expr2)` subtracts expr2 from expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. -**Argument type:** `DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME` -Return type map: -(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP -(TIME, DATE/TIMESTAMP/TIME) -> TIME -Antonyms: [ADDTIME](#addtime) -### Example +Subtracts `expr2` from `expr1` and returns the result. If an argument is `TIME`, today's date is used. If an argument is `DATE`, the time at midnight is used. + +**Parameters**: +- `expr1` (Required): A `DATE`, `TIMESTAMP`, or `TIME` value. +- `expr2` (Required): A `DATE`, `TIMESTAMP`, or `TIME` value. 
+ +**Return type**: `TIMESTAMP` (DATE/TIMESTAMP with DATE/TIMESTAMP/TIME), `TIME` (TIME with DATE/TIMESTAMP/TIME) + +Antonyms: [`ADDTIME`](#addtime) + +#### Example ```ppl @@ -1950,7 +2146,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -1972,7 +2168,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -1994,7 +2190,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2016,7 +2212,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2038,7 +2234,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2054,15 +2250,16 @@ fetched rows / total rows = 1/1 ## SYSDATE -### Description +**Usage**: `SYSDATE()` or `SYSDATE(precision)` -Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss[.nnnnnn]'. -SYSDATE() returns the date and time at which it executes in UTC. This differs from the behavior for [NOW()](#now), which returns a constant time that indicates the time at which the statement began to execute. -If an argument is given, it specifies a fractional seconds precision from 0 to 6, the return value includes a fractional seconds part of that many digits. -Optional argument type: INTEGER -**Return type:** `TIMESTAMP` -Specification: SYSDATE([INTEGER]) -> TIMESTAMP -### Example +Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss[.nnnnnn]'. `SYSDATE()` returns the date and time at which it executes in UTC. This differs from the behavior for [`NOW()`](#now), which returns a constant time that indicates the time at which the statement began to execute. If an argument is given, it specifies a fractional seconds precision from 0 to 6; the return value includes a fractional seconds part of that many digits. + +**Parameters**: +- `precision` (Optional): An `INTEGER` value from 0 to 6 for fractional seconds precision. 
+ +**Return type**: `TIMESTAMP` + +#### Example ```ppl ignore @@ -2072,7 +2269,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2088,12 +2285,16 @@ fetched rows / total rows = 1/1 ## TIME -### Description +**Usage**: `TIME(expr)` -Usage: `time(expr)` constructs a time type with the input string expr as a time. If the argument is of date/time/timestamp, it extracts the time value part from the expression. -**Argument type:** `STRING/DATE/TIME/TIMESTAMP` -**Return type:** `TIME` -### Example +Constructs a time type with the input string `expr` as a time. If the argument is of date/time/timestamp, it extracts the time value part from the expression. + +**Parameters**: +- `expr` (Required): A `STRING`, `DATE`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `TIME` + +#### Example ```ppl @@ -2103,7 +2304,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2125,7 +2326,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2147,7 +2348,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2169,7 +2370,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2185,33 +2386,32 @@ fetched rows / total rows = 1/1 ## TIME_FORMAT -### Description +**Usage**: `TIME_FORMAT(time, format)` + +Formats the `time` argument using the specifiers in the `format` argument. This supports a subset of the time format specifiers available for the [`DATE_FORMAT`](#date_format) function. Using date format specifiers supported by [`DATE_FORMAT`](#date_format) will return 0 or `NULL`. Acceptable format specifiers are listed in the following table. If an argument of type `DATE` is passed in, it is treated as a `TIMESTAMP` at midnight (i.e., 00:00:00). -Usage: `time_format(time, format)` formats the time argument using the specifiers in the format argument. 
-This supports a subset of the time format specifiers available for the [date_format](#date_format) function. -Using date format specifiers supported by [date_format](#date_format) will return 0 or null. -Acceptable format specifiers are listed in the table below. -If an argument of type DATE is passed in, it is treated as a TIMESTAMP at midnight (i.e., 00:00:00). -The following table describes the available specifier arguments. +**Parameters**: +- `time` (Required): A `STRING`, `DATE`, `TIME`, or `TIMESTAMP` value. +- `format` (Required): A `STRING` format specifier. +**Return type**: `STRING` + +The following table describes the available specifier arguments: | Specifier | Description | | --- | --- | -| %f | Microseconds (000000..999999) | -| %H | Hour (00..23) | -| %h | Hour (01..12) | -| %I | Hour (01..12) | -| %i | Minutes, numeric (00..59) | -| %p | AM or PM | -| %r | Time, 12-hour (hh:mm:ss followed by AM or PM) | -| %S | Seconds (00..59) | -| %s | Seconds (00..59) | -| %T | Time, 24-hour (hh:mm:ss) | - - -**Argument type:** `STRING/DATE/TIME/TIMESTAMP, STRING` -**Return type:** `STRING` -### Example +| `%f` | Microseconds (000000..999999) | +| `%H` | Hour (00..23) | +| `%h` | Hour (01..12) | +| `%I` | Hour (01..12) | +| `%i` | Minutes, numeric (00..59) | +| `%p` | `AM` or `PM` | +| `%r` | Time, 12-hour (hh:mm:ss followed by `AM` or `PM`) | +| `%S` | Seconds (00..59) | +| `%s` | Seconds (00..59) | +| `%T` | Time, 24-hour (hh:mm:ss) | + +#### Example ```ppl @@ -2221,7 +2421,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2237,12 +2437,16 @@ fetched rows / total rows = 1/1 ## TIME_TO_SEC -### Description +**Usage**: `TIME_TO_SEC(time)` -Usage: `time_to_sec(time)` returns the time argument, converted to seconds. -**Argument type:** `STRING/TIME/TIMESTAMP` -**Return type:** `LONG` -### Example +Returns the `time` argument, converted to seconds. 
+ +**Parameters**: +- `time` (Required): A `STRING`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `LONG` + +#### Example ```ppl @@ -2252,7 +2456,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2268,12 +2472,17 @@ fetched rows / total rows = 1/1 ## TIMEDIFF -### Description +**Usage**: `TIMEDIFF(time1, time2)` -Usage: returns the difference between two time expressions as a time. -**Argument type:** `TIME, TIME` -**Return type:** `TIME` -### Example +Returns the difference between two time expressions as a time. + +**Parameters**: +- `time1` (Required): A `TIME` value. +- `time2` (Required): A `TIME` value. + +**Return type**: `TIME` + +#### Example ```ppl @@ -2283,7 +2492,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2299,15 +2508,17 @@ fetched rows / total rows = 1/1 ## TIMESTAMP -### Description +**Usage**: `TIMESTAMP(expr)` or `TIMESTAMP(expr1, expr2)` -Usage: `timestamp(expr)` constructs a timestamp type with the input string `expr` as an timestamp. If the argument is not a string, it casts `expr` to timestamp type with default timezone UTC. If argument is a time, it applies today's date before cast. -With two arguments `timestamp(expr1, expr2)` adds the time expression `expr2` to the date or timestamp expression `expr1` and returns the result as a timestamp value. -**Argument type:** `STRING/DATE/TIME/TIMESTAMP` -Return type map: -(STRING/DATE/TIME/TIMESTAMP) -> TIMESTAMP -(STRING/DATE/TIME/TIMESTAMP, STRING/DATE/TIME/TIMESTAMP) -> TIMESTAMP -### Example +Constructs a timestamp type with the input string `expr` as a timestamp. If the argument is not a string, it casts `expr` to a timestamp type with the default time zone UTC. If the argument is a time, it applies today's date before the cast. With two arguments, adds the time expression `expr2` to the date or timestamp expression `expr1` and returns the result as a timestamp value. 
+ +**Parameters**: +- `expr` (Required): A `STRING`, `DATE`, `TIME`, or `TIMESTAMP` value. +- `expr2` (Optional): A `STRING`, `DATE`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `TIMESTAMP` + +#### Example ```ppl @@ -2317,7 +2528,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2333,14 +2544,18 @@ fetched rows / total rows = 1/1 ## TIMESTAMPADD -### Description +**Usage**: `TIMESTAMPADD(interval, count, datetime)` + +Returns a `TIMESTAMP` value based on a passed-in `DATE`/`TIME`/`TIMESTAMP`/`STRING` argument and an `INTERVAL` and `INTEGER` argument which determine the amount of time to be added. If the third argument is a `STRING`, it must be formatted as a valid `TIMESTAMP`. If only a `TIME` is provided, a `TIMESTAMP` is still returned with the `DATE` portion filled in using the current date. If the third argument is a `DATE`, it will be automatically converted to a `TIMESTAMP`. -Usage: Returns a TIMESTAMP value based on a passed in DATE/TIME/TIMESTAMP/STRING argument and an INTERVAL and INTEGER argument which determine the amount of time to be added. -If the third argument is a STRING, it must be formatted as a valid TIMESTAMP. If only a TIME is provided, a TIMESTAMP is still returned with the DATE portion filled in using the current date. -If the third argument is a DATE, it will be automatically converted to a TIMESTAMP. -**Argument type:** `INTERVAL, INTEGER, DATE/TIME/TIMESTAMP/STRING` -INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR] -Examples +**Parameters**: +- `interval` (Required): One of: `MICROSECOND`, `SECOND`, `MINUTE`, `HOUR`, `DAY`, `WEEK`, `MONTH`, `QUARTER`, `YEAR`. +- `count` (Required): An `INTEGER` number of intervals to add. +- `datetime` (Required): A `DATE`, `TIME`, `TIMESTAMP`, or `STRING` value. 
+ +**Return type**: `TIMESTAMP` + +**Examples** ```ppl @@ -2351,7 +2566,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2367,15 +2582,18 @@ fetched rows / total rows = 1/1 ## TIMESTAMPDIFF -### Description +**Usage**: `TIMESTAMPDIFF(interval, start, end)` + +Returns the difference between the start and end date/times in interval units. If a `TIME` is provided as an argument, it will be converted to a `TIMESTAMP` with the `DATE` portion filled in using the current date. Arguments will be automatically converted to a `TIME`/`TIMESTAMP` when appropriate. Any argument that is a `STRING` must be formatted as a valid `TIMESTAMP`. -Usage: `TIMESTAMPDIFF(interval, start, end)` returns the difference between the start and end date/times in interval units. -If a TIME is provided as an argument, it will be converted to a TIMESTAMP with the DATE portion filled in using the current date. -Arguments will be automatically converted to a TIME/TIMESTAMP when appropriate. -Any argument that is a STRING must be formatted as a valid TIMESTAMP. -**Argument type:** `INTERVAL, DATE/TIME/TIMESTAMP/STRING, DATE/TIME/TIMESTAMP/STRING` -INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR] -Examples +**Parameters**: +- `interval` (Required): One of: `MICROSECOND`, `SECOND`, `MINUTE`, `HOUR`, `DAY`, `WEEK`, `MONTH`, `QUARTER`, `YEAR`. +- `start` (Required): A `DATE`, `TIME`, `TIMESTAMP`, or `STRING` value. +- `end` (Required): A `DATE`, `TIME`, `TIMESTAMP`, or `STRING` value. + +**Return type**: `LONG` + +**Examples** ```ppl @@ -2386,7 +2604,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2402,12 +2620,16 @@ fetched rows / total rows = 1/1 ## TO_DAYS -### Description +**Usage**: `TO_DAYS(date)` -Usage: `to_days(date)` returns the day number (the number of days since year 0) of the given date. Returns NULL if date is invalid. 
-**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `LONG` -### Example +Returns the day number (the number of days since year 0) of the given date. Returns `NULL` if date is invalid. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `LONG` + +#### Example ```ppl @@ -2417,7 +2639,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2433,13 +2655,16 @@ fetched rows / total rows = 1/1 ## TO_SECONDS -### Description +**Usage**: `TO_SECONDS(date)` -Usage: `to_seconds(date)` returns the number of seconds since the year 0 of the given value. Returns NULL if value is invalid. -An argument of a LONG type can be used. It must be formatted as YMMDD, YYMMDD, YYYMMDD or YYYYMMDD. Note that a LONG type argument cannot have leading 0s as it will be parsed using an octal numbering system. -**Argument type:** `STRING/LONG/DATE/TIME/TIMESTAMP` -**Return type:** `LONG` -### Example +Returns the number of seconds since the year 0 of the given value. Returns `NULL` if value is invalid. An argument of a `LONG` type can be used. It must be formatted as YMMDD, YYMMDD, YYYMMDD, or YYYYMMDD. Note that a `LONG` type argument cannot have leading 0s as it will be parsed using an octal numbering system. + +**Parameters**: +- `date` (Required): A `STRING`, `LONG`, `DATE`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `LONG` + +#### Example ```ppl @@ -2450,7 +2675,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2466,15 +2691,16 @@ fetched rows / total rows = 1/1 ## UNIX_TIMESTAMP -### Description +**Usage**: `UNIX_TIMESTAMP()` or `UNIX_TIMESTAMP(date)` -Usage: Converts given argument to Unix time (seconds since Epoch - very beginning of year 1970). If no argument given, it returns the current Unix time. -The date argument may be a DATE, or TIMESTAMP string, or a number in YYMMDD, YYMMDDhhmmss, YYYYMMDD, or YYYYMMDDhhmmss format. 
If the argument includes a time part, it may optionally include a fractional seconds part. -If argument is in invalid format or outside of range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), function returns NULL. -You can use [FROM_UNIXTIME](#from_unixtime) to do reverse conversion. -**Argument type:** `\/DOUBLE/DATE/TIMESTAMP` -**Return type:** `DOUBLE` -### Example +Converts the given argument to Unix time (seconds since Epoch - the very beginning of the year 1970). If no argument is given, it returns the current Unix time. The date argument may be a `DATE` or `TIMESTAMP` string, or a number in YYMMDD, YYMMDDhhmmss, YYYYMMDD, or YYYYMMDDhhmmss format. If the argument includes a time part, it may optionally include a fractional seconds part. If the argument is in an invalid format or outside the range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), the function returns `NULL`. You can use [`FROM_UNIXTIME`](#from_unixtime) to perform the reverse conversion. + +**Parameters**: +- `date` (Optional): A `DOUBLE`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `DOUBLE` + +#### Example ```ppl @@ -2484,7 +2710,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2500,12 +2726,15 @@ fetched rows / total rows = 1/1 ## UTC_DATE -### Description +**Usage**: `UTC_DATE()` -Returns the current UTC date as a value in 'YYYY-MM-DD'. -**Return type:** `DATE` -Specification: UTC_DATE() -> DATE -### Example +Returns the current UTC date as a value in `YYYY-MM-DD` format. + +**Parameters**: None + +**Return type**: `DATE` + +#### Example ```ppl ignore @@ -2515,7 +2744,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2531,12 +2760,15 @@ fetched rows / total rows = 1/1 ## UTC_TIME -### Description +**Usage**: `UTC_TIME()` Returns the current UTC time as a value in 'hh:mm:ss'. 
-**Return type:** `TIME` -Specification: UTC_TIME() -> TIME -### Example + +**Parameters**: None + +**Return type**: `TIME` + +#### Example ```ppl ignore @@ -2546,7 +2778,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2562,12 +2794,15 @@ fetched rows / total rows = 1/1 ## UTC_TIMESTAMP -### Description +**Usage**: `UTC_TIMESTAMP()` Returns the current UTC timestamp as a value in 'YYYY-MM-DD hh:mm:ss'. -**Return type:** `TIMESTAMP` -Specification: UTC_TIMESTAMP() -> TIMESTAMP -### Example + +**Parameters**: None + +**Return type**: `TIMESTAMP` + +#### Example ```ppl ignore @@ -2577,7 +2812,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2593,11 +2828,19 @@ fetched rows / total rows = 1/1 ## WEEK -### Description +**Usage**: `WEEK(date)` or `WEEK(date, mode)` + +Returns the week number for `date`. If the mode argument is omitted, the default mode 0 is used. -Usage: `week(date[, mode])` returns the week number for date. If the mode argument is omitted, the default mode 0 is used. -The following table describes how the mode argument works. +**Parameters**: +- `date` (Required): A `DATE`, `TIMESTAMP`, or `STRING` value. +- `mode` (Optional): An `INTEGER` mode value (0-7). +**Return type**: `INTEGER` + +Synonyms: [`WEEK_OF_YEAR`](#week_of_year) + +The following table describes how the `mode` parameter works. | Mode | First day of week | Range | Week 1 is the first week ... | | --- | --- | --- | --- | @@ -2609,12 +2852,8 @@ The following table describes how the mode argument works. 
| 5 | Monday | 0-53 | with a Monday in this year | | 6 | Sunday | 1-53 | with 4 or more days this year | | 7 | Monday | 1-53 | with a Monday in this year | - -**Argument type:** `DATE/TIMESTAMP/STRING` -**Return type:** `INTEGER` -Synonyms: [WEEK_OF_YEAR](#week_of_year) -### Example +#### Example ```ppl @@ -2624,7 +2863,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2640,13 +2879,16 @@ fetched rows / total rows = 1/1 ## WEEKDAY -### Description +**Usage**: `WEEKDAY(date)` -Usage: `weekday(date)` returns the weekday index for date (0 = Monday, 1 = Tuesday, ..., 6 = Sunday). -It is similar to the [dayofweek](#dayofweek) function, but returns different indexes for each day. -**Argument type:** `STRING/DATE/TIME/TIMESTAMP` -**Return type:** `INTEGER` -### Example +Returns the weekday index for `date` (0 = Monday, 1 = Tuesday, ..., 6 = Sunday). It is similar to the [`DAYOFWEEK`](#dayofweek) function, but returns different indexes for each day. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, `TIME`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +#### Example ```ppl @@ -2657,7 +2899,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2673,11 +2915,19 @@ fetched rows / total rows = 1/1 ## WEEK_OF_YEAR -### Description +**Usage**: `WEEK_OF_YEAR(date)` or `WEEK_OF_YEAR(date, mode)` + +Returns the week number for `date`. If the mode argument is omitted, the default mode 0 is used. + +**Parameters**: +- `date` (Required): A `DATE`, `TIMESTAMP`, or `STRING` value. +- `mode` (Optional): An `INTEGER` mode value (0-7). + +**Return type**: `INTEGER` -Usage: `week_of_year(date[, mode])` returns the week number for date. If the mode argument is omitted, the default mode 0 is used. -The following table describes how the mode argument works. 
+Synonyms: [`WEEK`](#week) +The following table describes how the mode argument works: | Mode | First day of week | Range | Week 1 is the first week ... | | --- | --- | --- | --- | @@ -2689,12 +2939,8 @@ The following table describes how the mode argument works. | 5 | Monday | 0-53 | with a Monday in this year | | 6 | Sunday | 1-53 | with 4 or more days this year | | 7 | Monday | 1-53 | with a Monday in this year | - -**Argument type:** `DATE/TIMESTAMP/STRING` -**Return type:** `INTEGER` -Synonyms: [WEEK](#week) -### Example +#### Example ```ppl @@ -2704,7 +2950,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2720,12 +2966,16 @@ fetched rows / total rows = 1/1 ## YEAR -### Description +**Usage**: `YEAR(date)` -Usage: `year(date)` returns the year for date, in the range 1000 to 9999, or 0 for the “zero” date. -**Argument type:** `STRING/DATE/TIMESTAMP` -**Return type:** `INTEGER` -### Example +Returns the year for `date`, in the range 1000 to 9999, or 0 for the "zero" date. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, or `TIMESTAMP` value. + +**Return type**: `INTEGER` + +#### Example ```ppl @@ -2735,7 +2985,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text @@ -2751,12 +3001,17 @@ fetched rows / total rows = 1/1 ## YEARWEEK -### Description +**Usage**: `YEARWEEK(date)` or `YEARWEEK(date, mode)` -Usage: `yearweek(date[, mode])` returns the year and week for date as an integer. It accepts and optional mode arguments aligned with those available for the [WEEK](#week) function. -**Argument type:** `STRING/DATE/TIME/TIMESTAMP` -**Return type:** `INTEGER` -### Example +Returns the year and week for `date` as an integer. It accepts an optional mode argument aligned with those available for the [`WEEK`](#week) function. + +**Parameters**: +- `date` (Required): A `STRING`, `DATE`, `TIME`, or `TIMESTAMP` value. +- `mode` (Optional): An `INTEGER` mode value (0-7). 
+ +**Return type**: `INTEGER` + +#### Example ```ppl @@ -2767,7 +3022,7 @@ source=people ``` -Expected output: +The query returns the following results: ```text diff --git a/docs/user/ppl/functions/expressions.md b/docs/user/ppl/functions/expressions.md index 999531cabbe..e42d867705c 100644 --- a/docs/user/ppl/functions/expressions.md +++ b/docs/user/ppl/functions/expressions.md @@ -1,34 +1,27 @@ -# Expressions +# Expressions -## Introduction +Expressions, particularly value expressions, return a scalar value. Expressions have different types and forms. For example, there are literal values as atomic expressions, as well as arithmetic, predicate, and function expressions built on top of them. You can use expressions in different clauses, such as arithmetic expressions in the `Filter` or `Stats` commands. -Expressions, particularly value expressions, are those which return a scalar value. Expressions have different types and forms. For example, there are literal values as atom expression and arithmetic, predicate and function expression built on top of them. And also expressions can be used in different clauses, such as using arithmetic expression in `Filter`, `Stats` command. -## Arithmetic Operators +## Arithmetic operators -### Description +Arithmetic expressions are formed by combining numeric literals and binary arithmetic operators. The following operators are available: +1. `+`: Addition +2. `-`: Subtraction +3. `*`: Multiplication +4. `/`: Division. When [`plugins.ppl.syntax.legacy.preferred`](../admin/settings.md) is `true` (default), integer operands follow the legacy truncating result. When the setting is `false`, the operands are promoted to floating-point, preserving the fractional part. Division by zero returns `NULL`. +5. `%`: Modulo. This operator can only be used with integers and returns the remainder of the division. 
-#### Operators +### Precedence -Arithmetic expression is an expression formed by numeric literals and binary arithmetic operators as follows: -1. `+`: Add. -2. `-`: Subtract. -3. `*`: Multiply. -4. `/`: Divide. Integer operands follow the legacy truncating result when - - [plugins.ppl.syntax.legacy.preferred](../admin/settings.md) is `true` (default). When the - setting is `false` the operands are promoted to floating point, preserving - the fractional part. Division by zero still returns `NULL`. -5. `%`: Modulo. This can be used with integers only with remainder of the division as result. - -#### Precedence +You can use parentheses to control the precedence of arithmetic operators. Otherwise, operators with higher precedence are performed first. + +### Type conversion -Parentheses can be used to control the precedence of arithmetic operators. Otherwise, operators of higher precedence is performed first. -#### Type Conversion +The system performs implicit type conversion when determining which operator to use. For example, adding an integer to a real number matches the signature `+(double,double)`, which results in a real number. The same type conversion rules apply to function calls. -Implicit type conversion is performed when looking up operator signature. For example, an integer `+` a real number matches signature `+(double,double)` which results in a real number. This rule also applies to function call discussed below. -### Examples +### Examples -Here is an example for different type of arithmetic expressions +The following are examples of different types of arithmetic expressions: ```ppl source=accounts @@ -36,7 +29,7 @@ source=accounts | fields age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 3/3 @@ -49,36 +42,46 @@ fetched rows / total rows = 3/3 +-----+ ``` -## Predicate Operators +## Predicate operators + +Predicate operators are expressions that evaluate to `true` or `false`. 
+ +Comparisons for `MISSING` and `NULL` values follow these rules: -### Description +- `MISSING` values only equal other `MISSING` values and are less than all other values. +- `NULL` values equal other `NULL` values, are greater than `MISSING` values, but less than all other values. -Predicate operator is an expression that evaluated to be ture. The MISSING and NULL value comparison has following the rule. MISSING value only equal to MISSING value and less than all the other values. NULL value equals to NULL value, large than MISSING value, but less than all the other values. -#### Operators +### Operators -| name | description | +| Name | Description | | --- | --- | -| > | Greater than operator | -| >= | Greater than or equal operator | -| < | Less than operator | -| != | Not equal operator | -| <= | Less than or equal operator | -| = | Equal operator | -| == | Equal operator (alternative syntax) | -| LIKE | Simple Pattern matching | -| IN | NULL value test | -| AND | AND operator | -| OR | OR operator | -| XOR | XOR operator | -| NOT | NOT NULL value test | - -It is possible to compare datetimes. When comparing different datetime types, for example `DATE` and `TIME`, both converted to `DATETIME`. -The following rule is applied on coversion: a `TIME` applied to today's date; `DATE` is interpreted at midnight. -### Examples - -#### Basic Predicate Operator - -Here is an example for comparison operators +| `>` | Greater than | +| `>=` | Greater than or equal to | +| `<` | Less than | +| `!=` | Not equal to | +| `<=` | Less than or equal to | +| `=` | Equal to | +| `==` | Equal to (alternative syntax) | +| `LIKE` | Simple pattern matching | +| `IN` | Value list membership test | +| `AND` | Logical AND | +| `OR` | Logical OR | +| `XOR` | Logical XOR | +| `NOT` | Logical NOT | + +You can compare date and time values. When comparing different date and time types (for example, `DATE` and `TIME`), both values are converted to `DATETIME`. 
+ +The following conversion rules are applied: +- A `TIME` value is combined with today's date. +- A `DATE` value is interpreted as midnight on that date. + +### Examples + +The following examples demonstrate how to use predicate operators in PPL queries. + +#### Basic predicate operators + +The following is an example of comparison operators: ```ppl source=accounts @@ -86,7 +89,7 @@ source=accounts | fields age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -97,7 +100,7 @@ fetched rows / total rows = 1/1 +-----+ ``` -The `==` operator can be used as an alternative to `=` for equality comparisons +The `==` operator can be used as an alternative to `=` for equality comparisons. ```ppl source=accounts @@ -105,7 +108,7 @@ source=accounts | fields age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -116,10 +119,10 @@ fetched rows / total rows = 1/1 +-----+ ``` -Note: Both `=` and `==` perform the same equality comparison. You can use either based on your preference. +> **Note**: Both `=` and `==` perform the same equality comparison. You can use either based on your preference. #### IN -IN operator test field in value lists +The `IN` operator tests whether a field value is in the specified list of values. ```ppl source=accounts @@ -127,7 +130,7 @@ source=accounts | fields age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -141,7 +144,7 @@ fetched rows / total rows = 2/2 #### OR -OR operator +The `OR` operator performs a logical OR operation between two Boolean expressions. 
```ppl source=accounts @@ -149,7 +152,7 @@ source=accounts | fields age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -163,7 +166,7 @@ fetched rows / total rows = 2/2 #### NOT -NOT operator +The `NOT` operator performs a logical NOT operation, negating a Boolean expression. ```ppl source=accounts @@ -171,7 +174,7 @@ source=accounts | fields age ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 diff --git a/docs/user/ppl/functions/index.md b/docs/user/ppl/functions/index.md index cdfbbd201ce..2ade4a09b98 100644 --- a/docs/user/ppl/functions/index.md +++ b/docs/user/ppl/functions/index.md @@ -3,237 +3,238 @@ PPL supports a wide range of built-in functions for data processing and analysis. These functions are organized into categories based on their functionality and can be used within PPL queries to manipulate and transform data. -- [Aggregation Functions](aggregations.md) - - [COUNT](aggregations.md/#count) - - [SUM](aggregations.md/#sum) - - [AVG](aggregations.md/#avg) - - [MAX](aggregations.md/#max) - - [MIN](aggregations.md/#min) - - [VAR_SAMP](aggregations.md/#var_samp) - - [VAR_POP](aggregations.md/#var_pop) - - [STDDEV_SAMP](aggregations.md/#stddev_samp) - - [STDDEV_POP](aggregations.md/#stddev_pop) - - [DISTINCT_COUNT, DC](aggregations.md/#distinct_count-dc) - - [DISTINCT_COUNT_APPROX](aggregations.md/#distinct_count_approx) - - [EARLIEST](aggregations.md/#earliest) - - [LATEST](aggregations.md/#latest) - - [TAKE](aggregations.md/#take) - - [PERCENTILE, PERCENTILE_APPROX](aggregations.md/#percentile-or-percentile_approx) - - [MEDIAN](aggregations.md/#median) - - [FIRST](aggregations.md/#first) - - [LAST](aggregations.md/#last) - - [LIST](aggregations.md/#list) - - [VALUES](aggregations.md/#values) - -- [Collection Functions](collection.md) - - [ARRAY](collection.md/#array) - - [ARRAY_LENGTH](collection.md/#array_length) - - 
[FORALL](collection.md/#forall) - - [EXISTS](collection.md/#exists) - - [FILTER](collection.md/#filter) - - [TRANSFORM](collection.md/#transform) - - [REDUCE](collection.md/#reduce) - - [MVJOIN](collection.md/#mvjoin) - - [MVAPPEND](collection.md/#mvappend) - - [SPLIT](collection.md/#split) - - [MVDEDUP](collection.md/#mvdedup) - - [MVFIND](collection.md/#mvfind) - - [MVINDEX](collection.md/#mvindex) - - [MVMAP](collection.md/#mvmap) - - [MVZIP](collection.md/#mvzip) - -- [Condition Functions](condition.md) - - [ISNULL](condition.md/#isnull) - - [ISNOTNULL](condition.md/#isnotnull) - - [EXISTS](condition.md/#exists) - - [IFNULL](condition.md/#ifnull) - - [NULLIF](condition.md/#nullif) - - [IF](condition.md/#if) - - [CASE](condition.md/#case) - - [COALESCE](condition.md/#coalesce) - - [ISPRESENT](condition.md/#ispresent) - - [ISBLANK](condition.md/#isblank) - - [ISEMPTY](condition.md/#isempty) - - [EARLIEST](condition.md/#earliest) - - [LATEST](condition.md/#latest) - - [REGEXP_MATCH](condition.md/#regexp_match) - - [CONTAINS](condition.md/#contains) - -- [Type Conversion Functions](conversion.md) - - [CAST](conversion.md/#cast) - - [TOSTRING](conversion.md/#tostring) - - [TONUMBER](conversion.md/#tonumber) - -- [Cryptographic Functions](cryptographic.md) - - [SHA1](cryptographic.md/#sha1) - - [SHA2](cryptographic.md/#sha2) - -- [Date and Time Functions](datetime.md) - - [ADDDATE](datetime.md/#adddate) - - [ADDTIME](datetime.md/#addtime) - - [CONVERT_TZ](datetime.md/#convert_tz) - - [CURDATE](datetime.md/#curdate) - - [CURRENT_DATE](datetime.md/#current_date) - - [CURRENT_TIME](datetime.md/#current_time) - - [CURRENT_TIMESTAMP](datetime.md/#current_timestamp) - - [CURTIME](datetime.md/#curtime) - - [DATE](datetime.md/#date) - - [DATE_ADD](datetime.md/#date_add) - - [DATE_FORMAT](datetime.md/#date_format) - - [DATETIME](datetime.md/#datetime) - - [DATE_SUB](datetime.md/#date_sub) - - [DATEDIFF](datetime.md/#datediff) - - [DAY](datetime.md/#day) - - 
[DAYNAME](datetime.md/#dayname) - - [DAYOFMONTH](datetime.md/#dayofmonth) - - [DAY_OF_MONTH](datetime.md/#day_of_month) - - [DAYOFWEEK](datetime.md/#dayofweek) - - [DAY_OF_WEEK](datetime.md/#day_of_week) - - [DAYOFYEAR](datetime.md/#dayofyear) - - [DAY_OF_YEAR](datetime.md/#day_of_year) - - [EXTRACT](datetime.md/#extract) - - [FROM_DAYS](datetime.md/#from_days) - - [FROM_UNIXTIME](datetime.md/#from_unixtime) - - [GET_FORMAT](datetime.md/#get_format) - - [HOUR](datetime.md/#hour) - - [HOUR_OF_DAY](datetime.md/#hour_of_day) - - [LAST_DAY](datetime.md/#last_day) - - [LOCALTIMESTAMP](datetime.md/#localtimestamp) - - [LOCALTIME](datetime.md/#localtime) - - [MAKEDATE](datetime.md/#makedate) - - [MAKETIME](datetime.md/#maketime) - - [MICROSECOND](datetime.md/#microsecond) - - [MINUTE](datetime.md/#minute) - - [MINUTE_OF_HOUR](datetime.md/#minute_of_hour) - - [MONTH](datetime.md/#month) - - [MONTH_OF_YEAR](datetime.md/#month_of_year) - - [MONTHNAME](datetime.md/#monthname) - - [NOW](datetime.md/#now) - - [PERIOD_ADD](datetime.md/#period_add) - - [PERIOD_DIFF](datetime.md/#period_diff) - - [QUARTER](datetime.md/#quarter) - - [SEC_TO_TIME](datetime.md/#sec_to_time) - - [SECOND](datetime.md/#second) - - [SECOND_OF_MINUTE](datetime.md/#second_of_minute) - - [STRFTIME](datetime.md/#strftime) - - [STR_TO_DATE](datetime.md/#str_to_date) - - [SUBDATE](datetime.md/#subdate) - - [SUBTIME](datetime.md/#subtime) - - [SYSDATE](datetime.md/#sysdate) - - [TIME](datetime.md/#time) - - [TIME_FORMAT](datetime.md/#time_format) - - [TIME_TO_SEC](datetime.md/#time_to_sec) - - [TIMEDIFF](datetime.md/#timediff) - - [TIMESTAMP](datetime.md/#timestamp) - - [TIMESTAMPADD](datetime.md/#timestampadd) - - [TIMESTAMPDIFF](datetime.md/#timestampdiff) - - [TO_DAYS](datetime.md/#to_days) - - [TO_SECONDS](datetime.md/#to_seconds) - - [UNIX_TIMESTAMP](datetime.md/#unix_timestamp) - - [UTC_DATE](datetime.md/#utc_date) - - [UTC_TIME](datetime.md/#utc_time) - - [UTC_TIMESTAMP](datetime.md/#utc_timestamp) - - 
[WEEK](datetime.md/#week) - - [WEEKDAY](datetime.md/#weekday) - - [WEEK_OF_YEAR](datetime.md/#week_of_year) - - [YEAR](datetime.md/#year) - - [YEARWEEK](datetime.md/#yearweek) - -- [Expressions](expressions.md) - - [Arithmetic Operators](expressions.md#arithmetic-operators) - - [Predicate Operators](expressions.md/#predicate-operators) - -- [IP Address Functions](ip.md) - - [CIDRMATCH](ip.md/#cidrmatch) - - [GEOIP](ip.md/#geoip) - -- [JSON Functions](json.md) - - [JSON](json.md/#json) - - [JSON_VALID](json.md/#json_valid) - - [JSON_OBJECT](json.md/#json_object) - - [JSON_ARRAY](json.md/#json_array) - - [JSON_ARRAY_LENGTH](json.md/#json_array_length) - - [JSON_EXTRACT](json.md/#json_extract) - - [JSON_DELETE](json.md/#json_delete) - - [JSON_SET](json.md/#json_set) - - [JSON_APPEND](json.md/#json_append) - - [JSON_EXTEND](json.md/#json_extend) - - [JSON_KEYS](json.md/#json_keys) - -- [Mathematical Functions](math.md) - - [ADD](math.md/#add) - - [SUBTRACT](math.md/#subtract) - - [MULTIPLY](math.md/#multiply) - - [DIVIDE](math.md/#divide) - - [SUM](math.md/#sum) - - [AVG](math.md/#avg) - - [ACOS](math.md/#acos) - - [ASIN](math.md/#asin) - - [ATAN](math.md/#atan) - - [ATAN2](math.md/#atan2) - - [CEIL](math.md/#ceil) - - [CEILING](math.md/#ceiling) - - [CONV](math.md/#conv) - - [COS](math.md/#cos) - - [COSH](math.md/#cosh) - - [COT](math.md/#cot) - - [CRC32](math.md/#crc32) - - [DEGREES](math.md/#degrees) - - [E](math.md/#e) - - [EXP](math.md/#exp) - - [EXPM1](math.md/#expm1) - - [FLOOR](math.md/#floor) - - [LN](math.md/#ln) - - [LOG](math.md/#log) - - [LOG2](math.md/#log2) - - [LOG10](math.md/#log10) - - [MOD](math.md/#mod) - - [MODULUS](math.md/#modulus) - - [PI](math.md/#pi) - - [POW](math.md/#pow) - - [POWER](math.md/#power) - - [RADIANS](math.md/#radians) - - [RAND](math.md/#rand) - - [ROUND](math.md/#round) - - [SIGN](math.md/#sign) - - [SIGNUM](math.md/#signum) - - [SIN](math.md/#sin) - - [SINH](math.md/#sinh) - - [SQRT](math.md/#sqrt) - - [CBRT](math.md/#cbrt) - 
- [RINT](math.md/#rint) - -- [Relevance Functions](relevance.md) - - [MATCH](relevance.md/#match) - - [MATCH_PHRASE](relevance.md/#match_phrase) - - [MATCH_PHRASE_PREFIX](relevance.md/#match_phrase_prefix) - - [MULTI_MATCH](relevance.md/#multi_match) - - [SIMPLE_QUERY_STRING](relevance.md/#simple_query_string) - - [MATCH_BOOL_PREFIX](relevance.md/#match_bool_prefix) - - [QUERY_STRING](relevance.md/#query_string) - -- [Statistical Functions](statistical.md) - - [MAX](statistical.md/#max) - - [MIN](statistical.md/#min) - -- [String Functions](string.md) - - [CONCAT](string.md/#concat) - - [CONCAT_WS](string.md/#concat_ws) - - [LENGTH](string.md/#length) - - [LIKE](string.md/#like) - - [ILIKE](string.md/#ilike) - - [LOCATE](string.md/#locate) - - [LOWER](string.md/#lower) - - [LTRIM](string.md/#ltrim) - - [POSITION](string.md/#position) - - [REPLACE](string.md/#replace) - - [REVERSE](string.md/#reverse) - - [RIGHT](string.md/#right) - - [RTRIM](string.md/#rtrim) - - [SUBSTRING](string.md/#substring) - - [TRIM](string.md/#trim) - - [UPPER](string.md/#upper) - - [REGEXP_REPLACE](string.md/#regexp_replace) - -- [System Functions](system.md) - - [TYPEOF](system.md/#typeof) \ No newline at end of file +- [Aggregation functions](aggregations.md): + - [COUNT](aggregations.md/#count). + - [SUM](aggregations.md/#sum). + - [AVG](aggregations.md/#avg). + - [MAX](aggregations.md/#max). + - [MIN](aggregations.md/#min). + - [VAR_SAMP](aggregations.md/#var_samp). + - [VAR_POP](aggregations.md/#var_pop). + - [STDDEV_SAMP](aggregations.md/#stddev_samp). + - [STDDEV_POP](aggregations.md/#stddev_pop). + - [DISTINCT_COUNT, DC](aggregations.md/#distinct_count-dc). + - [DISTINCT_COUNT_APPROX](aggregations.md/#distinct_count_approx). + - [EARLIEST](aggregations.md/#earliest). + - [LATEST](aggregations.md/#latest). + - [TAKE](aggregations.md/#take). + - [PERCENTILE, PERCENTILE_APPROX](aggregations.md/#percentile-percentile_approx). + - [MEDIAN](aggregations.md/#median). 
+ - [FIRST](aggregations.md/#first). + - [LAST](aggregations.md/#last). + - [LIST](aggregations.md/#list). + - [VALUES](aggregations.md/#values). + +- [Collection functions](collection.md): + - [ARRAY](collection.md/#array). + - [ARRAY_LENGTH](collection.md/#array_length). + - [FORALL](collection.md/#forall). + - [EXISTS](collection.md/#exists). + - [FILTER](collection.md/#filter). + - [TRANSFORM](collection.md/#transform). + - [REDUCE](collection.md/#reduce). + - [MVJOIN](collection.md/#mvjoin). + - [MVAPPEND](collection.md/#mvappend). + - [SPLIT](collection.md/#split). + - [MVDEDUP](collection.md/#mvdedup). + - [MVFIND](collection.md/#mvfind). + - [MVINDEX](collection.md/#mvindex). + - [MVMAP](collection.md/#mvmap). + - [MVZIP](collection.md/#mvzip). + +- [Conditional functions](condition.md): + - [ISNULL](condition.md/#isnull). + - [ISNOTNULL](condition.md/#isnotnull). + - [EXISTS](condition.md/#exists). + - [IFNULL](condition.md/#ifnull). + - [NULLIF](condition.md/#nullif). + - [IF](condition.md/#if). + - [CASE](condition.md/#case). + - [COALESCE](condition.md/#coalesce). + - [ISPRESENT](condition.md/#ispresent). + - [ISBLANK](condition.md/#isblank). + - [ISEMPTY](condition.md/#isempty). + - [EARLIEST](condition.md/#earliest). + - [LATEST](condition.md/#latest). + - [REGEXP_MATCH](condition.md/#regexp_match). + - [CONTAINS](condition.md/#contains). + +- [Type conversion functions](conversion.md): + - [CAST](conversion.md/#cast). + - [TOSTRING](conversion.md/#tostring). + - [TONUMBER](conversion.md/#tonumber). + +- [Cryptographic functions](cryptographic.md): + - [MD5](cryptographic.md/#md5). + - [SHA1](cryptographic.md/#sha1). + - [SHA2](cryptographic.md/#sha2). + +- [Date and time functions](datetime.md): + - [ADDDATE](datetime.md/#adddate). + - [ADDTIME](datetime.md/#addtime). + - [CONVERT_TZ](datetime.md/#convert_tz). + - [CURDATE](datetime.md/#curdate). + - [CURRENT_DATE](datetime.md/#current_date). + - [CURRENT_TIME](datetime.md/#current_time). 
+ - [CURRENT_TIMESTAMP](datetime.md/#current_timestamp). + - [CURTIME](datetime.md/#curtime). + - [DATE](datetime.md/#date). + - [DATE_ADD](datetime.md/#date_add). + - [DATE_FORMAT](datetime.md/#date_format). + - [DATETIME](datetime.md/#datetime). + - [DATE_SUB](datetime.md/#date_sub). + - [DATEDIFF](datetime.md/#datediff). + - [DAY](datetime.md/#day). + - [DAYNAME](datetime.md/#dayname). + - [DAYOFMONTH](datetime.md/#dayofmonth). + - [DAY_OF_MONTH](datetime.md/#day_of_month). + - [DAYOFWEEK](datetime.md/#dayofweek). + - [DAY_OF_WEEK](datetime.md/#day_of_week). + - [DAYOFYEAR](datetime.md/#dayofyear). + - [DAY_OF_YEAR](datetime.md/#day_of_year). + - [EXTRACT](datetime.md/#extract). + - [FROM_DAYS](datetime.md/#from_days). + - [FROM_UNIXTIME](datetime.md/#from_unixtime). + - [GET_FORMAT](datetime.md/#get_format). + - [HOUR](datetime.md/#hour). + - [HOUR_OF_DAY](datetime.md/#hour_of_day). + - [LAST_DAY](datetime.md/#last_day). + - [LOCALTIMESTAMP](datetime.md/#localtimestamp). + - [LOCALTIME](datetime.md/#localtime). + - [MAKEDATE](datetime.md/#makedate). + - [MAKETIME](datetime.md/#maketime). + - [MICROSECOND](datetime.md/#microsecond). + - [MINUTE](datetime.md/#minute). + - [MINUTE_OF_HOUR](datetime.md/#minute_of_hour). + - [MONTH](datetime.md/#month). + - [MONTH_OF_YEAR](datetime.md/#month_of_year). + - [MONTHNAME](datetime.md/#monthname). + - [NOW](datetime.md/#now). + - [PERIOD_ADD](datetime.md/#period_add). + - [PERIOD_DIFF](datetime.md/#period_diff). + - [QUARTER](datetime.md/#quarter). + - [SEC_TO_TIME](datetime.md/#sec_to_time). + - [SECOND](datetime.md/#second). + - [SECOND_OF_MINUTE](datetime.md/#second_of_minute). + - [STRFTIME](datetime.md/#strftime). + - [STR_TO_DATE](datetime.md/#str_to_date). + - [SUBDATE](datetime.md/#subdate). + - [SUBTIME](datetime.md/#subtime). + - [SYSDATE](datetime.md/#sysdate). + - [TIME](datetime.md/#time). + - [TIME_FORMAT](datetime.md/#time_format). + - [TIME_TO_SEC](datetime.md/#time_to_sec). 
+ - [TIMEDIFF](datetime.md/#timediff). + - [TIMESTAMP](datetime.md/#timestamp). + - [TIMESTAMPADD](datetime.md/#timestampadd). + - [TIMESTAMPDIFF](datetime.md/#timestampdiff). + - [TO_DAYS](datetime.md/#to_days). + - [TO_SECONDS](datetime.md/#to_seconds). + - [UNIX_TIMESTAMP](datetime.md/#unix_timestamp). + - [UTC_DATE](datetime.md/#utc_date). + - [UTC_TIME](datetime.md/#utc_time). + - [UTC_TIMESTAMP](datetime.md/#utc_timestamp). + - [WEEK](datetime.md/#week). + - [WEEKDAY](datetime.md/#weekday). + - [WEEK_OF_YEAR](datetime.md/#week_of_year). + - [YEAR](datetime.md/#year). + - [YEARWEEK](datetime.md/#yearweek). + +- [Expressions](expressions.md): + - [Arithmetic operators](expressions.md#arithmetic-operators). + - [Predicate operators](expressions.md/#predicate-operators). + +- [IP address functions](ip.md): + - [CIDRMATCH](ip.md/#cidrmatch). + - [GEOIP](ip.md/#geoip). + +- [JSON functions](json.md): + - [JSON](json.md/#json). + - [JSON_VALID](json.md/#json_valid). + - [JSON_OBJECT](json.md/#json_object). + - [JSON_ARRAY](json.md/#json_array). + - [JSON_ARRAY_LENGTH](json.md/#json_array_length). + - [JSON_EXTRACT](json.md/#json_extract). + - [JSON_DELETE](json.md/#json_delete). + - [JSON_SET](json.md/#json_set). + - [JSON_APPEND](json.md/#json_append). + - [JSON_EXTEND](json.md/#json_extend). + - [JSON_KEYS](json.md/#json_keys). + +- [Mathematical functions](math.md): + - [ADD](math.md/#add). + - [SUBTRACT](math.md/#subtract). + - [MULTIPLY](math.md/#multiply). + - [DIVIDE](math.md/#divide). + - [SUM](math.md/#sum). + - [AVG](math.md/#avg). + - [ACOS](math.md/#acos). + - [ASIN](math.md/#asin). + - [ATAN](math.md/#atan). + - [ATAN2](math.md/#atan2). + - [CEIL](math.md/#ceil). + - [CEILING](math.md/#ceiling). + - [CONV](math.md/#conv). + - [COS](math.md/#cos). + - [COSH](math.md/#cosh). + - [COT](math.md/#cot). + - [CRC32](math.md/#crc32). + - [DEGREES](math.md/#degrees). + - [E](math.md/#e). + - [EXP](math.md/#exp). + - [EXPM1](math.md/#expm1). 
+ - [FLOOR](math.md/#floor). + - [LN](math.md/#ln). + - [LOG](math.md/#log). + - [LOG2](math.md/#log2). + - [LOG10](math.md/#log10). + - [MOD](math.md/#mod). + - [MODULUS](math.md/#modulus). + - [PI](math.md/#pi). + - [POW](math.md/#pow). + - [POWER](math.md/#power). + - [RADIANS](math.md/#radians). + - [RAND](math.md/#rand). + - [ROUND](math.md/#round). + - [SIGN](math.md/#sign). + - [SIGNUM](math.md/#signum). + - [SIN](math.md/#sin). + - [SINH](math.md/#sinh). + - [SQRT](math.md/#sqrt). + - [CBRT](math.md/#cbrt). + - [RINT](math.md/#rint). + +- [Relevance functions](relevance.md): + - [MATCH](relevance.md/#match). + - [MATCH_PHRASE](relevance.md/#match_phrase). + - [MATCH_PHRASE_PREFIX](relevance.md/#match_phrase_prefix). + - [MULTI_MATCH](relevance.md/#multi_match). + - [SIMPLE_QUERY_STRING](relevance.md/#simple_query_string). + - [MATCH_BOOL_PREFIX](relevance.md/#match_bool_prefix). + - [QUERY_STRING](relevance.md/#query_string). + +- [Statistical functions](statistical.md): + - [MAX](statistical.md/#max). + - [MIN](statistical.md/#min). + +- [String functions](string.md): + - [CONCAT](string.md/#concat). + - [CONCAT_WS](string.md/#concat_ws). + - [LENGTH](string.md/#length). + - [LIKE](string.md/#like). + - [ILIKE](string.md/#ilike). + - [LOCATE](string.md/#locate). + - [LOWER](string.md/#lower). + - [LTRIM](string.md/#ltrim). + - [POSITION](string.md/#position). + - [REPLACE](string.md/#replace). + - [REVERSE](string.md/#reverse). + - [RIGHT](string.md/#right). + - [RTRIM](string.md/#rtrim). + - [SUBSTRING](string.md/#substring). + - [TRIM](string.md/#trim). + - [UPPER](string.md/#upper). + - [REGEXP_REPLACE](string.md/#regexp_replace). + +- [System functions](system.md): + - [TYPEOF](system.md/#typeof). 
diff --git a/docs/user/ppl/functions/ip.md b/docs/user/ppl/functions/ip.md index c21816baea9..d59ccecc8c8 100644 --- a/docs/user/ppl/functions/ip.md +++ b/docs/user/ppl/functions/ip.md @@ -1,13 +1,19 @@ -# IP Address Functions +# IP address functions -## CIDRMATCH +The following IP address functions are supported in PPL. -### Description +## CIDRMATCH -Usage: `cidrmatch(ip, cidr)` checks if `ip` is within the specified `cidr` range. +**Usage**: `CIDRMATCH(ip, cidr)` -**Argument type:** `STRING`/`IP`, `STRING` -**Return type:** `BOOLEAN` +Checks whether an IP address is within the specified CIDR range. + +**Parameters**: + +- `ip` (Required): The IP address to check, as a string or IP value. Supports both IPv4 and IPv6. +- `cidr` (Required): The CIDR range to check against, as a string. Supports both IPv4 and IPv6 blocks. + +**Return type**: `BOOLEAN` ### Example @@ -17,7 +23,7 @@ source=weblogs | fields host, url ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -29,26 +35,29 @@ fetched rows / total rows = 2/2 +---------+--------------------+ ``` -Note: - - `ip` can be an IPv4 or IPv6 address - - `cidr` can be an IPv4 or IPv6 block - - `ip` and `cidr` must both be valid and non-missing/non-null - -## GEOIP +## GEOIP + +**Usage**: `GEOIP(dataSourceName, ipAddress[, options])` -### Description +Retrieves location information for IP addresses using the OpenSearch Geospatial plugin API. -Usage: `geoip(dataSourceName, ipAddress[, options])` to lookup location information from given IP addresses via OpenSearch GeoSpatial plugin API. +**Parameters**: -**Argument type:** `STRING`, `STRING`/`IP`, `STRING` -**Return type:** `OBJECT` +- `dataSourceName` (Required): The name of an established data source on the OpenSearch Geospatial plugin. For configuration details, see the [IP2Geo processor documentation](https://docs.opensearch.org/latest/ingest-pipelines/processors/ip2geo/). 
+- `ipAddress` (Required): The IP address to look up, as a string or IP value. Supports both IPv4 and IPv6. +- `options` (Optional): A comma-separated string of fields to output. The available fields depend on the data source provider's schema. For example, the `geolite2-city` dataset includes fields like `country_iso_code`, `country_name`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `time_zone`, and `location`. -### Example: + +**Return type**: `OBJECT` + +### Example ```ppl ignore source=weblogs | eval LookupResult = geoip("dataSourceName", "50.68.18.229", "country_iso_code,city_name") ``` + +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -58,8 +67,3 @@ fetched rows / total rows = 1/1 | {'city_name': 'Vancouver', 'country_iso_code': 'CA'} | +-------------------------------------------------------------+ ``` - -Note: - - `dataSourceName` must be an established dataSource on OpenSearch GeoSpatial plugin, detail of configuration can be found: https://opensearch.org/docs/latest/ingest-pipelines/processors/ip2geo/ - - `ip` can be an IPv4 or an IPv6 address - - `options` is an optional String of comma separated fields to output: the selection of fields is subject to dataSourceProvider's schema. For example, the list of fields in the provided `geolite2-city` dataset includes: "country_iso_code", "country_name", "continent_name", "region_iso_code", "region_name", "city_name", "time_zone", "location" diff --git a/docs/user/ppl/functions/json.md b/docs/user/ppl/functions/json.md index e9bd8cf8ac6..d459ed5f981 100644 --- a/docs/user/ppl/functions/json.md +++ b/docs/user/ppl/functions/json.md @@ -1,31 +1,36 @@ -# JSON Functions +# JSON functions -## JSON Path +PPL supports the following JSON functions for creating, parsing, and manipulating JSON data. -### Description +## JSON path All JSON paths used in JSON functions follow the format `{}.{}...`. -Each `` represents a field name. 
The `{}` part is optional and is only applicable when the corresponding key refers to an array. -For example +Each `` represents a field name. The `{}` part is optional and is used only when the corresponding key refers to an array. +For example: ```bash a{2}.b{0} - ``` -This refers to the element at index 0 of the `b` array, which is nested inside the element at index 2 of the `a` array. -Notes: -1. The `{}` notation applies **only when** the associated key points to an array. -2. `{}` (without a specific index) is interpreted as a **wildcard**, equivalent to `{*}`, meaning "all elements" in the array at that level. +This path accesses the element at index `0` in the `b` array, which is located within the element at index `2` of the `a` array. + +**Notes**: +1. The `{}` notation applies only when the associated key points to an array. +2. `{}` (without a specific index) is interpreted as a wildcard, equivalent to `{*}`, meaning `all elements` in the array at that level. ## JSON -### Description +**Usage**: `JSON(value)` + +Validates and parses a JSON string. Returns the parsed JSON value if the string is valid JSON, or `NULL` if invalid. + +**Parameters**: + +- `value` (Required): The string to validate and parse as JSON. + +**Return type**: `STRING` -Usage: `json(value)` Evaluates whether a string can be parsed as a json-encoded string. Returns the value if valid, null otherwise. -**Argument type:** `STRING` -**Return type:** `STRING` -### Example +#### Example ```ppl source=json_test @@ -34,7 +39,7 @@ source=json_test | fields test_name, json_string, json ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -48,17 +53,23 @@ fetched rows / total rows = 4/4 +--------------------+---------------------------------+---------------------------------+ ``` -## JSON_VALID +## JSON_VALID -### Description +**Usage**: `JSON_VALID(value)` + +Evaluates whether a string uses valid JSON syntax. 
Returns `TRUE` if valid, `FALSE` if invalid. `NULL` input returns `NULL`. + +**Version**: 3.1.0 +**Limitation**: Only works when `plugins.calcite.enabled=true` + +**Parameters**: + +- `value` (Required): The string to validate as JSON. + +**Return type**: `BOOLEAN` + +#### Example -Version: 3.1.0 -Limitation: Only works when `plugins.calcite.enabled=true` -Usage: `json_valid(value)` Evaluates whether a string uses valid JSON syntax. Returns TRUE if valid, FALSE if invalid. NULL input returns NULL. -**Argument type:** `STRING ` -**Return type:** `BOOLEAN ` -Example - ```ppl source=people | eval is_valid_json = json_valid('[1,2,3,4]'), is_invalid_json = json_valid('{invalid}') @@ -66,7 +77,7 @@ source=people | head 1 ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -77,15 +88,21 @@ fetched rows / total rows = 1/1 +---------------+-----------------+ ``` -## JSON_OBJECT +## JSON_OBJECT -### Description +**Usage**: `JSON_OBJECT(key1, value1, key2, value2, ...)` + +Creates a JSON object string from the specified key-value pairs. All keys must be strings. + +**Parameters**: + +- `key1`, `value1` (Required): The first key-value pair. The key must be a string. +- `key2`, `value2`, `...` (Optional): Additional key-value pairs. + +**Return type**: `STRING` + +#### Example -Usage: `json_object(key1, value1, key2, value2...)` create a json object string with key value pairs. The key must be string. 
-**Argument type:** `key1: STRING, value1: ANY, key2: STRING, value2: ANY ...` -**Return type:** `STRING` -### Example - ```ppl source=json_test | eval test_json = json_object('key', 123.45) @@ -93,7 +110,7 @@ source=json_test | fields test_json ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -104,15 +121,20 @@ fetched rows / total rows = 1/1 +----------------+ ``` -## JSON_ARRAY +## JSON_ARRAY -### Description +**Usage**: `JSON_ARRAY(element1, element2, ...)` + +Creates a JSON array string from the specified elements. + +**Parameters**: + +- `element1`, `element2`, `...` (Optional): The elements to include in the array. Can be any data type. + +**Return type**: `STRING` + +#### Example -Usage: `json_array(element1, element2, ...)` create a json array string with elements. -**Argument type:** `element1: ANY, element2: ANY ...` -**Return type:** `STRING` -### Example - ```ppl source=json_test | eval test_json_array = json_array('key', 123.45) @@ -120,7 +142,7 @@ source=json_test | fields test_json_array ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -131,15 +153,22 @@ fetched rows / total rows = 1/1 +-----------------+ ``` -## JSON_ARRAY_LENGTH +## JSON_ARRAY_LENGTH -### Description +**Usage**: `JSON_ARRAY_LENGTH(value)` + +Returns the number of elements in a JSON array. Returns `NULL` if the input is not a valid JSON array, is `NULL`, or contains invalid JSON. + +**Parameters**: + +- `value` (Required): A string containing a JSON array. + +**Return type**: `INTEGER` + +#### Examples + +The following example returns the length of a valid JSON array: -Usage: `json_array_length(value)` parse the string to json array and return size,, null is returned in case of any other valid JSON string, null or an invalid JSON. 
-**Argument type:** `value: A JSON STRING` -**Return type:** `INTEGER` -### Example - ```ppl source=json_test | eval array_length = json_array_length("[1,2,3]") @@ -147,7 +176,7 @@ source=json_test | fields array_length ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -157,6 +186,8 @@ fetched rows / total rows = 1/1 | 3 | +--------------+ ``` + +The following example returns `NULL` for non-array JSON values: ```ppl source=json_test @@ -165,7 +196,7 @@ source=json_test | fields array_length ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -176,15 +207,30 @@ fetched rows / total rows = 1/1 +--------------+ ``` -## JSON_EXTRACT +## JSON_EXTRACT -### Description +**Usage**: `JSON_EXTRACT(json_string, path1, path2, ...)` + +Extracts values from a JSON string using the specified JSON paths. + +**Behavior**: +- **Single path**: Returns the extracted value directly. +- **Multiple paths**: Returns a JSON array containing the extracted values in path order. +- **Invalid path**: Returns `NULL` for that path in the result. + +For path syntax details, see the [JSON path](#json-path) section. + +**Parameters**: + +- `json_string` (Required): The JSON string to extract values from. +- `path1`, `path2`, `...` (Required): One or more JSON paths specifying which values to extract. + +**Return type**: `STRING` + +#### Examples + +The following example extracts values using a single JSON path: -Usage: `json_extract(json_string, path1, path2, ...)` Extracts values using the specified JSON paths. If only one path is provided, it returns a single value. If multiple paths are provided, it returns a JSON Array in the order of the paths. If one path cannot find value, return null as the result for this path. The path use "{}" to represent index for array, "{}" means "{*}". 
-**Argument type:** `json_string: STRING, path1: STRING, path2: STRING ...` -**Return type:** `STRING` -### Example - ```ppl source=json_test | eval extract = json_extract('{"a": [{"b": 1}, {"b": 2}]}', 'a{}.b') @@ -192,7 +238,7 @@ source=json_test | fields extract ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -202,6 +248,8 @@ fetched rows / total rows = 1/1 | [1,2] | +---------+ ``` + +The following example extracts values using multiple JSON paths: ```ppl source=json_test @@ -210,7 +258,7 @@ source=json_test | fields extract ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -221,15 +269,23 @@ fetched rows / total rows = 1/1 +---------------------------+ ``` -## JSON_DELETE +## JSON_DELETE -### Description +**Usage**: `JSON_DELETE(json_string, path1, path2, ...)` + +Deletes values from a JSON string at the specified JSON paths. Returns the modified JSON string. If a path cannot find a value, no changes are made for that path. + +**Parameters**: + +- `json_string` (Required): The JSON string to delete values from. +- `path1`, `path2`, `...` (Required): One or more JSON paths specifying which values to delete. + +**Return type**: `STRING` + +#### Examples + +The following example deletes a value using a single JSON path: -Usage: `json_delete(json_string, path1, path2, ...)` Delete values using the specified JSON paths. Return the json string after deleting. If one path cannot find value, do nothing. 
-**Argument type:** `json_string: STRING, path1: STRING, path2: STRING ...` -**Return type:** `STRING` -### Example - ```ppl source=json_test | eval delete = json_delete('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b') @@ -237,7 +293,7 @@ source=json_test | fields delete ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -247,6 +303,8 @@ fetched rows / total rows = 1/1 | {"a":[{},{"b":2}]} | +--------------------+ ``` + +The following example deletes values using multiple JSON paths: ```ppl source=json_test @@ -255,7 +313,7 @@ source=json_test | fields delete ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -265,6 +323,8 @@ fetched rows / total rows = 1/1 | {"a":[{},{}]} | +---------------+ ``` + +The following example shows no changes occur when trying to delete a non-existent path: ```ppl source=json_test @@ -273,7 +333,7 @@ source=json_test | fields delete ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -284,15 +344,24 @@ fetched rows / total rows = 1/1 +-------------------------+ ``` -## JSON_SET +## JSON_SET -### Description +**Usage**: `JSON_SET(json_string, path1, value1, path2, value2, ...)` + +Sets values in a JSON string at the specified JSON paths. Returns the modified JSON string. If a path's parent node is not a JSON object, that path is skipped. + +**Parameters**: + +- `json_string` (Required): The JSON string to modify. +- `path1`, `value1` (Required): The first path-value pair to set. +- `path2`, `value2`, `...` (Optional): Additional path-value pairs. + +**Return type**: `STRING` + +#### Examples + +The following example sets a single value at a JSON path: -Usage: `json_set(json_string, path1, value1, path2, value2...)` Set values to corresponding paths using the specified JSON paths. If one path's parent node is not a json object, skip the path. Return the json string after setting. 
-**Argument type:** `json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ...` -**Return type:** `STRING` -### Example - ```ppl source=json_test | eval jsonSet = json_set('{"a": [{"b": 1}]}', 'a{0}.b', 3) @@ -300,7 +369,7 @@ source=json_test | fields jsonSet ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -310,6 +379,8 @@ fetched rows / total rows = 1/1 | {"a":[{"b":3}]} | +-----------------+ ``` + +The following example sets multiple values using multiple JSON paths: ```ppl source=json_test @@ -318,7 +389,7 @@ source=json_test | fields jsonSet ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -329,32 +400,43 @@ fetched rows / total rows = 1/1 +-------------------------+ ``` -## JSON_APPEND +## JSON_APPEND -### Description +**Usage**: `JSON_APPEND(json_string, path1, value1, path2, value2, ...)` + +Appends values to arrays in a JSON string at the specified JSON paths. Returns the modified JSON string. If a path's target node is not an array, that path is skipped. + +**Parameters**: + +- `json_string` (Required): The JSON string to modify. +- `path1`, `value1` (Required): The first path-value pair to append. +- `path2`, `value2`, `...` (Optional): Additional path-value pairs. + +**Return type**: `STRING` + +#### Examples + +The following example appends a value to an array: -Usage: `json_append(json_string, path1, value1, path2, value2...)` Append values to corresponding paths using the specified JSON paths. If one path's target node is not an array, skip the path. Return the json string after setting.
-**Argument type:** `json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ...` -**Return type:** `STRING` -### Example - ```ppl source=json_test -| eval jsonAppend = json_set('{"a": [{"b": 1}]}', 'a', 3) +| eval jsonAppend = json_append('{"a": [{"b": 1}]}', 'a', 3) | head 1 | fields jsonAppend ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 -+------------+ -| jsonAppend | -|------------| -| {"a":3} | -+------------+ ++-------------------+ +| jsonAppend | +|-------------------| +| {"a":[{"b":1},3]} | ++-------------------+ ``` + +The following example shows paths to non-array targets are skipped: ```ppl source=json_test @@ -363,7 +445,7 @@ source=json_test | fields jsonAppend ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -373,6 +455,8 @@ fetched rows / total rows = 1/1 | {"a":[{"b":1},{"b":2}]} | +-------------------------+ ``` + +The following example appends values using mixed path types: ```ppl source=json_test @@ -381,7 +465,7 @@ source=json_test | fields jsonAppend ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -392,15 +476,28 @@ fetched rows / total rows = 1/1 +-------------------------+ ``` -## JSON_EXTEND +## JSON_EXTEND -### Description +**Usage**: `JSON_EXTEND(json_string, path1, value1, path2, value2, ...)` + +Extends arrays in a JSON string at the specified JSON paths with new values. Returns the modified JSON string. If a path's target node is not an array, that path is skipped. + +The function attempts to parse each value as an array: +- If parsing succeeds: The parsed array elements are added to the target array. +- If parsing fails: The value is treated as a single element and added to the target array. + +**Parameters**: + +- `json_string` (Required): The JSON string to modify. +- `path1`, `value1` (Required): The first path-value pair to extend. 
+- `path2`, `value2`, `...` (Optional): Additional path-value pairs. + +**Return type**: `STRING` + +#### Examples + +The following example extends an array with a single value: -Usage: `json_extend(json_string, path1, value1, path2, value2...)` Extend values to corresponding paths using the specified JSON paths. If one path's target node is not an array, skip the path. The function will try to parse the value as an array. If it can be parsed, extend it to the target array. Otherwise, regard the value a single one. Return the json string after setting. -**Argument type:** `json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ...` -**Return type:** `STRING` -### Example - ```ppl source=json_test | eval jsonExtend = json_extend('{"a": [{"b": 1}]}', 'a', 3) @@ -408,7 +505,7 @@ source=json_test | fields jsonExtend ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -418,6 +515,8 @@ fetched rows / total rows = 1/1 | {"a":[{"b":1},3]} | +-------------------+ ``` + +The following example shows paths to non-array targets are skipped: ```ppl source=json_test @@ -426,7 +525,7 @@ source=json_test | fields jsonExtend ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -436,6 +535,8 @@ fetched rows / total rows = 1/1 | {"a":[{"b":1},{"b":2}]} | +-------------------------+ ``` + +The following example extends an array by parsing the value as an array: ```ppl source=json_test @@ -444,7 +545,7 @@ source=json_test | fields jsonExtend ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -455,15 +556,22 @@ fetched rows / total rows = 1/1 +-------------------------+ ``` -## JSON_KEYS +## JSON_KEYS -### Description +**Usage**: `JSON_KEYS(json_string)` + +Returns the keys of a JSON object as a JSON array. Returns `NULL` if the input is not a valid JSON object. 
+ +**Parameters**: + +- `json_string` (Required): A string containing a JSON object. + +**Return type**: `STRING` + +#### Examples + +The following example gets keys from a simple JSON object: -Usage: `json_keys(json_string)` Return the key list of the Json object as a Json array. Otherwise, return null. -**Argument type:** `json_string: A JSON STRING` -**Return type:** `STRING` -### Example - ```ppl source=json_test | eval jsonKeys = json_keys('{"a": 1, "b": 2}') @@ -471,7 +579,7 @@ source=json_test | fields jsonKeys ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -481,6 +589,8 @@ fetched rows / total rows = 1/1 | ["a","b"] | +-----------+ ``` + +The following example gets keys from a nested JSON object: ```ppl source=json_test @@ -489,7 +599,7 @@ source=json_test | fields jsonKeys ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 diff --git a/docs/user/ppl/functions/math.md b/docs/user/ppl/functions/math.md index 834e3523fdf..19bf60889f7 100644 --- a/docs/user/ppl/functions/math.md +++ b/docs/user/ppl/functions/math.md @@ -1,12 +1,19 @@ -# Mathematical Functions +# Mathematical functions -## ABS +The following mathematical functions are supported in PPL. -### Description +## ABS + +**Usage**: `ABS(x)` + +Calculates the absolute value of `x`. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` (same type as input) -Usage: `abs(x)` calculates the abs x. 
-**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `INTEGER/LONG/FLOAT/DOUBLE` ### Example ```ppl @@ -15,7 +22,7 @@ source=people | fields `ABS(-1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -26,14 +33,21 @@ fetched rows / total rows = 1/1 +---------+ ``` -## ADD +## ADD + +**Usage**: `ADD(x, y)` -### Description +Calculates the sum of `x` and `y`. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `y` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: The wider numeric type between `x` and `y` + +**Synonyms**: Addition Symbol (`+`) -Usage: `add(x, y)` calculates x plus y. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `Wider number between x and y` -Synonyms: Addition Symbol (+) ### Example ```ppl @@ -42,7 +56,7 @@ source=people | fields `ADD(2, 1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -53,14 +67,21 @@ fetched rows / total rows = 1/1 +-----------+ ``` -## SUBTRACT +## SUBTRACT + +**Usage**: `SUBTRACT(x, y)` + +Calculates `x` minus `y`. -### Description +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `y` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: The wider numeric type between `x` and `y` + +**Synonyms**: Subtraction Symbol (`-`) -Usage: `subtract(x, y)` calculates x minus y. 
-**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `Wider number between x and y` -Synonyms: Subtraction Symbol (-) ### Example ```ppl @@ -69,7 +90,7 @@ source=people | fields `SUBTRACT(2, 1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -80,14 +101,21 @@ fetched rows / total rows = 1/1 +----------------+ ``` -## MULTIPLY +## MULTIPLY + +**Usage**: `MULTIPLY(x, y)` + +Calculates the product of `x` and `y`. + +**Parameters**: -### Description +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `y` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: The wider numeric type between `x` and `y` + +**Synonyms**: Multiplication Symbol (`*`) -Usage: `multiply(x, y)` calculates the multiplication of x and y. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `Wider number between x and y. If y equals to 0, then returns NULL.` -Synonyms: Multiplication Symbol (\*) ### Example ```ppl @@ -96,7 +124,7 @@ source=people | fields `MULTIPLY(2, 1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -107,14 +135,21 @@ fetched rows / total rows = 1/1 +----------------+ ``` -## DIVIDE +## DIVIDE + +**Usage**: `DIVIDE(x, y)` + +Calculates `x` divided by `y`. + +**Parameters**: -### Description +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `y` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: The wider numeric type between `x` and `y` + +**Synonyms**: Division Symbol (`/`) -Usage: `divide(x, y)` calculates x divided by y. 
-**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `Wider number between x and y` -Synonyms: Division Symbol (/) ### Example ```ppl @@ -123,7 +158,7 @@ source=people | fields `DIVIDE(2, 1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -134,14 +169,21 @@ fetched rows / total rows = 1/1 +--------------+ ``` -## SUM +## SUM + +**Usage**: `SUM(x, y, ...)` + +Calculates the sum of all provided arguments. This function accepts a variable number of arguments. + +This function is only available in the `eval` command context and is rewritten to arithmetic addition during query parsing. +{: .note} + +**Parameters**: -### Description +- `x, y, ...` (Required): Variable number of `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` arguments. + +**Return type**: The widest numeric type among all arguments -Usage: `sum(x, y, ...)` calculates the sum of all provided arguments. This function accepts a variable number of arguments. -Note: This function is only available in the eval command context and is rewritten to arithmetic addition while query parsing. -**Argument type:** `Variable number of INTEGER/LONG/FLOAT/DOUBLE arguments` -**Return type:** `Wider number type among all arguments` ### Example ```ppl @@ -150,7 +192,7 @@ source=accounts | fields `SUM(1, 2, 3)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -170,7 +212,7 @@ source=accounts | fields age, total ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -184,14 +226,21 @@ fetched rows / total rows = 4/4 +-----+-------+ ``` -## AVG +## AVG + +**Usage**: `AVG(x, y, ...)` + +Calculates the average (arithmetic mean) of all provided arguments. This function accepts a variable number of arguments. 
+ +This function is only available in the `eval` command context and is rewritten to an arithmetic expression (sum / count) during query parsing. +{: .note} + +**Parameters**: + +- `x, y, ...` (Required): Variable number of `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` arguments. -### Description +**Return type**: `DOUBLE` -Usage: `avg(x, y, ...)` calculates the average (arithmetic mean) of all provided arguments. This function accepts a variable number of arguments. -Note: This function is only available in the eval command context and is rewritten to arithmetic expression (sum / count) at query parsing time. -**Argument type:** `Variable number of INTEGER/LONG/FLOAT/DOUBLE arguments` -**Return type:** `DOUBLE` ### Example ```ppl @@ -200,7 +249,7 @@ source=accounts | fields `AVG(1, 2, 3)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -220,7 +269,7 @@ source=accounts | fields age, average ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 4/4 @@ -234,13 +283,18 @@ fetched rows / total rows = 4/4 +-----+---------+ ``` -## ACOS +## ACOS -### Description +**Usage**: `ACOS(x)` + +Calculates the arccosine of `x`. Returns `NULL` if `x` is not in the `[-1, 1]` range. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `acos(x)` calculates the arc cosine of x. Returns NULL if x is not in the range -1 to 1. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -249,7 +303,7 @@ source=people | fields `ACOS(0)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -260,13 +314,18 @@ fetched rows / total rows = 1/1 +---------+ ``` -## ASIN +## ASIN + +**Usage**: `ASIN(x)` + +Calculates the arcsine of `x`. Returns `NULL` if `x` is not in the `[-1, 1]` range.
-### Description +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `asin(x)` calculate the arc sine of x. Returns NULL if x is not in the range -1 to 1. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -275,7 +334,7 @@ source=people | fields `ASIN(0)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -286,13 +345,19 @@ fetched rows / total rows = 1/1 +---------+ ``` -## ATAN +## ATAN + +**Usage**: `ATAN(x)`, `ATAN(y, x)` + +Calculates the arctangent of `x`. `ATAN(y, x)` calculates the arctangent of the quotient y / x, using the signs of both arguments to determine the quadrant of the result. + +**Parameters**: -### Description +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `y` (Optional): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value (when using two-argument form). + +**Return type**: `DOUBLE` -Usage: `atan(x)` calculates the arc tangent of x. atan(y, x) calculates the arc tangent of y / x, except that the signs of both arguments are used to determine the quadrant of the result. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -301,7 +366,7 @@ source=people | fields `ATAN(2)`, `ATAN(2, 3)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -312,13 +377,19 @@ fetched rows / total rows = 1/1 +--------------------+--------------------+ ``` -## ATAN2 +## ATAN2 + +**Usage**: `ATAN2(y, x)` + +Calculates the arctangent of the quotient y / x, using the signs of both arguments to determine the quadrant of the result. -### Description +**Parameters**: + +- `y` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. 
+ +**Return type**: `DOUBLE` -Usage: atan2(y, x) calculates the arc tangent of y / x, except that the signs of both arguments are used to determine the quadrant of the result. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -327,7 +398,7 @@ source=people | fields `ATAN2(2, 3)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -340,16 +411,35 @@ fetched rows / total rows = 1/1 ## CEIL +**Usage**: `CEIL(x)` + +Returns the ceiling of the value `x`. + An alias for [CEILING](#ceiling) function. -## CEILING -### Description +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: Same type as input + +## CEILING + +**Usage**: `CEILING(x)` + +Returns the ceiling of the value `x`. + +The [`CEIL`](#ceil) and `CEILING` functions have the same implementation and functionality. +{: .note} + +Limitation: `CEILING` only works as expected when the IEEE 754 double type displays a decimal when stored. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: Same type as input -Usage: `CEILING(T)` takes the ceiling of value T. -Note: [CEIL](#ceil) and CEILING functions have the same implementation & functionality -Limitation: CEILING only works as expected when IEEE 754 double type displays decimal when stored. 
-**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `same type with input` ### Example ```ppl @@ -358,7 +448,7 @@ source=people | fields `CEILING(0)`, `CEILING(50.00005)`, `CEILING(-50.00005)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -375,7 +465,7 @@ source=people | fields `CEILING(3147483647.12345)`, `CEILING(113147483647.12345)`, `CEILING(3147483647.00001)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -386,13 +476,20 @@ fetched rows / total rows = 1/1 +---------------------------+-----------------------------+---------------------------+ ``` -## CONV +## CONV + +**Usage**: `CONV(x, a, b)` -### Description +Converts the number `x` from base `a` to base `b`. + +**Parameters**: + +- `x` (Required): A `STRING` value. +- `a` (Required): An `INTEGER` value. +- `b` (Required): An `INTEGER` value. + +**Return type**: `STRING` -Usage: `CONV(x, a, b)` converts the number x from a base to b base. -**Argument type:** `x: STRING, a: INTEGER, b: INTEGER` -**Return type:** `STRING` ### Example ```ppl @@ -401,7 +498,7 @@ source=people | fields `CONV('12', 10, 16)`, `CONV('2C', 16, 10)`, `CONV(12, 10, 2)`, `CONV(1111, 2, 10)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -412,13 +509,18 @@ fetched rows / total rows = 1/1 +--------------------+--------------------+-----------------+-------------------+ ``` -## COS +## COS + +**Usage**: `COS(x)` + +Calculates the cosine of `x`, where `x` is given in radians. -### Description +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `cos(x)` calculates the cosine of x, where x is given in radians. 
-**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -427,7 +529,7 @@ source=people | fields `COS(0)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -438,13 +540,18 @@ fetched rows / total rows = 1/1 +--------+ ``` -## COSH +## COSH + +**Usage**: `COSH(x)` + +Calculates the hyperbolic cosine of `x`, defined as (((e^x) + (e^(-x))) / 2). + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. -### Description +**Return type**: `DOUBLE` -Usage: `cosh(x)` calculates the hyperbolic cosine of x, defined as (((e^x) + (e^(-x))) / 2). -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -453,7 +560,7 @@ source=people | fields `COSH(2)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -464,13 +571,18 @@ fetched rows / total rows = 1/1 +--------------------+ ``` -## COT +## COT -### Description +**Usage**: `COT(x)` + +Calculates the cotangent of `x`. Returns an error if `x` equals 0. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `cot(x)` calculates the cotangent of x. Returns out-of-range error if x equals to 0. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -479,7 +591,7 @@ source=people | fields `COT(1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -490,13 +602,18 @@ fetched rows / total rows = 1/1 +--------------------+ ``` -## CRC32 +## CRC32 + +**Usage**: `CRC32(expr)` -### Description +Calculates a cyclic redundancy check value and returns a 32-bit unsigned value. + +**Parameters**: + +- `expr` (Required): A `STRING` value. + +**Return type**: `LONG` -Usage: Calculates a cyclic redundancy check value and returns a 32-bit unsigned value. 
-**Argument type:** `STRING` -**Return type:** `LONG` ### Example ```ppl @@ -505,7 +622,7 @@ source=people | fields `CRC32('MySQL')` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -516,13 +633,18 @@ fetched rows / total rows = 1/1 +----------------+ ``` -## DEGREES +## DEGREES + +**Usage**: `DEGREES(x)` + +Converts `x` from radians to degrees. + +**Parameters**: -### Description +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `degrees(x)` converts x from radians to degrees. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -531,7 +653,7 @@ source=people | fields `DEGREES(1.57)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -542,12 +664,16 @@ fetched rows / total rows = 1/1 +-------------------+ ``` -## E +## E + +**Usage**: `E()` + +Returns Euler's number (e ≈ 2.718281828459045). -### Description +**Parameters**: None + +**Return type**: `DOUBLE` -Usage: `E()` returns the Euler's number -**Return type:** `DOUBLE` ### Example ```ppl @@ -556,7 +682,7 @@ source=people | fields `E()` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -567,13 +693,18 @@ fetched rows / total rows = 1/1 +-------------------+ ``` -## EXP +## EXP + +**Usage**: `EXP(x)` -### Description +Returns e raised to the power of `x`. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `exp(x)` return e raised to the power of x. 
-**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -582,7 +713,7 @@ source=people | fields `EXP(2)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -593,13 +724,18 @@ fetched rows / total rows = 1/1 +------------------+ ``` -## EXPM1 +## EXPM1 + +**Usage**: `EXPM1(x)` + +Returns e^x - 1 (exponential of `x` minus 1). + +**Parameters**: -### Description +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: expm1(NUMBER T) returns the exponential of T, minus 1. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -608,7 +744,7 @@ source=people | fields `EXPM1(1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -619,14 +755,20 @@ fetched rows / total rows = 1/1 +-------------------+ ``` -## FLOOR +## FLOOR + +**Usage**: `FLOOR(x)` + +Returns the floor of the value `x`. + +Limitation: `FLOOR` only works as expected when the IEEE 754 double type displays a decimal when stored. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. -### Description +**Return type**: Same type as input -Usage: `FLOOR(T)` takes the floor of value T. -Limitation: FLOOR only works as expected when IEEE 754 double type displays decimal when stored. 
-**Argument type:** `a: INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `same type with input` ### Example ```ppl @@ -635,7 +777,7 @@ source=people | fields `FLOOR(0)`, `FLOOR(50.00005)`, `FLOOR(-50.00005)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -652,7 +794,7 @@ source=people | fields `FLOOR(3147483647.12345)`, `FLOOR(113147483647.12345)`, `FLOOR(3147483647.00001)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -669,7 +811,7 @@ source=people | fields `FLOOR(282474973688888.022)`, `FLOOR(9223372036854775807.022)`, `FLOOR(9223372036854775807.0000001)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -680,13 +822,18 @@ fetched rows / total rows = 1/1 +----------------------------+--------------------------------+------------------------------------+ ``` -## LN +## LN -### Description +**Usage**: `LN(x)` + +Returns the natural logarithm of `x`. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `ln(x)` return the the natural logarithm of x. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -695,7 +842,7 @@ source=people | fields `LN(2)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -706,14 +853,19 @@ fetched rows / total rows = 1/1 +--------------------+ ``` -## LOG +## LOG + +**Usage**: `LOG(x)`, `LOG(B, x)` + +Returns the natural logarithm of `x` (base e logarithm). `LOG(B, x)` is equivalent to log(x)/log(B). -### Description +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `B` (Optional): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value (when using two-argument form). 
+ +**Return type**: `DOUBLE` -Specifications: -Usage: `log(x)` returns the natural logarithm of x that is the base e logarithm of the x. log(B, x) is equivalent to log(x)/log(B). -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -722,7 +874,7 @@ source=people | fields `LOG(2)`, `LOG(2, 8)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -733,14 +885,18 @@ fetched rows / total rows = 1/1 +--------------------+-----------+ ``` -## LOG2 +## LOG2 + +**Usage**: `LOG2(x)` + +Returns the base-2 logarithm of `x`. Equivalent to log(x)/log(2). + +**Parameters**: -### Description +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Specifications: -Usage: log2(x) is equivalent to log(x)/log(2). -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -749,7 +905,7 @@ source=people | fields `LOG2(8)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -760,14 +916,18 @@ fetched rows / total rows = 1/1 +---------+ ``` -## LOG10 +## LOG10 + +**Usage**: `LOG10(x)` + +Returns the base-10 logarithm of `x`. Equivalent to log(x)/log(10). -### Description +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Specifications: -Usage: log10(x) is equivalent to log(x)/log(10). -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -776,7 +936,7 @@ source=people | fields `LOG10(100)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -787,13 +947,19 @@ fetched rows / total rows = 1/1 +------------+ ``` -## MOD +## MOD + +**Usage**: `MOD(n, m)` -### Description +Calculates the remainder of the number `n` divided by `m`. 
+ +**Parameters**: + +- `n` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `m` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: The wider type between `n` and `m` if `m` is nonzero value. If `m` equals `0`, then returns `NULL`. -Usage: `MOD(n, m)` calculates the remainder of the number n divided by m. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `Wider type between types of n and m if m is nonzero value. If m equals to 0, then returns NULL.` ### Example ```ppl @@ -802,7 +968,7 @@ source=people | fields `MOD(3, 2)`, `MOD(3.1, 2)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -813,13 +979,19 @@ fetched rows / total rows = 1/1 +-----------+-------------+ ``` -## MODULUS +## MODULUS + +**Usage**: `MODULUS(n, m)` + +Calculates the remainder of the number `n` divided by `m`. -### Description +**Parameters**: + +- `n` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `m` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: The wider type between `n` and `m` if `m` is nonzero value. If `m` equals `0`, then returns `NULL`. -Usage: `MODULUS(n, m)` calculates the remainder of the number n divided by m. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `Wider type between types of n and m if m is nonzero value. If m equals to 0, then returns NULL.` ### Example ```ppl @@ -828,7 +1000,7 @@ source=people | fields `MODULUS(3, 2)`, `MODULUS(3.1, 2)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -839,12 +1011,16 @@ fetched rows / total rows = 1/1 +---------------+-----------------+ ``` -## PI +## PI + +**Usage**: `PI()` -### Description +Returns the mathematical constant π (pi ≈ 3.141592653589793). 
+ +**Parameters**: None + +**Return type**: `DOUBLE` -Usage: `PI()` returns the constant pi -**Return type:** `DOUBLE` ### Example ```ppl @@ -853,7 +1029,7 @@ source=people | fields `PI()` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -864,14 +1040,21 @@ fetched rows / total rows = 1/1 +-------------------+ ``` -## POW +## POW + +**Usage**: `POW(x, y)` -### Description +Calculates the value of `x` raised to the power of `y`. Invalid inputs return `NULL`. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `y` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` + +**Synonyms**: [POWER](#power) -Usage: `POW(x, y)` calculates the value of x raised to the power of y. Bad inputs return NULL result. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` -Synonyms: [POWER](#power) ### Example ```ppl @@ -880,7 +1063,7 @@ source=people | fields `POW(3, 2)`, `POW(-3, 2)`, `POW(3, -2)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -891,14 +1074,21 @@ fetched rows / total rows = 1/1 +-----------+------------+--------------------+ ``` -## POWER +## POWER + +**Usage**: `POWER(x, y)` -### Description +Calculates the value of `x` raised to the power of `y`. Invalid inputs return `NULL`. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `y` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` + +**Synonyms**: [POW](#pow) -Usage: `POWER(x, y)` calculates the value of x raised to the power of y. Bad inputs return NULL result. 
-**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` -Synonyms: [POW](#pow) ### Example ```ppl @@ -907,7 +1097,7 @@ source=people | fields `POWER(3, 2)`, `POWER(-3, 2)`, `POWER(3, -2)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -918,13 +1108,18 @@ fetched rows / total rows = 1/1 +-------------+--------------+--------------------+ ``` -## RADIANS +## RADIANS + +**Usage**: `RADIANS(x)` + +Converts x from degrees to radians. -### Description +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `radians(x)` converts x from degrees to radians. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -933,7 +1128,7 @@ source=people | fields `RADIANS(90)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -944,13 +1139,18 @@ fetched rows / total rows = 1/1 +--------------------+ ``` -## RAND +## RAND + +**Usage**: `RAND()`, `RAND(N)` + +Returns a random floating-point value in the `[0, 1)` range. If an integer `N` is specified, the seed is initialized prior to execution. As a result, calling `RAND(N)` with the same value of `N` always returns the same result, producing a repeatable sequence of column values. + +**Parameters**: + +- `N` (Optional): An `INTEGER` value. -### Description +**Return type**: `FLOAT` -Usage: `RAND()`/`RAND(`N) returns a random floating-point value in the range 0 <= value < 1.0. If integer N is specified, the seed is initialized prior to execution. One implication of this behavior is with identical argument N, rand(N) returns the same value each time, and thus produces a repeatable sequence of column values. 
-**Argument type:** `INTEGER` -**Return type:** `FLOAT` ### Example ```ppl @@ -959,7 +1159,7 @@ source=people | fields `RAND(3)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -970,15 +1170,21 @@ fetched rows / total rows = 1/1 +---------------------+ ``` -## ROUND +## ROUND -### Description +**Usage**: `ROUND(x, d)` + +Rounds the argument `x` to `d` decimal places. `d` defaults to `0`. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. +- `d` (Optional): An `INTEGER` value. + +**Return type**: +- `(INTEGER/LONG [,INTEGER])` -> `LONG`. +- `(FLOAT/DOUBLE [,INTEGER])` -> `LONG`. -Usage: `ROUND(x, d)` rounds the argument x to d decimal places, d defaults to 0 if not specified -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -Return type map: -(INTEGER/LONG [,INTEGER]) -> LONG -(FLOAT/DOUBLE [,INTEGER]) -> LONG ### Example ```ppl @@ -987,7 +1193,7 @@ source=people | fields `ROUND(12.34)`, `ROUND(12.34, 1)`, `ROUND(12.34, -1)`, `ROUND(12, 1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -998,13 +1204,18 @@ fetched rows / total rows = 1/1 +--------------+-----------------+------------------+--------------+ ``` -## SIGN +## SIGN + +**Usage**: `SIGN(x)` + +Returns the sign of the argument as `-1`, `0`, or `1`, depending on whether the number is negative, zero, or positive. -### Description +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. 
+ +**Return type**: Same type as input -Usage: Returns the sign of the argument as -1, 0, or 1, depending on whether the number is negative, zero, or positive -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `same type with input` ### Example ```ppl @@ -1013,7 +1224,7 @@ source=people | fields `SIGN(1)`, `SIGN(0)`, `SIGN(-1.1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1024,14 +1235,20 @@ fetched rows / total rows = 1/1 +---------+---------+------------+ ``` -## SIGNUM +## SIGNUM + +**Usage**: `SIGNUM(x)` + +Returns the sign of the argument as `-1`, `0`, or `1`, depending on whether the number is negative, zero, or positive. + +**Parameters**: -### Description +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `INTEGER` + +**Synonyms**: `SIGN` -Usage: Returns the sign of the argument as -1, 0, or 1, depending on whether the number is negative, zero, or positive -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `INTEGER` -Synonyms: `SIGN` ### Example ```ppl @@ -1040,7 +1257,7 @@ source=people | fields `SIGNUM(1)`, `SIGNUM(0)`, `SIGNUM(-1.1)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1051,13 +1268,18 @@ fetched rows / total rows = 1/1 +-----------+-----------+--------------+ ``` -## SIN +## SIN + +**Usage**: `SIN(x)` + +Calculates the sine of `x`, where `x` is given in radians. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. -### Description +**Return type**: `DOUBLE` -Usage: `sin(x)` calculates the sine of x, where x is given in radians. 
-**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -1066,7 +1288,7 @@ source=people | fields `SIN(0)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1077,13 +1299,18 @@ fetched rows / total rows = 1/1 +--------+ ``` -## SINH +## SINH -### Description +**Usage**: `SINH(x)` + +Calculates the hyperbolic sine of `x`, defined as (((e^x) - (e^(-x))) / 2). + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: `sinh(x)` calculates the hyperbolic sine of x, defined as (((e^x) - (e^(-x))) / 2). -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -1092,7 +1319,7 @@ source=people | fields `SINH(2)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1103,15 +1330,20 @@ fetched rows / total rows = 1/1 +-------------------+ ``` -## SQRT +## SQRT + +**Usage**: `SQRT(x)` -### Description +Calculates the square root of a non-negative number `x`. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: +- `(Non-negative) INTEGER/LONG/FLOAT/DOUBLE` -> `DOUBLE`. +- `(Negative) INTEGER/LONG/FLOAT/DOUBLE` -> `NULL`. -Usage: Calculates the square root of a non-negative number -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -Return type map: -(Non-negative) INTEGER/LONG/FLOAT/DOUBLE -> DOUBLE -(Negative) INTEGER/LONG/FLOAT/DOUBLE -> NULL ### Example ```ppl @@ -1120,7 +1352,7 @@ source=people | fields `SQRT(4)`, `SQRT(4.41)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 @@ -1131,14 +1363,18 @@ fetched rows / total rows = 1/1 +---------+------------+ ``` -## CBRT +## CBRT + +**Usage**: `CBRT(x)` + +Calculates the cube root of a number `x`. 
+ +**Parameters**: -### Description +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. + +**Return type**: `DOUBLE` -Usage: Calculates the cube root of a number -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -Return type DOUBLE: -INTEGER/LONG/FLOAT/DOUBLE -> DOUBLE ### Example ```ppl ignore @@ -1147,7 +1383,7 @@ source=location | fields `CBRT(8)`, `CBRT(9.261)`, `CBRT(-27)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 2/2 @@ -1159,13 +1395,18 @@ fetched rows / total rows = 2/2 +---------+-------------+-----------+ ``` -## RINT +## RINT + +**Usage**: `RINT(x)` + +Returns `x` rounded to the nearest integer. + +**Parameters**: + +- `x` (Required): An `INTEGER`, `LONG`, `FLOAT`, or `DOUBLE` value. -### Description +**Return type**: `DOUBLE` -Usage: `rint(NUMBER T)` returns T rounded to the closest whole integer number. -**Argument type:** `INTEGER/LONG/FLOAT/DOUBLE` -**Return type:** `DOUBLE` ### Example ```ppl @@ -1174,7 +1415,7 @@ source=people | fields `RINT(1.7)` ``` -Expected output: +The query returns the following results: ```text fetched rows / total rows = 1/1 diff --git a/docs/user/ppl/functions/relevance.md b/docs/user/ppl/functions/relevance.md index a40a3cd7644..a0bcfe59cd2 100644 --- a/docs/user/ppl/functions/relevance.md +++ b/docs/user/ppl/functions/relevance.md @@ -1,26 +1,41 @@ -# Relevance Functions - -The relevance based functions enable users to search the index for documents by the relevance of the input query. The functions are built on the top of the search queries of the OpenSearch engine, but in memory execution within the plugin is not supported. These functions are able to perform the global filter of a query, for example the condition expression in a `WHERE` clause or in a `HAVING` clause. 
For more details of the relevance based search, check out the design here: [Relevance Based Search With SQL/PPL Query Engine](https://github.com/opensearch-project/sql/issues/182) -## MATCH - -### Description - -`match(field_expression, query_expression[, option=]*)` -The match function maps to the match query used in search engine, to return the documents that match a provided text, number, date or boolean value with a given field. Available parameters include: -- analyzer -- auto_generate_synonyms_phrase -- fuzziness -- max_expansions -- prefix_length -- fuzzy_transpositions -- fuzzy_rewrite -- lenient -- operator -- minimum_should_match -- zero_terms_query -- boost - -Example with only `field` and `query` expressions, and all other parameters are set default values +# Relevance functions + +Relevance-based functions enable users to search an index for documents based on query relevance. These functions are built on top of OpenSearch engine search queries, but in-memory execution within the plugin is not supported. + +You can use these functions for global query filtering, such as in condition expressions within `WHERE` or `HAVING` clauses. For more details about relevance-based search, see [Relevance Based Search With SQL/PPL Query Engine](https://github.com/opensearch-project/sql/issues/182). + +## MATCH + +**Usage**: `MATCH(, [,

    Verifies that when a PPL {@code sort} has multiple fields before a {@code dedup}, every + * field is preserved through the pushdown (not only the first one). A single-field pushdown would + * lose the tie-breaker and return a non-deterministic row for each dedup group. + * + *

    Data used: the {@code accounts} test index. In state {@code AK} there are multiple F and M + * accounts; under {@code sort state, age, account_number} the first M row is {@code (state=AK, + * age=20, account_number=23)} and the first F row is {@code (state=AK, age=21, + * account_number=334)}. Only a correct multi-field pushdown produces these exact rows. + */ + @Test + public void testMultiColumnSortThenDedup() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | sort state, age, account_number | dedup 1 gender | fields gender," + + " state, age, account_number", + TEST_INDEX_ACCOUNT)); + verifyDataRows(actual, rows("M", "AK", 20, 23), rows("F", "AK", 21, 334)); + } + + /** Regression test for https://github.com/opensearch-project/sql/issues/3922 */ + @Test + public void testSortThenDedupKeepEmpty() throws IOException { + // Verify sort order is preserved through dedup with keepempty=true + JSONObject actual = + executeQuery( + String.format( + "source=%s | sort category | dedup 1 name KEEPEMPTY=true | fields category, name", + TEST_INDEX_DUPLICATION_NULLABLE)); + // category should be in ascending order (with nulls first due to ASC-nulls-first) + // dedup 1 name KEEPEMPTY=true: keep first occurrence of each name, plus ALL null-name rows + verifyDataRows( + actual, + rows(null, null), + rows(null, "B"), + rows(null, "E"), + rows("X", null), + rows("X", "A"), + rows("X", "C"), + rows("Y", null), + rows("Z", null), + rows("Z", "D")); + } + @Test public void testDedupExpr() throws IOException { JSONObject actual = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java index f1b546a5681..fd9a5cff774 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java @@ -171,10 +171,63 @@ public void testCoalesceWithAllNonExistentFields() throws IOException { + " head 1", TEST_INDEX_STATE_COUNTRY_WITH_NULL)); - verifySchema(actual, schema("name", "string"), schema("result", "string")); + // When every COALESCE operand is missing/null, the result has no known type (see #5175). + verifySchema(actual, schema("name", "string"), schema("result", "undefined")); verifyDataRows(actual, rows("Jake", null)); } + @Test + public void testCoalesceWithNullLiteralAndInteger() throws IOException { + // Bug #5175: COALESCE(null, 42) must return the integer 42, not the string "42". + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = coalesce(null, 42) | fields result | head 1", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(42)); + } + + @Test + public void testCoalesceWithIntegerAndNullLiteral() throws IOException { + // Bug #5175: COALESCE(42, null) must return the integer 42, not the string "42". + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = coalesce(42, null) | fields result | head 1", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchema(actual, schema("result", "int")); + verifyDataRows(actual, rows(42)); + } + + @Test + public void testCoalesceWithNullLiteralAndDouble() throws IOException { + // Bug #5175: COALESCE(null, 3.14) must return a numeric double, not a string. + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = coalesce(null, 3.14) | fields result | head 1", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchema(actual, schema("result", "double")); + verifyDataRows(actual, rows(3.14)); + } + + @Test + public void testCoalesceWithNullLiteralAndIntegerField() throws IOException { + // Bug #5175: COALESCE(null, age) on an int field must keep the integer type. 
+ JSONObject actual = + executeQuery( + String.format( + "source=%s | eval result = coalesce(null, age) | fields age, result | head 3", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchema(actual, schema("age", "int"), schema("result", "int")); + verifyDataRows(actual, rows(70, 70), rows(30, 30), rows(25, 25)); + } + @Test public void testCoalesceWithEmptyString() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index 3d2b6ee5b0b..62e872be945 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -44,6 +44,9 @@ public class CalcitePPLGraphLookupIT extends PPLIntegTestCase { public void init() throws Exception { super.init(); enableCalcite(); + // Skip test if pushdown is disabled + // TODO: support no-pushdown config for graph lookup + enabledOnlyWhenPushdownIsEnabled(); loadIndex(Index.GRAPH_EMPLOYEES); loadIndex(Index.GRAPH_TRAVELERS); @@ -790,4 +793,127 @@ public void testBatchModeBidirectional() throws IOException { mapOf("name", "Dan", "reportsTo", "Andrew", "id", 6, "depth", 0), mapOf("name", "Asya", "reportsTo", "Ron", "id", 5, "depth", 1)))); } + + // ==================== Top-Level Literal Start Tests ==================== + + /** + * Test 20: Top-level graphLookup with single literal start value. BFS from "Eliot" finds the + * reporting chain: Eliot->Ron->Andrew. 
+ */ + @Test + public void testTopLevelGraphLookupSingleLiteral() throws IOException { + JSONObject result = + executeQuery( + String.format( + "graphLookup %s" + + " start='Eliot'" + + " edge=reportsTo-->name" + + " maxDepth=5" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES)); + + // Output is single row with just the reportingHierarchy array + verifySchema(result, schema("reportingHierarchy", "array")); + // BFS from "Eliot": toField=name matches Eliot -> Eliot row has reportsTo=Ron + // -> then name matches Ron -> Ron row has reportsTo=Andrew + // -> then name matches Andrew -> Andrew row has reportsTo=null (no further traversal) + verifyDataRows( + result, + rows( + (Object) + List.of( + Map.of("name", "Eliot", "reportsTo", "Ron", "id", 2), + Map.of("name", "Ron", "reportsTo", "Andrew", "id", 3), + mapOf("name", "Andrew", "reportsTo", null, "id", 4)))); + } + + /** + * Test 21: Top-level graphLookup with literal list start values. Combined BFS from "Eliot" and + * "Andrew". + */ + @Test + public void testTopLevelGraphLookupLiteralList() throws IOException { + JSONObject result = + executeQuery( + String.format( + "graphLookup %s" + + " start='Eliot', 'Andrew'" + + " edge=reportsTo-->name" + + " maxDepth=5" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES)); + + verifySchema(result, schema("reportingHierarchy", "array")); + // Combined BFS from {Eliot, Andrew}: + // Depth 0: name IN (Eliot, Andrew) → finds Eliot (reportsTo=Ron) and Andrew (reportsTo=null) + // Depth 1: name IN (Ron) AND reportsTo NOT IN (Eliot, Andrew, Ron) → Ron excluded + // because Ron.reportsTo=Andrew is in visited set + verifyDataRows( + result, + rows( + (Object) + List.of( + Map.of("name", "Eliot", "reportsTo", "Ron", "id", 2), + mapOf("name", "Andrew", "reportsTo", null, "id", 4)))); + } + + /** Test 22: Top-level graphLookup with maxDepth. 
*/ + @Test + public void testTopLevelGraphLookupWithMaxDepth() throws IOException { + JSONObject result = + executeQuery( + String.format( + "graphLookup %s" + + " start='Eliot'" + + " edge=reportsTo-->name" + + " maxDepth=0" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES)); + + verifySchema(result, schema("reportingHierarchy", "array")); + // maxDepth=0: Only immediate match for "Eliot" (Eliot row), no further traversal + verifyDataRows( + result, rows((Object) List.of(Map.of("name", "Eliot", "reportsTo", "Ron", "id", 2)))); + } + + /** Test 23: Top-level graphLookup with depthField and maxDepth. */ + @Test + public void testTopLevelGraphLookupWithDepthField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "graphLookup %s" + + " start='Eliot'" + + " edge=reportsTo-->name" + + " depthField=level" + + " maxDepth=5" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES)); + + verifySchema(result, schema("reportingHierarchy", "array")); + verifyDataRows( + result, + rows( + (Object) + List.of( + mapOf("name", "Eliot", "reportsTo", "Ron", "id", 2, "level", 0), + mapOf("name", "Ron", "reportsTo", "Andrew", "id", 3, "level", 1), + mapOf("name", "Andrew", "reportsTo", null, "id", 4, "level", 2)))); + } + + /** Test 24: Top-level graphLookup with non-existent start value yields empty results. 
*/ + @Test + public void testTopLevelGraphLookupNonExistentStart() throws IOException { + JSONObject result = + executeQuery( + String.format( + "graphLookup %s" + + " start='NonExistent'" + + " edge=reportsTo-->name" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES)); + + verifySchema(result, schema("reportingHierarchy", "array")); + verifyDataRows(result, rows((Object) Collections.emptyList())); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java index 0ec367aa318..99af10302ae 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJsonBuiltinFunctionIT.java @@ -296,6 +296,38 @@ public void testJsonSetPartialSet() throws IOException { verifyDataRows(actual, rows("{\"a\":[{\"b\":1},{\"b\":{\"c\":\"3\"}}]}")); } + @Test + public void testJsonSetWithDollarPrefixedPath() throws IOException { + // Issue #5167: json_set with $.key path should not double-prefix + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval a" + + " =json_set('{\\\"name\\\":\\\"alice\\\",\\\"scores\\\":[90,85,92]}'," + + " '$.name', 'modified_alice')| fields a | head 1", + TEST_INDEX_PEOPLE2)); + + verifySchema(actual, schema("a", "string")); + + verifyDataRows(actual, rows("{\"name\":\"modified_alice\",\"scores\":[90,85,92]}")); + } + + @Test + public void testJsonDeleteWithDollarPrefixedPath() throws IOException { + // Issue #5167: json_delete with $.key path should remove the key + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval a" + + " =json_delete('{\\\"name\\\":\\\"alice\\\",\\\"scores\\\":[90,85,92]}'," + + " '$.name')| fields a | head 1", + TEST_INDEX_PEOPLE2)); + + verifySchema(actual, schema("a", "string")); + + verifyDataRows(actual, 
rows("{\"scores\":[90,85,92]}")); + } + @Test public void testJsonDelete() throws IOException { JSONObject actual = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLNestedAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLNestedAggregationIT.java index faaae541d1e..c7ec6434744 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLNestedAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLNestedAggregationIT.java @@ -17,6 +17,7 @@ import java.io.IOException; import org.json.JSONObject; import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalcitePPLNestedAggregationIT extends PPLIntegTestCase { @@ -175,7 +176,7 @@ public void testNestedAggregationThrowExceptionIfPushdownCannotApplied() throws enabledOnlyWhenPushdownIsEnabled(); Throwable t = assertThrowsWithReplace( - UnsupportedOperationException.class, + ErrorReport.class, () -> executeQuery( String.format( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java index 6cd0674a2dc..3503d7c533c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java @@ -14,6 +14,10 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder; import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import org.hamcrest.Matcher; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.Test; import org.opensearch.sql.ppl.PPLIntegTestCase; @@ -40,7 +44,7 @@ public void testRename() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - 
verifyStandardDataRows(result); + verifyStandardDataRows(result, "name", "country", "state", "month", "year", "renamed_age"); } @Test @@ -77,7 +81,7 @@ public void testRenameToMetaField() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "name", "country", "state", "month", "year", "_ID"); } @Test @@ -156,7 +160,7 @@ public void testRenameWildcardFields() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "nAME", "country", "state", "month", "year", "age"); } @Test @@ -171,7 +175,7 @@ public void testRenameMultipleWildcardFields() throws IOException { schema("couNTry", "string"), schema("year", "int"), schema("moNTh", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "name", "couNTry", "state", "moNTh", "year", "age"); } @Test @@ -186,7 +190,7 @@ public void testRenameWildcardPrefix() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "new_na", "country", "state", "month", "year", "age"); } @Test @@ -199,6 +203,41 @@ public void testRenameFullWildcard() throws IOException { verifyDataRows(result, rows("Jake", 70), rows("Hello", 30), rows("John", 25), rows("Jane", 20)); } + @Test + public void testRenameFullWildcardExcludesMetadataFields() throws IOException { + JSONObject result = + executeQuery(String.format("source = %s | rename * as old_*", TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("old_name", "string"), + schema("old_age", "int"), + schema("old_state", "string"), + schema("old_country", "string"), + schema("old_year", "int"), + schema("old_month", "int")); + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70), + rows("Hello", "USA", "New York", 4, 2023, 
30), + rows("John", "Canada", "Ontario", 4, 2023, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20)); + } + + @Test + public void testRenamePartialWildcardExcludesMetadataFields() throws IOException { + JSONObject result = + executeQuery(String.format("source = %s | rename _* as meta_*", TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int")); + verifyStandardDataRows(result); + } + @Test public void testRenameMultipleWildcards() throws IOException { JSONObject result = @@ -212,7 +251,7 @@ public void testRenameMultipleWildcards() throws IOException { schema("country", "string"), schema("year", "int"), schema("MoNtH", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "name", "country", "state", "MoNtH", "year", "age"); } @Test @@ -261,12 +300,14 @@ public void testRenamingToExistingField() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyDataRows( + // After `rename name as age`, the original name column overwrites the original age column; + // the (number) age values are gone and only the (string) name values remain under "age". 
+ verifyDataRowsByColumn( result, - rows("Jake", "USA", "California", 4, 2023), - rows("Hello", "USA", "New York", 4, 2023), - rows("John", "Canada", "Ontario", 4, 2023), - rows("Jane", "Canada", "Quebec", 4, 2023)); + rowOf("age", "Jake", "country", "USA", "state", "California", "month", 4, "year", 2023), + rowOf("age", "Hello", "country", "USA", "state", "New York", "month", 4, "year", 2023), + rowOf("age", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023), + rowOf("age", "Jane", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023)); } @Test @@ -296,12 +337,12 @@ public void testRenamingNonExistentFieldToExistingField() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyDataRows( + verifyDataRowsByColumn( result, - rows("Jake", "USA", "California", 4, 2023), - rows("Hello", "USA", "New York", 4, 2023), - rows("John", "Canada", "Ontario", 4, 2023), - rows("Jane", "Canada", "Quebec", 4, 2023)); + rowOf("name", "Jake", "country", "USA", "state", "California", "month", 4, "year", 2023), + rowOf("name", "Hello", "country", "USA", "state", "New York", "month", 4, "year", 2023), + rowOf("name", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023), + rowOf("name", "Jane", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023)); } @Test @@ -345,7 +386,7 @@ public void testMultipleRenameWithoutComma() throws IOException { schema("location", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "user_name", "location", "state", "month", "year", "user_age"); } @Test @@ -363,15 +404,103 @@ public void testRenameMixedCommaAndSpace() throws IOException { schema("location", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "user_name", "location", "state", "month", "year", "user_age"); + } + + /** + * Build a 
{@code column -> value} map from interleaved varargs ({@code key1, val1, key2, val2, + * ...}). Preserves insertion order so the expected-row mapping reads naturally at the call site. + */ + private static Map rowOf(Object... pairs) { + if (pairs.length % 2 != 0) { + throw new IllegalArgumentException("rowOf expects an even number of args (key, value, ...)"); + } + Map row = new LinkedHashMap<>(); + for (int i = 0; i < pairs.length; i += 2) { + row.put((String) pairs[i], pairs[i + 1]); + } + return row; } private void verifyStandardDataRows(JSONObject result) { - verifyDataRows( - result, - rows("Jake", "USA", "California", 4, 2023, 70), - rows("Hello", "USA", "New York", 4, 2023, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20)); + verifyStandardDataRows(result, "name", "country", "state", "month", "year", "age"); + } + + /** + * Verify the four canonical state_country rows independently of column order. + * + *

    The schema check above ({@code verifySchema}) is set-equality on column names; the data row + * check {@code verifyDataRows} is positional. The two paths the analytics-engine route can take + * return columns in different orders (parquet preserves storage order, the v2 / Lucene path + * preserves {@code _source} iteration order), and either is valid given the contract {@code + * verifySchema} declares. To avoid baking either order into the test, this helper takes the + * canonical-position column names as varargs and reorders the canonical row values to match + * whatever column order the response actually returned. + * + * @param result the response JSON + * @param canonicalColumns the column names of the four canonical rows in {@code (name-or-renamed, + * country-or-renamed, state, month, year, age-or-renamed)} order. Pass the rename target + * where applicable. + */ + private void verifyStandardDataRows(JSONObject result, String... canonicalColumns) { + if (canonicalColumns.length != 6) { + throw new IllegalArgumentException( + "verifyStandardDataRows expects 6 canonical column names; got " + + canonicalColumns.length); + } + Object[][] canonicalValues = + new Object[][] { + {"Jake", "USA", "California", 4, 2023, 70}, + {"Hello", "USA", "New York", 4, 2023, 30}, + {"John", "Canada", "Ontario", 4, 2023, 25}, + {"Jane", "Canada", "Quebec", 4, 2023, 20} + }; + Map[] expectedRows = new LinkedHashMap[canonicalValues.length]; + for (int i = 0; i < canonicalValues.length; i++) { + Map row = new LinkedHashMap<>(); + for (int c = 0; c < canonicalColumns.length; c++) { + row.put(canonicalColumns[c], canonicalValues[i][c]); + } + expectedRows[i] = row; + } + verifyDataRowsByColumn(result, expectedRows); + } + + /** + * Match expected rows against the response by column name, ignoring the response's column + * emission order. For each expected row (a {@code column-name -> value} map), the value at each + * schema position is looked up by name. 
Tests using this helper become engine-order agnostic: a + * parquet-backed response and a Lucene-backed response yield the same assertion outcome as long + * as the column-name-to-value mapping agrees. + */ + @SafeVarargs + @SuppressWarnings("varargs") + private final void verifyDataRowsByColumn( + JSONObject result, Map... expectedRows) { + JSONArray schema = result.getJSONArray("schema"); + int n = schema.length(); + String[] columnOrder = new String[n]; + for (int i = 0; i < n; i++) { + columnOrder[i] = schema.getJSONObject(i).getString("name"); + } + @SuppressWarnings({"unchecked", "rawtypes"}) + Matcher[] rowMatchers = new Matcher[expectedRows.length]; + for (int r = 0; r < expectedRows.length; r++) { + Object[] reordered = new Object[n]; + for (int c = 0; c < n; c++) { + if (!expectedRows[r].containsKey(columnOrder[c])) { + throw new IllegalArgumentException( + "Expected row at index " + + r + + " is missing canonical value for response column [" + + columnOrder[c] + + "]; provided keys: " + + expectedRows[r].keySet()); + } + reordered[c] = expectedRows[r].get(columnOrder[c]); + } + rowMatchers[r] = rows(reordered); + } + verifyDataRows(result, rowMatchers); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java index ec6f8583b23..0bd7ac803f9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java @@ -216,4 +216,27 @@ public void testSpathAutoExtractWithSort() throws IOException { verifySchema(result, schema("doc.user.name", "string")); verifyDataRowsInOrder(result, rows("Alice"), rows("John")); } + + @Test + public void testSpathAutoExtractWithMultiFieldEval() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_cmd | spath input=doc" + + " | eval 
doc.user.name=doc.user.name, doc.user.age=doc.user.age" + + " | fields doc.user.name, doc.user.age"); + verifySchema(result, schema("doc.user.name", "string"), schema("doc.user.age", "string")); + verifyDataRows(result, rows("Alice", "25"), rows("John", "30")); + } + + @Test + public void testSpathAutoExtractWithSeparateEvalCommands() throws IOException { + JSONObject result = + executeQuery( + "source=test_spath_cmd | spath input=doc" + + " | eval doc.user.name=doc.user.name" + + " | eval doc.user.age=doc.user.age" + + " | fields doc.user.name, doc.user.age"); + verifySchema(result, schema("doc.user.name", "string"), schema("doc.user.age", "string")); + verifyDataRows(result, rows("Alice", "25"), rows("John", "30")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteParseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteParseCommandIT.java index e25470a6e53..d5030ffa181 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteParseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteParseCommandIT.java @@ -9,9 +9,12 @@ import java.io.IOException; import org.junit.Test; +import org.opensearch.client.ResponseException; import org.opensearch.sql.ppl.ParseCommandIT; public class CalciteParseCommandIT extends ParseCommandIT { + private static final String SUGGESTION_MATCHING_CONTENT = "capture groups must be alphanumeric"; + @Override public void init() throws Exception { super.init(); @@ -25,10 +28,9 @@ public void testParseErrorInvalidGroupNameUnderscore() throws IOException { String.format( "source=%s | parse email '.+@(?.+)' | fields email", TEST_INDEX_BANK)); fail("Should have thrown an exception for underscore in named capture group"); - } catch (Exception e) { + } catch (ResponseException e) { assertTrue(e.getMessage().contains("Invalid capture group name 'host_name'")); - assertTrue( - e.getMessage().contains("must start with a letter and 
contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -39,10 +41,9 @@ public void testParseErrorInvalidGroupNameHyphen() throws IOException { String.format( "source=%s | parse email '.+@(?.+)' | fields email", TEST_INDEX_BANK)); fail("Should have thrown an exception for hyphen in named capture group"); - } catch (Exception e) { + } catch (ResponseException e) { assertTrue(e.getMessage().contains("Invalid capture group name 'host-name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -53,10 +54,9 @@ public void testParseErrorInvalidGroupNameStartingWithDigit() throws IOException String.format( "source=%s | parse email '.+@(?<1host>.+)' | fields email", TEST_INDEX_BANK)); fail("Should have thrown an exception for group name starting with digit"); - } catch (Exception e) { + } catch (ResponseException e) { assertTrue(e.getMessage().contains("Invalid capture group name '1host'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -67,10 +67,9 @@ public void testParseErrorInvalidGroupNameSpecialCharacter() throws IOException String.format( "source=%s | parse email '.+@(?.+)' | fields email", TEST_INDEX_BANK)); fail("Should have thrown an exception for special character in named capture group"); - } catch (Exception e) { + } catch (ResponseException e) { assertTrue(e.getMessage().contains("Invalid capture group name 'host@name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java index 44cc4a3aaf0..5943a3c5d30 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java @@ -9,6 +9,10 @@ import static org.opensearch.sql.util.MatcherUtils.*; import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import org.hamcrest.Matcher; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.Test; import org.opensearch.sql.common.antlr.SyntaxCheckException; @@ -61,12 +65,41 @@ public void testMultipleReplace() throws IOException { schema("year", "int"), schema("age", "int")); - verifyDataRows( + // Match by column name — analytics-engine and v2 paths return columns in different orders. + verifyDataRowsByColumn( result, - rows("Jake", "United States", "California", 4, 2023, 70), - rows("Hello", "United States", "New York", 4, 2023, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25), - rows("Joseph", "Canada", "Quebec", 4, 2023, 20)); + rowOf( + "name", + "Jake", + "country", + "United States", + "state", + "California", + "month", + 4, + "year", + 2023, + "age", + 70), + rowOf( + "name", + "Hello", + "country", + "United States", + "state", + "New York", + "month", + 4, + "year", + 2023, + "age", + 30), + rowOf( + "name", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023, + "age", 25), + rowOf( + "name", "Joseph", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023, + "age", 20)); } @Test @@ -121,12 +154,40 @@ public void testEmptyStringReplacement() throws IOException { schema("year", "int"), schema("age", "int")); - verifyDataRows( + verifyDataRowsByColumn( result, - rows("Jake", "", "California", 4, 2023, 70), - rows("Hello", "", "New York", 4, 2023, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 
20)); + rowOf( + "name", + "Jake", + "country", + "", + "state", + "California", + "month", + 4, + "year", + 2023, + "age", + 70), + rowOf( + "name", + "Hello", + "country", + "", + "state", + "New York", + "month", + 4, + "year", + 2023, + "age", + 30), + rowOf( + "name", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023, + "age", 25), + rowOf( + "name", "Jane", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023, "age", + 20)); } @Test @@ -146,12 +207,40 @@ public void testMultipleFieldsInClause() throws IOException { schema("year", "int"), schema("age", "int")); - verifyDataRows( + verifyDataRowsByColumn( result, - rows("Jake", "United States", "California", 4, 2023, 70), - rows("Hello", "United States", "New York", 4, 2023, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20)); + rowOf( + "name", + "Jake", + "country", + "United States", + "state", + "California", + "month", + 4, + "year", + 2023, + "age", + 70), + rowOf( + "name", + "Hello", + "country", + "United States", + "state", + "New York", + "month", + 4, + "year", + 2023, + "age", + 30), + rowOf( + "name", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023, + "age", 25), + rowOf( + "name", "Jane", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023, "age", + 20)); } @Test @@ -164,10 +253,16 @@ public void testReplaceNonExistentField() { String.format( "source = %s | replace 'USA' WITH 'United States' IN non_existent_field", TEST_INDEX_STATE_COUNTRY))); - verifyErrorMessageContains( - e, - "field [non_existent_field] not found; input fields are: [name, country, state, month," - + " year, age, _id, _index, _score, _maxscore, _sort, _routing]"); + // Order-agnostic — analytics-engine and v2 paths emit the input-field list in different + // orders (parquet preserves storage order, Lucene preserves _source iteration order). 
+ // Assert that the prefix and every expected field name appear somewhere in the message. + verifyErrorMessageContains(e, "field [non_existent_field] not found; input fields are:"); + verifyErrorMessageContains(e, "name"); + verifyErrorMessageContains(e, "country"); + verifyErrorMessageContains(e, "state"); + verifyErrorMessageContains(e, "month"); + verifyErrorMessageContains(e, "year"); + verifyErrorMessageContains(e, "age"); } @Test @@ -259,12 +354,40 @@ public void testMultiplePairsInSingleCommand() throws IOException { schema("year", "int"), schema("age", "int")); - verifyDataRows( + verifyDataRowsByColumn( result, - rows("Jake", "United States", "California", 4, 2023, 70), - rows("Hello", "United States", "New York", 4, 2023, 30), - rows("John", "CA", "Ontario", 4, 2023, 25), - rows("Jane", "CA", "Quebec", 4, 2023, 20)); + rowOf( + "name", + "Jake", + "country", + "United States", + "state", + "California", + "month", + 4, + "year", + 2023, + "age", + 70), + rowOf( + "name", + "Hello", + "country", + "United States", + "state", + "New York", + "month", + 4, + "year", + 2023, + "age", + 30), + rowOf( + "name", "John", "country", "CA", "state", "Ontario", "month", 4, "year", 2023, "age", + 25), + rowOf( + "name", "Jane", "country", "CA", "state", "Quebec", "month", 4, "year", 2023, "age", + 20)); } @Test @@ -402,4 +525,61 @@ public void testEscapeSequence_noMatchLiteral() throws IOException { // Pattern "foo\*bar" matches literal "foo*bar", not "fooXbar", so original value returned verifyDataRows(result, rows("fooXbar")); } + + /** + * Build a {@code column -> value} map from interleaved varargs ({@code key1, val1, key2, val2, + * ...}). Preserves insertion order so the expected-row mapping reads naturally at the call site. + */ + private static Map rowOf(Object... 
pairs) { + if (pairs.length % 2 != 0) { + throw new IllegalArgumentException("rowOf expects an even number of args (key, value, ...)"); + } + Map row = new LinkedHashMap<>(); + for (int i = 0; i < pairs.length; i += 2) { + row.put((String) pairs[i], pairs[i + 1]); + } + return row; + } + + /** + * Match expected rows against the response by column name, ignoring the response's column + * emission order. The two paths the analytics-engine route can take return columns in different + * orders (parquet preserves storage order, the v2 / Lucene path preserves {@code _source} + * iteration order), and either is valid given the contract {@code verifySchema} declares (set + * equality on column names). To avoid baking either order into the test, this helper reorders + * each expected row to match whatever column order the response actually returned. + * + *

    Mirrors the helper in {@code CalcitePPLRenameIT} (commit 59c728b) — same pattern applied to + * PPL {@code replace} command tests. + */ + @SafeVarargs + @SuppressWarnings("varargs") + private final void verifyDataRowsByColumn( + JSONObject result, Map... expectedRows) { + JSONArray schema = result.getJSONArray("schema"); + int n = schema.length(); + String[] columnOrder = new String[n]; + for (int i = 0; i < n; i++) { + columnOrder[i] = schema.getJSONObject(i).getString("name"); + } + @SuppressWarnings({"unchecked", "rawtypes"}) + Matcher[] rowMatchers = new Matcher[expectedRows.length]; + for (int r = 0; r < expectedRows.length; r++) { + Object[] reordered = new Object[n]; + for (int c = 0; c < n; c++) { + if (!expectedRows[r].containsKey(columnOrder[c])) { + throw new IllegalArgumentException( + "Expected row at index " + + r + + " is missing canonical value for response column [" + + columnOrder[c] + + "]; provided keys: " + + expectedRows[r].keySet()); + } + reordered[c] = expectedRows[r].get(columnOrder[c]); + } + rowMatchers[r] = rows(reordered); + } + verifyDataRows(result, rowMatchers); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java index 5ff41bcb3f5..5c381bb5346 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java @@ -6,8 +6,11 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; +import static 
org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder; import static org.opensearch.sql.util.MatcherUtils.verifySchema; @@ -24,12 +27,18 @@ public void init() throws Exception { enableCalcite(); disallowCalciteFallback(); loadIndex(Index.BANK); + loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.STATE_COUNTRY); + loadIndex(Index.EVENTS); } @Test public void testReverse() throws IOException { JSONObject result = - executeQuery(String.format("source=%s | fields account_number | reverse", TEST_INDEX_BANK)); + executeQuery( + String.format( + "source=%s | fields account_number | sort account_number | reverse", + TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); verifyDataRowsInOrder( result, rows(32), rows(25), rows(20), rows(18), rows(13), rows(6), rows(1)); @@ -40,7 +49,8 @@ public void testReverseWithFields() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | fields account_number, firstname | reverse", TEST_INDEX_BANK)); + "source=%s | fields account_number, firstname | sort account_number | reverse", + TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); verifyDataRowsInOrder( result, @@ -70,7 +80,8 @@ public void testDoubleReverse() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | fields account_number | reverse | reverse", TEST_INDEX_BANK)); + "source=%s | fields account_number | sort account_number | reverse | reverse", + TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); verifyDataRowsInOrder( result, rows(1), rows(6), rows(13), rows(18), rows(20), rows(25), rows(32)); @@ -80,7 +91,9 @@ public void testDoubleReverse() throws IOException { public void testReverseWithHead() throws IOException { JSONObject result = executeQuery( - String.format("source=%s | fields account_number | reverse | head 3", 
TEST_INDEX_BANK)); + String.format( + "source=%s | fields account_number | sort account_number | reverse | head 3", + TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); verifyDataRowsInOrder(result, rows(32), rows(25), rows(20)); } @@ -90,21 +103,413 @@ public void testReverseWithComplexPipeline() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | where account_number > 18 | fields account_number | reverse | head 2", + "source=%s | where account_number > 18 | fields account_number | sort" + + " account_number | reverse | head 2", TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); verifyDataRowsInOrder(result, rows(32), rows(25)); } @Test - public void testReverseWithMultipleSorts() throws IOException { - // Use the existing BANK data but with a simpler, more predictable query + public void testReverseWithDescendingSort() throws IOException { + // Test reverse with descending sort (- age) JSONObject result = executeQuery( String.format( - "source=%s | sort account_number | fields account_number | reverse | head 3", + "source=%s | sort - account_number | fields account_number | reverse", TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); - verifyDataRowsInOrder(result, rows(32), rows(25), rows(20)); + verifyDataRowsInOrder( + result, rows(1), rows(6), rows(13), rows(18), rows(20), rows(25), rows(32)); + } + + @Test + public void testReverseWithMixedSortDirections() throws IOException { + // Test reverse with mixed sort directions (- age, + firstname) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number, + firstname | fields account_number, firstname" + + " | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); + verifyDataRowsInOrder( + result, + rows(1, "Amber JOHnny"), + rows(6, "Hattie"), + rows(13, "Nanette"), + rows(18, "Dale"), + rows(20, 
"Elinor"), + rows(25, "Virginia"), + rows(32, "Dillard")); + } + + @Test + public void testDoubleReverseWithDescendingSort() throws IOException { + // Test double reverse with descending sort (- age) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number | fields account_number | reverse | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint")); + verifyDataRowsInOrder( + result, rows(32), rows(25), rows(20), rows(18), rows(13), rows(6), rows(1)); + } + + @Test + public void testDoubleReverseWithMixedSortDirections() throws IOException { + // Test double reverse with mixed sort directions (- age, + firstname) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number, + firstname | fields account_number, firstname" + + " | reverse | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); + verifyDataRowsInOrder( + result, + rows(32, "Dillard"), + rows(25, "Virginia"), + rows(20, "Elinor"), + rows(18, "Dale"), + rows(13, "Nanette"), + rows(6, "Hattie"), + rows(1, "Amber JOHnny")); + } + + @Test + public void testReverseIgnoredWithoutSortOrTimestamp() throws IOException { + // Test that reverse is ignored when there's no explicit sort and no @timestamp field + // BANK index doesn't have @timestamp, so reverse should be ignored + JSONObject result = + executeQuery( + String.format("source=%s | fields account_number | reverse | head 3", TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint")); + // Without sort or @timestamp, reverse is ignored, so data comes in natural order + // The first 3 documents in natural order (ascending by account_number) + verifyDataRowsInOrder(result, rows(1), rows(6), rows(13)); + } + + @Test + public void testReverseWithTimestampField() throws IOException { + // Test that reverse with @timestamp field sorts by @timestamp DESC + // TIME_TEST_DATA index 
has @timestamp field + JSONObject result = + executeQuery( + String.format( + "source=%s | fields value, category, `@timestamp` | reverse | head 5", + TEST_INDEX_TIME_DATA)); + verifySchema( + result, + schema("value", "int"), + schema("category", "string"), + schema("@timestamp", "timestamp")); + // Should return the latest 5 records (highest @timestamp values) in descending order + // Based on the test data, these are IDs 100, 99, 98, 97, 96 + verifyDataRowsInOrder( + result, + rows(8762, "A", "2025-08-01 03:47:41"), + rows(7348, "C", "2025-08-01 02:00:56"), + rows(9015, "B", "2025-08-01 01:14:11"), + rows(6489, "D", "2025-08-01 00:27:26"), + rows(8676, "A", "2025-07-31 23:40:33")); + } + + @Test + public void testReverseWithTimestampAndExplicitSort() throws IOException { + // Test that explicit sort takes precedence over @timestamp + JSONObject result = + executeQuery( + String.format( + "source=%s | fields value, category | sort value | reverse | head 3", + TEST_INDEX_TIME_DATA)); + verifySchema(result, schema("value", "int"), schema("category", "string")); + // Should reverse the value sort, giving us the highest values + verifyDataRowsInOrder(result, rows(9521, "B"), rows(9367, "A"), rows(9321, "A")); + } + + @Test + public void testStreamstatsWithReverse() throws IOException { + // Test that reverse is ignored when used directly after streamstats + // streamstats maintains order via __stream_seq__, but this field is projected out + // and doesn't create a detectable collation, so reverse is ignored (no-op) + JSONObject result = + executeQuery( + String.format( + "source=%s | streamstats count() as cnt, avg(age) as avg | reverse", + TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("cnt", "bigint"), + schema("avg", "double")); + // Reverse is ignored, so data remains in original 
streamstats order + verifyDataRowsInOrder( + result, + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 2, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 3, 41.666666666666664), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 4, 36.25)); + } + + @Test + public void testStreamstatsWindowWithReverse() throws IOException { + // Test that reverse is ignored after streamstats with window + JSONObject result = + executeQuery( + String.format( + "source=%s | streamstats window=2 avg(age) as avg | reverse", + TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("avg", "double")); + // Reverse is ignored, data remains in original order + // Window=2 means average of current and previous row (sliding window of size 2) + verifyDataRowsInOrder( + result, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 27.5), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5)); + } + + @Test + public void testStreamstatsByWithReverse() throws IOException { + // Test that reverse is effective after streamstats with partitioning (by clause). + // Backtracking finds the __stream_seq__ sort from streamstats and reverses its order. 
+ JSONObject result = + executeQuery( + String.format( + "source=%s | streamstats count() as cnt, avg(age) as avg by country | reverse", + TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("cnt", "bigint"), + schema("avg", "double")); + // With backtracking, reverse now works and reverses the __stream_seq__ order + verifyDataRowsInOrder( + result, + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5), + rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25), + rows("Hello", "USA", "New York", 4, 2023, 30, 2, 50), + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70)); + } + + @Test + public void testStreamstatsWithSortThenReverse() throws IOException { + // Test that reverse works when there's an explicit sort after streamstats + // The explicit sort creates a collation that reverse can detect and reverse + JSONObject result = + executeQuery( + String.format( + "source=%s | streamstats count() as cnt | sort age | reverse | head 3", + TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("cnt", "bigint")); + // With explicit sort and reverse, data is in descending age order + verifyDataRowsInOrder( + result, + rows("Jake", "USA", "California", 4, 2023, 70, 1), + rows("Hello", "USA", "New York", 4, 2023, 30, 2), + rows("John", "Canada", "Ontario", 4, 2023, 25, 3)); + } + + // ==================== Tests for blocking operators ==================== + // These tests verify that reverse is a no-op after blocking operators + // that destroy collation (aggregate, join, window functions). 
+ + @Test + public void testReverseAfterAggregationIsNoOp() throws IOException { + // Test that reverse is a no-op after aggregation (stats) + // Aggregation destroys input ordering, so reverse has no collation to reverse + // and BANK index has no @timestamp, so reverse should be ignored + JSONObject result = + executeQuery( + String.format("source=%s | stats count() as c by gender | reverse", TEST_INDEX_BANK)); + verifySchema(result, schema("c", "bigint"), schema("gender", "string")); + // Data should be in aggregation order (no reverse applied) + // Use verifyDataRows (unordered) since aggregation order is not guaranteed + verifyDataRows(result, rows(4, "M"), rows(3, "F")); + } + + @Test + public void testReverseAfterAggregationWithSort() throws IOException { + // Test that reverse works when there's an explicit sort after aggregation + JSONObject result = + executeQuery( + String.format( + "source=%s | stats count() as c by gender | sort gender | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("c", "bigint"), schema("gender", "string")); + // With explicit sort and reverse, data should be in descending gender order + // Sort by gender ASC: F, M -> Reverse: M, F + verifyDataRowsInOrder(result, rows(4, "M"), rows(3, "F")); + } + + @Test + public void testReverseSortAggregationIsNoOp() throws IOException { + // Test that sort before aggregation doesn't allow reverse after aggregation + // Even with sort before stats, aggregation destroys the collation + JSONObject result = + executeQuery( + String.format( + "source=%s | sort account_number | stats count() as c by gender | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("c", "bigint"), schema("gender", "string")); + // Reverse is a no-op because aggregation destroyed the sort collation + // Use verifyDataRows (unordered) since aggregation order is not guaranteed + verifyDataRows(result, rows(4, "M"), rows(3, "F")); + } + + @Test + public void testReverseAfterWhereWithSort() throws 
IOException { + // Test that reverse works through filter (where) to find the sort + JSONObject result = + executeQuery( + String.format( + "source=%s | sort account_number | where balance > 30000 | fields account_number," + + " balance | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint"), schema("balance", "bigint")); + // Reverse should work through the filter to reverse the sort + // Balances > 30000: 1(39225), 13(32838), 25(40540), 32(48086) + // Reversed by account_number: 32, 25, 13, 1 + verifyDataRowsInOrder( + result, rows(32, 48086), rows(25, 40540), rows(13, 32838), rows(1, 39225)); + } + + @Test + public void testReverseAfterEvalWithSort() throws IOException { + // Test that reverse works through eval (project) to find the sort + JSONObject result = + executeQuery( + String.format( + "source=%s | sort account_number | eval double_balance = balance * 2 | fields" + + " account_number, double_balance | reverse | head 3", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint"), schema("double_balance", "bigint")); + // Reverse should work through eval to reverse the sort + // Account balances: 32(48086), 25(40540), 20(16418) + // double_balance: 32(96172), 25(81080), 20(32836) + verifyDataRowsInOrder(result, rows(32, 96172), rows(25, 81080), rows(20, 32836)); + } + + @Test + public void testReverseAfterMultipleFilters() throws IOException { + // Test that reverse works through multiple filters + JSONObject result = + executeQuery( + String.format( + "source=%s | sort account_number | where balance > 20000 | where age > 30 | fields" + + " account_number, balance, age | reverse", + TEST_INDEX_BANK)); + verifySchema( + result, + schema("account_number", "bigint"), + schema("balance", "bigint"), + schema("age", "int")); + // Reverse should work through multiple filters + // balance > 20000 AND age > 30: 1(39225, 32), 25(40540, 39), 32(48086, 34) + // Reversed by account_number: 32, 25, 1 + 
verifyDataRowsInOrder(result, rows(32, 48086, 34), rows(25, 40540, 39), rows(1, 39225, 32)); + } + + @Test + public void testReverseWithTimestampAfterAggregation() throws IOException { + // Test that reverse uses @timestamp when aggregation destroys collation + // TIME_TEST_DATA has @timestamp field + JSONObject result = + executeQuery( + String.format( + "source=%s | stats count() as c by category | reverse", TEST_INDEX_TIME_DATA)); + verifySchema(result, schema("c", "bigint"), schema("category", "string")); + // Even though aggregation destroys collation, there's no @timestamp in the + // aggregated result, so reverse is a no-op + // Use verifyDataRows (unordered) since aggregation order is not guaranteed + // Categories: A=26, B=25, C=25, D=24 + verifyDataRows(result, rows(26, "A"), rows(25, "B"), rows(25, "C"), rows(24, "D")); + } + + // ==================== Timechart with Reverse tests ==================== + // These tests verify that reverse works correctly with timechart. + // Timechart always adds a sort at the end of its plan (tier 1), so reverse + // will find the collation via metadata query and flip the sort direction. 
+ + @Test + public void testTimechartWithReverse() throws IOException { + // Timechart adds ORDER BY @timestamp ASC at the end + // Reverse should flip it to DESC, returning data in reverse chronological order + JSONObject result = executeQuery("source=events | timechart span=1m count() | reverse"); + verifySchema(result, schema("@timestamp", "timestamp"), schema("count()", "bigint")); + // Events data has timestamps at 00:00, 00:01, 00:02, 00:03, 00:04 + // Reverse should return them in descending order + verifyDataRowsInOrder( + result, + rows("2024-07-01 00:04:00", 1), + rows("2024-07-01 00:03:00", 1), + rows("2024-07-01 00:02:00", 1), + rows("2024-07-01 00:01:00", 1), + rows("2024-07-01 00:00:00", 1)); + } + + @Test + public void testTimechartWithCustomTimefieldAndReverse() throws IOException { + // Test timechart with custom timefield (birthdate instead of @timestamp) + // PR #4784 allows users to specify a custom timefield in timechart + // The sort should be on the custom field, not @timestamp + JSONObject result = + executeQuery( + String.format( + "source=%s | timechart timefield=birthdate span=1year count() | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("birthdate", "timestamp"), schema("count()", "bigint")); + // Bank data has birthdates in 2017 and 2018 + // Timechart groups by year: 2017 (2 records), 2018 (5 records) + // Reverse should return 2018 before 2017 + verifyDataRowsInOrder(result, rows("2018-01-01 00:00:00", 5), rows("2017-01-01 00:00:00", 2)); + } + + @Test + public void testTimechartWithGroupByAndReverse() throws IOException { + // Test timechart with group by and reverse + // The sort is on both @timestamp and the group by field + JSONObject result = executeQuery("source=events | timechart span=1h count() by host | reverse"); + verifySchema( + result, + schema("@timestamp", "timestamp"), + schema("host", "string"), + schema("count()", "bigint")); + // All events are in the same hour, so only one time bucket + // Hosts are 
grouped and results are reversed (DESC order: web-02, web-01, db-01) + verifyDataRowsInOrder( + result, + rows("2024-07-01 00:00:00", "web-02", 2), + rows("2024-07-01 00:00:00", "web-01", 2), + rows("2024-07-01 00:00:00", "db-01", 1)); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRexCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRexCommandIT.java index f7a50ee0676..eca08b1fc11 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRexCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteRexCommandIT.java @@ -14,6 +14,8 @@ import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalciteRexCommandIT extends PPLIntegTestCase { + private static final String SUGGESTION_MATCHING_CONTENT = "capture groups must be alphanumeric"; + @Override public void init() throws Exception { super.init(); @@ -61,8 +63,7 @@ public void testRexErrorInvalidGroupNameUnderscore() throws IOException { fail("Should have thrown an exception for underscore in named capture group"); } catch (Exception e) { assertTrue(e.getMessage().contains("Invalid capture group name 'user_name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -77,8 +78,7 @@ public void testRexErrorInvalidGroupNameHyphen() throws IOException { fail("Should have thrown an exception for hyphen in named capture group"); } catch (Exception e) { assertTrue(e.getMessage().contains("Invalid capture group name 'user-name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -93,8 +93,7 @@ public void testRexErrorInvalidGroupNameStartingWithDigit() throws IOException { fail("Should have thrown an exception for group name starting with 
digit"); } catch (Exception e) { assertTrue(e.getMessage().contains("Invalid capture group name '1user'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } @@ -109,8 +108,7 @@ public void testRexErrorInvalidGroupNameSpecialCharacter() throws IOException { fail("Should have thrown an exception for special character in named capture group"); } catch (Exception e) { assertTrue(e.getMessage().contains("Invalid capture group name 'user@name'")); - assertTrue( - e.getMessage().contains("must start with a letter and contain only letters and digits")); + assertTrue(e.getMessage().contains(SUGGESTION_MATCHING_CONTENT)); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java index dcf36f510bf..fa0b21e622f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java @@ -823,6 +823,41 @@ public void testMultipleStreamstats() throws IOException { rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5)); } + @Test + public void testMultipleStreamstatsWithWindow() throws IOException { + // Test case from GitHub issue #4800: chained streamstats with window=2 + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats window=2 avg(age) as avg_age by state, country" + + " | streamstats window=2 avg(avg_age) as avg_state_age by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("avg_age", "double"), + schema("avg_state_age", "double")); + + 
verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5), + rows(null, "Canada", null, 4, 2023, 10, 10, 15), + rows("Kevin", null, null, 4, 2023, null, null, null)); + } + + // TODO: Fix chained reset_before + window streamstats (nested correlate issue, see #4800) + // The reset path still uses correlate, and the window self-join copies it into the right side, + // causing Calcite's RelDecorrelator to fail on duplicate correlate references. + @Test public void testMultipleStreamstatsWithNull1() throws IOException { JSONObject actual = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java index 44df58b7ab8..676cf162b03 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java @@ -5,6 +5,9 @@ package org.opensearch.sql.calcite.remote; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.util.MatcherUtils.*; import static org.opensearch.sql.util.MatcherUtils.rows; @@ -141,6 +144,50 @@ public void testTransposeLowerLimit() throws IOException { rows("age", "32", "36", "28", "33", "36")); } + /** + * Regression test for #5172: transpose fails when input has a field named 'value', because the + * internal unpivot column was also hardcoded as 'value'. 
+ */ + @Test + public void testTransposeWithValueFieldNameCollision() throws IOException { + var result = + executeQuery( + String.format( + "source=%s | stats count() as value, avg(age) as avg_age | transpose", + TEST_INDEX_ACCOUNT)); + + verifySchema( + result, + schema("column", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string")); + + var dataRows = result.getJSONArray("datarows"); + // Verify that each transposed row has distinct correct values + // (not all duplicated from the 'value' field) + assertEquals(2, dataRows.length()); + boolean foundValue = false; + boolean foundAvgAge = false; + for (int i = 0; i < dataRows.length(); i++) { + var row = dataRows.getJSONArray(i); + String colName = row.getString(0); + if ("value".equals(colName)) { + foundValue = true; + // count should be 1000 (total accounts) + assertEquals("1000", row.getString(1)); + } else if ("avg_age".equals(colName)) { + foundAvgAge = true; + // avg_age should not equal the count value + assertNotEquals("1000", row.getString(1)); + } + } + assertTrue("Should have 'value' row in transposed result", foundValue); + assertTrue("Should have 'avg_age' row in transposed result", foundAvgAge); + } + @Test public void testTransposeColumnName() throws IOException { var result = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteUnionCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteUnionCommandIT.java new file mode 100644 index 00000000000..1dbd34357ab --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteUnionCommandIT.java @@ -0,0 +1,270 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static 
org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOCATIONS_TYPE_CONFLICT; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteUnionCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.ACCOUNT); + loadIndex(Index.BANK); + loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.TIME_TEST_DATA2); + loadIndex(Index.LOCATIONS_TYPE_CONFLICT); + } + + @Test + public void testBasicUnionTwoSubsearches() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union " + + "[search source=%s | where age < 30 | eval age_group = \\\"young\\\"] " + + "[search source=%s | where age >= 30 | eval age_group = \\\"adult\\\"] " + + "| stats count by age_group | sort age_group", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("count", null, "bigint"), schema("age_group", null, "string")); + verifyDataRows(result, rows(549L, "adult"), rows(451L, "young")); + } + + @Test + public void testUnionThreeSubsearches() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where state = \\\"IL\\\" | eval region" + + " = \\\"Illinois\\\"] [search source=%s | where state = \\\"TN\\\" | eval" + + " region = \\\"Tennessee\\\"] [search source=%s | where state = \\\"CA\\\" |" + + " eval region = \\\"California\\\"] | stats count by region | sort region", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + 
verifySchema(result, schema("count", null, "bigint"), schema("region", null, "string")); + verifyDataRows(result, rows(17L, "California"), rows(22L, "Illinois"), rows(25L, "Tennessee")); + } + + @Test + public void testUnionDirectTableNames() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union %s, %s | where account_number = 1 | fields firstname, city", + TEST_INDEX_ACCOUNT, TEST_INDEX_BANK)); + + verifySchema(result, schema("firstname", null, "string"), schema("city", null, "string")); + + verifyDataRows(result, rows("Amber", "Brogan"), rows("Amber JOHnny", "Brogan")); + } + + @Test + public void testUnionMixedDirectTableAndSubsearch() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union %s, [search source=%s | where age > 30] | stats count() as total", + TEST_INDEX_ACCOUNT, TEST_INDEX_BANK)); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(1006L)); + } + + @Test + public void testUnionWithDifferentIndicesSchemaMerge() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where age > 35 | fields account_number," + + " firstname, balance] [search source=%s | where age > 35 | fields" + + " account_number, balance] | stats count() as total_count", + TEST_INDEX_ACCOUNT, TEST_INDEX_BANK)); + + verifySchema(result, schema("total_count", null, "bigint")); + verifyDataRows(result, rows(241L)); + } + + @Test + public void testUnionNumericCoercion_BigIntPlusInteger() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where account_number = 1 | fields balance] [search" + + " source=%s | where account_number = 1 | eval balance = 100 | fields balance]" + + " | head 2", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("balance", null, "bigint")); + + assertEquals(2, result.getJSONArray("datarows").length()); + } + + @Test + public void 
testUnionIncompatibleTypes_MultipleFieldConflicts() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where account_number = 1 | fields firstname, age," + + " balance] [search source=%s | where place_id = 1001 | fields description," + + " age, place_id] | head 2", + TEST_INDEX_ACCOUNT, TEST_INDEX_LOCATIONS_TYPE_CONFLICT)); + + verifySchema( + result, + schema("firstname", null, "string"), + schema("age", null, "string"), + schema("balance", null, "bigint"), + schema("description", null, "string"), + schema("place_id", null, "int")); + + assertEquals(2, result.getJSONArray("datarows").length()); + } + + @Test + public void testUnionAllDatasetsDifferentSchemas() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where account_number = 1 | fields account_number," + + " balance] [search source=%s | where place_id = 1001 | fields description," + + " place_id] [search source=%s | where category = \\\"A\\\" | fields category," + + " value] | stats count() as total", + TEST_INDEX_ACCOUNT, + TEST_INDEX_LOCATIONS_TYPE_CONFLICT, + "opensearch-sql_test_index_time_data")); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(28L)); + } + + @Test + public void testUnionMidPipeline_SingleExplicitDataset() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | where gender = \\\"M\\\" " + + "| union [search source=%s | where gender = \\\"F\\\"] " + + "| stats count() as total", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(1000L)); + } + + @Test + public void testUnionWithExplicitOrdering() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where account_number = 1 | fields account_number," + + " balance] [search source=%s | where account_number = 6 | 
fields" + + " account_number, balance] | sort balance desc", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema( + result, schema("account_number", null, "bigint"), schema("balance", null, "bigint")); + + verifyDataRows(result, rows(1L, 39225L), rows(6L, 5686L)); + } + + @Test + public void testUnionWithMaxout() throws IOException { + String ppl = + "| union maxout=5 " + + "[search source=%s | where gender = \\\"M\\\"] " + + "[search source=%s | where gender = \\\"F\\\"]"; + JSONObject result = executeQuery(String.format(ppl, TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema( + result, + schema("account_number", null, "bigint"), + schema("firstname", null, "string"), + schema("address", null, "string"), + schema("balance", null, "bigint"), + schema("gender", null, "string"), + schema("city", null, "string"), + schema("employer", null, "string"), + schema("state", null, "string"), + schema("age", null, "bigint"), + schema("email", null, "string"), + schema("lastname", null, "string")); + + assertEquals(5, result.getJSONArray("datarows").length()); + } + + @Test + public void testUnionWithEmptySubsearch() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union " + + "[search source=%s | where age > 25] " + + "[search source=%s | where age > 200 | eval impossible = \\\"yes\\\"] " + + "| stats count", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("count", null, "bigint")); + verifyDataRows(result, rows(733L)); + } + + @Test + public void testUnionWithAllEmptyDatasets() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union " + + "[search source=%s | where age > 1000] " + + "[search source=%s | where age > 1000] " + + "| stats count() as total", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(0L)); + } + + @Test + public void testUnionPreservesDuplicatesExactCopy() throws 
IOException { + JSONObject result = + executeQuery( + String.format( + "| union " + + "[search source=%s | where account_number = 1] " + + "[search source=%s | where account_number = 1] " + + "[search source=%s | where account_number = 1] " + + "| stats count() as total", + TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT, TEST_INDEX_ACCOUNT)); + + verifySchema(result, schema("total", null, "bigint")); + verifyDataRows(result, rows(3L)); + } + + @Test + public void testUnionWithSingleSubsearchThrowsError() { + Exception exception = + assertThrows( + ResponseException.class, + () -> + executeQuery( + String.format( + "| union " + "[search source=%s | where age > 30]", TEST_INDEX_ACCOUNT))); + + assertTrue(exception.getMessage().contains("Union command requires at least two datasets")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java index d47656471b0..22e12e71556 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java @@ -30,6 +30,7 @@ import org.opensearch.sql.analysis.Analyzer; import org.opensearch.sql.analysis.ExpressionAnalyzer; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.DataSourceService; @@ -187,21 +188,19 @@ public void onResponse(ExecutionEngine.QueryResponse response) { @Override public void onFailure(Exception e) { - if (e instanceof SyntaxCheckException) { - throw (SyntaxCheckException) e; - } else if (e instanceof QueryEngineException) { - throw (QueryEngineException) e; - } else if (e instanceof UnsupportedCursorRequestException) { - throw 
(UnsupportedCursorRequestException) e; - } else if (e instanceof NoCursorException) { - throw (NoCursorException) e; - } else if (e instanceof UnsupportedOperationException) { - throw (UnsupportedOperationException) e; - } else if (e instanceof IllegalArgumentException) { - // most exceptions thrown by Calcite when resolve a plan. - throw (IllegalArgumentException) e; - } else { - throw new IllegalStateException("Exception happened during execution", e); + switch (e) { + case ErrorReport errorReport -> throw errorReport; + case SyntaxCheckException syntaxCheckException -> throw syntaxCheckException; + case QueryEngineException queryEngineException -> throw queryEngineException; + case UnsupportedCursorRequestException unsupportedCursorRequestException -> + throw unsupportedCursorRequestException; + case NoCursorException noCursorException -> throw noCursorException; + case UnsupportedOperationException unsupportedOperationException -> + throw unsupportedOperationException; + case IllegalArgumentException illegalArgumentException -> + // most exceptions thrown by Calcite when resolve a plan. 
+ throw illegalArgumentException; + default -> throw new IllegalStateException("Exception happened during execution", e); } } }, diff --git a/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java b/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java index a53c04d8710..f014ab587de 100644 --- a/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java @@ -9,6 +9,7 @@ import java.io.IOException; import lombok.SneakyThrows; +import org.apache.hc.core5.http.io.entity.EntityUtils; import org.json.JSONObject; import org.junit.After; import org.junit.Assert; @@ -150,8 +151,11 @@ private void assertDataSourceCount(int expected) { @SneakyThrows private Response performRequest(Request request) { try { - return client().performRequest(request); + Response response = client().performRequest(request); + System.err.println("Successful response: " + EntityUtils.toString(response.getEntity())); + return response; } catch (ResponseException e) { + System.err.println("Failed response: " + EntityUtils.toString(e.getResponse().getEntity())); return e.getResponse(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/DateFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/DateFunctionsIT.java index 56c60eb91e6..243dfbc90d2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/DateFunctionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/DateFunctionsIT.java @@ -10,7 +10,6 @@ import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.matchesPattern; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.time.Month; import org.joda.time.DateTime; @@ -21,7 +20,7 @@ import org.junit.Test; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import 
org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.search.SearchHit; @@ -262,10 +261,8 @@ private SearchHit[] execute(String sqlRequest) throws IOException { final JSONObject jsonObject = executeRequest(makeRequest(sqlRequest)); final XContentParser parser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(jsonObject.toString())); + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, jsonObject.toString()); return SearchResponse.fromXContent(parser).getHits().getHits(); } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/JSONRequestIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/JSONRequestIT.java index aa182144852..f88220cd31a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/JSONRequestIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/JSONRequestIT.java @@ -10,7 +10,6 @@ import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.lessThan; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.util.Map; import org.json.JSONObject; @@ -18,7 +17,7 @@ import org.junit.Test; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.search.SearchHit; @@ -200,10 +199,8 @@ private SearchHits query(String request) throws IOException { final JSONObject jsonObject = executeRequest(request); final XContentParser parser = - new 
JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(jsonObject.toString())); + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, jsonObject.toString()); return SearchResponse.fromXContent(parser).getHits(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/MathFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/MathFunctionsIT.java index 5f2dee6c0b8..cd59a0945c0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/MathFunctionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/MathFunctionsIT.java @@ -10,13 +10,12 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import org.junit.Ignore; import org.junit.Test; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.search.SearchHit; @@ -217,10 +216,8 @@ private SearchHit[] query(String select, String... 
statements) throws IOExceptio executeQueryWithStringOutput(select + " " + FROM + " " + String.join(" ", statements)); final XContentParser parser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(response)); + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, response); return SearchResponse.fromXContent(parser).getHits().getHits(); } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/NestedFieldQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/NestedFieldQueryIT.java index 82d94ca1163..d8997140ba8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/NestedFieldQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/NestedFieldQueryIT.java @@ -14,7 +14,6 @@ import static org.opensearch.sql.util.MatcherUtils.hitAll; import static org.opensearch.sql.util.MatcherUtils.kvString; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.math.BigDecimal; import java.util.ArrayList; @@ -32,7 +31,7 @@ import org.opensearch.action.search.SearchResponse; import org.opensearch.client.ResponseException; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; @@ -813,10 +812,8 @@ private SearchResponse execute(String sql) throws IOException { final JSONObject jsonObject = executeQuery(sql); final XContentParser parser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(jsonObject.toString())); + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, 
jsonObject.toString()); return SearchResponse.fromXContent(parser); } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/QueryFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/QueryFunctionsIT.java index 4090f03ed8f..d6472a3cb3f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/QueryFunctionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/QueryFunctionsIT.java @@ -13,7 +13,6 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_TYPE; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; @@ -30,7 +29,7 @@ import org.junit.Test; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.search.SearchHit; @@ -242,10 +241,8 @@ private SearchResponse execute(String sql) throws IOException { final JSONObject jsonObject = executeQuery(sql); final XContentParser parser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(jsonObject.toString())); + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, jsonObject.toString()); return SearchResponse.fromXContent(parser); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLFunctionsIT.java index f3a6d03bc77..7ad6a8c5be4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLFunctionsIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLFunctionsIT.java @@ -27,7 +27,6 @@ import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.util.Date; import java.util.stream.IntStream; @@ -38,7 +37,7 @@ import org.junit.Test; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.search.SearchHit; @@ -844,10 +843,8 @@ private SearchHits query(String query) throws IOException { final String rsp = executeQueryWithStringOutput(query); final XContentParser parser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(rsp)); + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, rsp); return SearchResponse.fromXContent(parser).getHits(); } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SourceFieldIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SourceFieldIT.java index a7d8acb3a7d..026b739ce4e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SourceFieldIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SourceFieldIT.java @@ -7,7 +7,6 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.util.Set; import org.json.JSONObject; @@ -16,7 +15,7 @@ import org.junit.Test; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import 
org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.search.SearchHit; @@ -100,10 +99,8 @@ private SearchHits query(String query) throws IOException { final JSONObject jsonObject = executeQuery(query); final XContentParser parser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(jsonObject.toString())); + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, jsonObject.toString()); return SearchResponse.fromXContent(parser).getHits(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/plugin/AnalyticsEngineCompatIT.java b/integ-test/src/test/java/org/opensearch/sql/plugin/AnalyticsEngineCompatIT.java new file mode 100644 index 00000000000..5cd89fa7cd9 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/plugin/AnalyticsEngineCompatIT.java @@ -0,0 +1,21 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.plugin; + +import org.junit.Test; +import org.opensearch.test.rest.OpenSearchRestTestCase; + +/** + * Smoke test: verifies that opensearch-sql loads cleanly alongside arrow-flight-rpc and + * analytics-engine. A successful cluster start is the only assertion — no sql-specific logic runs. + */ +public class AnalyticsEngineCompatIT extends OpenSearchRestTestCase { + + @Test + public void testClusterStarted() { + // If the cluster booted, all three plugins loaded without classloader errors. 
+ } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java index 099992c9298..b1c794130b0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java @@ -69,6 +69,38 @@ public void testConvertWithStats() { "source=%s | convert auto(balance) | stats avg(balance) by gender"); } + @Test + public void testConvertMktimeFunction() { + verifyQueryThrowsCalciteError( + "source=%s | eval date_str = '2003-10-18 20:07:13' | convert mktime(date_str) | fields" + + " date_str"); + } + + @Test + public void testConvertCtimeFunction() { + verifyQueryThrowsCalciteError( + "source=%s | eval timestamp = 1066507633 | convert ctime(timestamp) | fields timestamp"); + } + + @Test + public void testConvertDur2secFunction() { + verifyQueryThrowsCalciteError( + "source=%s | eval duration = '01:23:45' | convert dur2sec(duration) | fields duration"); + } + + @Test + public void testConvertMstimeFunction() { + verifyQueryThrowsCalciteError( + "source=%s | eval time_str = '03:45' | convert mstime(time_str) | fields time_str"); + } + + @Test + public void testConvertWithTimeformat() { + verifyQueryThrowsCalciteError( + "source=%s | eval date_str = '18/10/2003 20:07:13' | convert" + + " timeformat=\\\"%%d/%%m/%%Y %%H:%%M:%%S\\\" mktime(date_str) | fields date_str"); + } + private void verifyQueryThrowsCalciteError(String query) { Exception e = assertThrows(Exception.class, () -> executeQuery(String.format(query, TEST_INDEX_BANK))); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java index 1a2f5337998..25e7c12ffff 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DataTypeIT.java @@ -145,6 +145,47 @@ public void 
testNumericFieldFromString() throws Exception { client().performRequest(deleteRequest); } + @Test + public void testBooleanFieldFromNumberAcrossWildcardIndices() throws Exception { + // Reproduce issue #5269: querying across indices where same field has conflicting types + // (boolean vs text) and the text-typed index stores a numeric value like 0. + String indexBool = "repro_bool_test_bb"; + String indexText = "repro_bool_test_aa"; + + try { + // Create index with boolean mapping + Request createBool = new Request("PUT", "/" + indexBool); + createBool.setJsonEntity( + "{\"mappings\":{\"properties\":{\"flag\":{\"type\":\"boolean\"}," + + "\"startTime\":{\"type\":\"date_nanos\"}}}}"); + client().performRequest(createBool); + + // Create index with text mapping + Request createText = new Request("PUT", "/" + indexText); + createText.setJsonEntity( + "{\"mappings\":{\"properties\":{\"flag\":{\"type\":\"text\"}," + + "\"startTime\":{\"type\":\"date_nanos\"}}}}"); + client().performRequest(createText); + + // Insert boolean value into boolean-typed index + Request insertBool = new Request("PUT", "/" + indexBool + "/_doc/1?refresh=true"); + insertBool.setJsonEntity("{\"startTime\":\"2026-03-25T20:25:00.000Z\",\"flag\":false}"); + client().performRequest(insertBool); + + // Insert numeric value into text-typed index + Request insertText = new Request("PUT", "/" + indexText + "/_doc/1?refresh=true"); + insertText.setJsonEntity("{\"startTime\":\"2026-03-24T20:25:00.000Z\",\"flag\":0}"); + client().performRequest(insertText); + + // Query across both indices with wildcard — should not throw an error + JSONObject result = executeQuery("source=repro_bool_test_* | fields flag"); + assertEquals(2, result.getJSONArray("datarows").length()); + } finally { + client().performRequest(new Request("DELETE", "/" + indexBool)); + client().performRequest(new Request("DELETE", "/" + indexText)); + } + } + @Test public void testBooleanFieldFromString() throws Exception { final int docId = 
2; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index 84fdfdceb43..837865a3585 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -10,6 +10,7 @@ import static org.opensearch.sql.common.setting.Settings.Key.CALCITE_ENGINE_ENABLED; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_GRAPH_EMPLOYEES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MVEXPAND_EDGE_CASES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; @@ -28,6 +29,7 @@ public void init() throws Exception { loadIndex(Index.DOG); loadIndex(Index.STRINGS); loadIndex(Index.MVEXPAND_EDGE_CASES); + loadIndex(Index.GRAPH_EMPLOYEES); } @Test @@ -240,6 +242,39 @@ public void testConvertCommand() throws IOException { } } + @Test + public void testGraphLookup() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + JSONObject result; + try { + result = + executeQuery( + String.format( + "source=%s | graphLookup %s start=reportsTo edge=reportsTo-->name" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } + verifyQuery(result); + } + + @Test + public void testGraphLookupTopLevel() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + JSONObject result; + try { + result = + executeQuery( + String.format( + "graphLookup %s start='Eliot' edge=reportsTo-->name as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } + 
verifyQuery(result); + } + private void verifyQuery(JSONObject result) throws IOException { if (isCalciteEnabled()) { assertFalse(result.getJSONArray("datarows").isEmpty()); @@ -495,4 +530,20 @@ public void testMvExpandInvalidLimitNegative() throws IOException { assertThat(error.getString("type"), equalTo("SyntaxCheckException")); } } + + @Test + public void testUnionUnsupportedInV2() throws IOException { + JSONObject result; + try { + result = + executeQuery( + String.format( + "| union [search source=%s | where age < 30] [search source=%s | where age >=" + + " 30]", + TEST_INDEX_BANK, TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } + verifyQuery(result); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java index a386987e532..ad4c3475818 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/WhereCommandIT.java @@ -193,6 +193,27 @@ public void testIsNotNullFunction() throws IOException { verifyDataRows(result, rows("Amber JOHnny")); } + @Test + public void testIsNullPredicate() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where age IS NULL | fields firstname", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows(result, rows("Virginia")); + } + + @Test + public void testIsNotNullPredicate() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where age IS NOT NULL and like(firstname, 'Ambe_%%') | fields" + + " firstname", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRows(result, rows("Amber JOHnny")); + } + @Test public void testWhereWithMetadataFields() throws IOException { JSONObject result = diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java 
b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index e55e406de7b..24e67f43f13 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -262,27 +262,12 @@ public void testCrossClusterRenameFullWildcard() throws IOException { JSONObject result = executeQuery(String.format("search source=%s | rename * as old_*", TEST_INDEX_DOG_REMOTE)); verifyColumn( - result, - columnName("old_dog_name"), - columnName("old_holdersName"), - columnName("old_age"), - columnName("old__id"), - columnName("old__index"), - columnName("old__score"), - columnName("old__maxscore"), - columnName("old__sort"), - columnName("old__routing")); + result, columnName("old_dog_name"), columnName("old_holdersName"), columnName("old_age")); verifySchema( result, schema("old_dog_name", "string"), schema("old_holdersName", "string"), - schema("old_age", "bigint"), - schema("old__id", "string"), - schema("old__index", "string"), - schema("old__score", "float"), - schema("old__maxscore", "float"), - schema("old__sort", "bigint"), - schema("old__routing", "string")); + schema("old_age", "bigint")); } @Test @@ -528,4 +513,15 @@ public void testCrossClusterMvExpandWithLimit() throws IOException { verifySchema(result, schema("username", "string"), schema("skills.name", "string")); verifyDataRows(result, rows("limituser", "a"), rows("limituser", "b")); } + + @Test + public void testCrossClusterUnion() throws IOException { + JSONObject result = + executeQuery( + String.format( + "| union [search source=%s | where age < 30] [search source=%s | where age >= 30] |" + + " stats count() by gender", + TEST_INDEX_BANK_REMOTE, TEST_INDEX_BANK_REMOTE)); + verifyColumn(result, columnName("count()"), columnName("gender")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java 
b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java index ed25a1df2d9..4c90b7dce03 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java @@ -613,6 +613,29 @@ public void testUserWithoutMappingPermissionCannotGetFieldMappings() throws IOEx } } + @Test + public void testUserWithoutMappingPermissionGetsPermissionDeniedErrorCode() throws IOException { + // Test that security exceptions return PERMISSION_DENIED error code, not INDEX_NOT_FOUND + try { + executeQueryAsUser(String.format("describe %s", TEST_INDEX_BANK), NO_MAPPING_USER); + fail("Expected security exception for user without mapping permission"); + } catch (ResponseException e) { + assertEquals(403, e.getResponse().getStatusLine().getStatusCode()); + String responseBody = + org.opensearch.sql.legacy.TestUtils.getResponseBody(e.getResponse(), false); + JSONObject responseJson = new JSONObject(responseBody); + + // Verify the error code is PERMISSION_DENIED, not INDEX_NOT_FOUND + assertTrue("Response should have error field", responseJson.has("error")); + JSONObject error = responseJson.getJSONObject("error"); + assertTrue("Error should have code field", error.has("code")); + assertEquals( + "Security exception should return PERMISSION_DENIED error code", + "PERMISSION_DENIED", + error.getString("code")); + } + } + @Test public void testUserWithoutSettingsPermissionCannotGetSettings() throws IOException { // Test that user without settings permission gets 403 error diff --git a/integ-test/src/test/java/org/opensearch/sql/security/SQLCursorPermissionsIT.java b/integ-test/src/test/java/org/opensearch/sql/security/SQLCursorPermissionsIT.java new file mode 100644 index 00000000000..cacec3b2623 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/security/SQLCursorPermissionsIT.java @@ -0,0 +1,192 @@ +/* + * Copyright OpenSearch Contributors + * 
SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.security; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; + +import java.io.IOException; +import java.util.Base64; +import java.util.Locale; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestUtils; + +/** + * Regression test for SQL cursor pagination under Fine-Grained Access Control (FGAC). + * + *

    Exercises the legacy V1 cursor path (triggered by {@code SELECT ... LIMIT n} with {@code + * fetch_size}). Before the fix, page 2 would return 403 because the continuation SearchRequest was + * created with no indices, which Security resolves to a wildcard and denies under FGAC. + */ +public class SQLCursorPermissionsIT extends SQLIntegTestCase { + + private static final String ACCOUNT_USER = "account_cursor_user"; + private static final String ACCOUNT_ROLE = "account_cursor_role"; + private static final String STRONG_PASSWORD = "StrongPassword123!"; + + private boolean initialized = false; + + @Override + protected void init() throws Exception { + loadIndex(Index.ACCOUNT); + createSecurityRolesAndUsers(); + } + + private void createSecurityRolesAndUsers() throws IOException { + if (initialized) { + return; + } + createRole(ACCOUNT_ROLE, TEST_INDEX_ACCOUNT); + createUser(ACCOUNT_USER, ACCOUNT_ROLE); + initialized = true; + } + + private void createRole(String roleName, String indexPattern) throws IOException { + Request request = new Request("PUT", "/_plugins/_security/api/roles/" + roleName); + request.setJsonEntity( + String.format( + Locale.ROOT, + """ + { + "cluster_permissions": [ + "cluster:admin/opensearch/ppl", + "cluster:admin/opensearch/sql" + ], + "index_permissions": [{ + "index_patterns": [ + "%s" + ], + "allowed_actions": [ + "indices:data/read/search*", + "indices:admin/mappings/get", + "indices:admin/mappings/fields/get*", + "indices:monitor/settings/get", + "indices:data/read/point_in_time/create", + "indices:data/read/point_in_time/delete" + ] + }] + } + """, + indexPattern)); + RequestOptions.Builder opts = RequestOptions.DEFAULT.toBuilder(); + opts.addHeader("Content-Type", "application/json"); + request.setOptions(opts); + + Response response = client().performRequest(request); + int status = response.getStatusLine().getStatusCode(); + assertTrue(status == 200 || status == 201); + } + + private void createUser(String username, String 
roleName) throws IOException { + Request userRequest = new Request("PUT", "/_plugins/_security/api/internalusers/" + username); + userRequest.setJsonEntity( + String.format( + Locale.ROOT, + """ + { + "password": "%s", + "backend_roles": [], + "attributes": {} + } + """, + STRONG_PASSWORD)); + RequestOptions.Builder opts = RequestOptions.DEFAULT.toBuilder(); + opts.addHeader("Content-Type", "application/json"); + userRequest.setOptions(opts); + + Response userResponse = client().performRequest(userRequest); + int userStatus = userResponse.getStatusLine().getStatusCode(); + assertTrue(userStatus == 200 || userStatus == 201); + + Request mappingRequest = new Request("PUT", "/_plugins/_security/api/rolesmapping/" + roleName); + mappingRequest.setJsonEntity( + String.format( + Locale.ROOT, + """ + { + "backend_roles": [], + "hosts": [], + "users": ["%s"] + } + """, + username)); + mappingRequest.setOptions(opts); + + Response mappingResponse = client().performRequest(mappingRequest); + int mappingStatus = mappingResponse.getStatusLine().getStatusCode(); + assertTrue(mappingStatus == 200 || mappingStatus == 201); + } + + private JSONObject executeSqlAsUser(String body, String username) throws IOException { + Request request = new Request("POST", "/_plugins/_sql"); + request.setJsonEntity(body); + RequestOptions.Builder opts = RequestOptions.DEFAULT.toBuilder(); + opts.addHeader("Content-Type", "application/json"); + opts.addHeader( + "Authorization", + "Basic " + + Base64.getEncoder().encodeToString((username + ":" + STRONG_PASSWORD).getBytes())); + request.setOptions(opts); + + Response response = client().performRequest(request); + assertEquals(200, response.getStatusLine().getStatusCode()); + return new JSONObject(TestUtils.getResponseBody(response, true)); + } + + @Test + public void simpleSelectUnderFgacSucceeds() throws IOException { + JSONObject result = + executeSqlAsUser( + String.format( + Locale.ROOT, + "{\"query\": \"SELECT firstname FROM %s LIMIT 1\"}", + 
TEST_INDEX_ACCOUNT), + ACCOUNT_USER); + assertTrue(result.has("datarows")); + } + + /** + * Regression for SQL cursor pagination under FGAC. Triggers the V1 cursor path (LIMIT with + * fetch_size) and advances through multiple continuation pages. Before the fix, page 2 returned + * 403 because the continuation SearchRequest carried no indices, which Security resolves to a + * wildcard and denies. + */ + @Test + public void cursorPaginationUnderFgacSucceedsAcrossPages() throws IOException { + // LIMIT forces the V1 cursor path (V2's CanPaginateVisitor rejects LIMIT). The V1 path is + // the one that constructs the continuation SearchRequest without indices, which Security + // denies under FGAC before this fix. + JSONObject firstPage = + executeSqlAsUser( + String.format( + Locale.ROOT, + "{\"fetch_size\": 50, \"query\": \"SELECT age, balance FROM %s LIMIT 234\"}", + TEST_INDEX_ACCOUNT), + ACCOUNT_USER); + assertTrue("first page must include a cursor; body=" + firstPage, firstPage.has("cursor")); + String cursor = firstPage.getString("cursor"); + assertFalse("first page cursor must not be empty", cursor.isEmpty()); + assertTrue( + "expected V1 cursor (prefix 'd:'), got: " + + cursor.substring(0, Math.min(6, cursor.length())), + cursor.startsWith("d:")); + + int pages = 1; + while (!cursor.isEmpty()) { + JSONObject next = + executeSqlAsUser( + String.format(Locale.ROOT, "{\"cursor\": \"%s\"}", cursor), ACCOUNT_USER); + cursor = next.optString("cursor", ""); + pages++; + } + // 234 rows / 50 per page = 5 pages + assertEquals("expected 5 V1 cursor pages under FGAC", 5, pages); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java index 72faeec2afe..1f8aba29dea 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java @@ -18,7 +18,6 @@ import static 
org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -28,7 +27,7 @@ import org.junit.Test; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.search.SearchHits; @@ -283,10 +282,8 @@ private SearchHits query(String query) throws IOException { final String rsp = executeQueryWithStringOutput(query); final XContentParser parser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(rsp)); + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, rsp); return SearchResponse.fromXContent(parser).getHits(); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ExistsPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ExistsPushdownIT.java new file mode 100644 index 00000000000..08ceb8c35f9 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ExistsPushdownIT.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +/** + * Explain-plan integration tests asserting that {@code IS NOT NULL} / {@code IS NULL} predicates + * push down as native OpenSearch {@code exists} DSL rather than as serialized 
script queries. + * + *

    Before this change both predicates serialized through the compounded script engine, producing + * a {@code "script"} clause in the pushdown DSL. After this change the v2 filter builder emits + * {@code {"exists": {"field": ...}}} directly for {@code IS NOT NULL}, and a {@code bool} query + * with a single {@code must_not[exists]} child for {@code IS NULL}. This matches what downstream + * tooling, serverless / AOSS, and the Calcite path already produce. + */ +public class ExistsPushdownIT extends SQLIntegTestCase { + + // Anchored on the surrounding `sourceBuilder=...`, `pitId=` tokens in OpenSearchRequest's + // toString() output. Test-only coupling: if that request-string format changes (token renamed, + // pitId removed), this helper breaks even when the DSL shape is still correct. Update the regex + // anchors if that happens. + private static final Pattern SOURCE_BUILDER_JSON = + Pattern.compile("sourceBuilder=(\\{.*?\\}), pitId=", Pattern.DOTALL); + + /** Extracts and unescapes the sourceBuilder JSON embedded in the explain request string. 
*/ + private static String extractSourceBuilderJson(String explain) { + Matcher m = SOURCE_BUILDER_JSON.matcher(explain); + assertTrue("Explain should contain sourceBuilder JSON:\n" + explain, m.find()); + return m.group(1).replace("\\\"", "\""); + } + + @Override + protected void init() throws Exception { + loadIndex(Index.ACCOUNT); + } + + private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT; + + @Test + public void testIsNotNullPushesDownAsExistsQuery() throws IOException { + String explain = + explainQuery("SELECT age FROM " + TEST_INDEX + " WHERE age IS NOT NULL LIMIT 1"); + String sourceBuilder = extractSourceBuilderJson(explain); + + assertTrue( + "IS NOT NULL should push down as native exists DSL:\n" + sourceBuilder, + sourceBuilder.contains("\"exists\"")); + assertTrue( + "IS NOT NULL exists DSL should target the 'age' field:\n" + sourceBuilder, + sourceBuilder.contains("\"field\":\"age\"")); + assertFalse( + "IS NOT NULL should not fall through to a script query:\n" + sourceBuilder, + sourceBuilder.contains("\"script\"")); + } + + @Test + public void testIsNullPushesDownAsMustNotExistsQuery() throws IOException { + String explain = explainQuery("SELECT age FROM " + TEST_INDEX + " WHERE age IS NULL LIMIT 1"); + String sourceBuilder = extractSourceBuilderJson(explain); + + assertTrue( + "IS NULL should push down as bool/must_not[exists] DSL:\n" + sourceBuilder, + sourceBuilder.contains("\"must_not\"")); + assertTrue( + "IS NULL should wrap a native exists clause:\n" + sourceBuilder, + sourceBuilder.contains("\"exists\"")); + assertTrue( + "IS NULL exists DSL should target the 'age' field:\n" + sourceBuilder, + sourceBuilder.contains("\"field\":\"age\"")); + assertFalse( + "IS NULL should not fall through to a script query:\n" + sourceBuilder, + sourceBuilder.contains("\"script\"")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExecutionIT.java 
b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExecutionIT.java new file mode 100644 index 00000000000..36e78567d54 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExecutionIT.java @@ -0,0 +1,227 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.util.TestUtils.createIndexByRestClient; +import static org.opensearch.sql.util.TestUtils.isIndexExist; +import static org.opensearch.sql.util.TestUtils.performRequest; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.Assume; +import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +/** + * Happy-path execution tests for the vectorSearch() SQL table function. These tests run an actual + * k-NN query against a small in-memory knn_vector index and assert that results come back ordered + * by score and respect any WHERE filters. + * + *

    The k-NN plugin is not provisioned by the default integ-test cluster — each test calls {@link + * Assume#assumeTrue} on {@link #isKnnPluginInstalled()} so the class is silently skipped when k-NN + * is absent. Run locally against a cluster that has opensearch-knn installed. Provisioning k-NN in + * CI is a separate follow-up. + */ +public class VectorSearchExecutionIT extends SQLIntegTestCase { + + private static final String TEST_INDEX = "vector_exec_test"; + + // 6 docs in 2D — two clusters so filter/radial tests have distinguishable results. + // Cluster A near [1, 1]: docs 1-3 (state=TX, ages 25/30/40). + // Cluster B near [9, 9]: docs 4-6 (state=CA, ages 28/35/45). + // Pin Lucene HNSW + L2 so efficient filtering is deterministic (k-NN supports efficient + // filtering only on lucene+hnsw and faiss+hnsw/ivf) and the L2 → 1/(1+d) scoring used by the + // radial min_score test is well-defined. + private static final String MAPPING = + "{" + + " \"settings\": {\"index\": {\"knn\": true}}," + + " \"mappings\": {" + + " \"properties\": {" + + " \"embedding\": {" + + " \"type\": \"knn_vector\"," + + " \"dimension\": 2," + + " \"method\": {" + + " \"name\": \"hnsw\"," + + " \"engine\": \"lucene\"," + + " \"space_type\": \"l2\"" + + " }" + + " }," + + " \"state\": {\"type\": \"keyword\"}," + + " \"age\": {\"type\": \"integer\"}" + + " }" + + " }" + + "}"; + + private static final String BULK_BODY = + "{\"index\":{\"_id\":\"1\"}}\n" + + "{\"embedding\":[1.0,1.0],\"state\":\"TX\",\"age\":25}\n" + + "{\"index\":{\"_id\":\"2\"}}\n" + + "{\"embedding\":[1.1,0.9],\"state\":\"TX\",\"age\":30}\n" + + "{\"index\":{\"_id\":\"3\"}}\n" + + "{\"embedding\":[0.9,1.2],\"state\":\"TX\",\"age\":40}\n" + + "{\"index\":{\"_id\":\"4\"}}\n" + + "{\"embedding\":[9.0,9.0],\"state\":\"CA\",\"age\":28}\n" + + "{\"index\":{\"_id\":\"5\"}}\n" + + "{\"embedding\":[9.1,8.8],\"state\":\"CA\",\"age\":35}\n" + + "{\"index\":{\"_id\":\"6\"}}\n" + + 
"{\"embedding\":[8.7,9.3],\"state\":\"CA\",\"age\":45}\n"; + + @Override + protected void init() throws Exception { + Assume.assumeTrue("k-NN plugin not installed on test cluster", isKnnPluginInstalled()); + if (!isIndexExist(client(), TEST_INDEX)) { + createIndexByRestClient(client(), TEST_INDEX, MAPPING); + Request bulk = new Request("POST", "/" + TEST_INDEX + "/_bulk?refresh=true"); + bulk.setJsonEntity(BULK_BODY); + performRequest(client(), bulk); + } + } + + private static boolean isKnnPluginInstalled() { + try { + Response response = client().performRequest(new Request("GET", "/_cat/plugins?h=component")); + String body = new String(response.getEntity().getContent().readAllBytes()); + return body.contains("opensearch-knn"); + } catch (IOException e) { + return false; + } + } + + // ── Top-k happy path ──────────────────────────────────────────────── + + @Test + public void testTopKReturnsNearestSortedByScore() throws IOException { + JSONObject result = + executeJdbcRequest( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 1.0]', option='k=3') AS v " + + "LIMIT 3"); + + // All 3 returned docs should be from cluster A (ids 1-3), ordered by score desc. + JSONArray rows = result.getJSONArray("datarows"); + assertEquals("Expected 3 rows:\n" + result, 3, rows.length()); + for (int i = 0; i < rows.length(); i++) { + String id = rows.getJSONArray(i).getString(0); + assertTrue( + "Row " + i + " id=" + id + " should be from cluster A (1,2,3):\n" + result, + id.equals("1") || id.equals("2") || id.equals("3")); + } + // Scores must be non-increasing. 
+ double prev = Double.POSITIVE_INFINITY; + for (int i = 0; i < rows.length(); i++) { + double score = rows.getJSONArray(i).getDouble(1); + assertTrue( + "Scores must be sorted desc, got " + score + " after " + prev + ":\n" + result, + score <= prev); + prev = score; + } + } + + // ── POST filter happy path ────────────────────────────────────────── + + @Test + public void testPostFilterReturnsOnlyMatchingDocs() throws IOException { + // Query from cluster B with WHERE state='TX' forces POST filtering to surface TX docs + // (cluster A) even though the vector is closer to cluster B. k=10 covers all 6 docs so + // post-filtering to state='TX' deterministically yields exactly {1,2,3}. filter_type=post + // is specified explicitly because the default placement is EFFICIENT — this test + // guarantees POST continues to work when the user opts into it. + JSONObject result = + executeJdbcRequest( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[9.0, 9.0]', option='k=10,filter_type=post') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 10"); + + assertRowIdsEqual(result, "1", "2", "3"); + } + + // ── EFFICIENT filter happy path ───────────────────────────────────── + + @Test + public void testEfficientFilterReturnsOnlyMatchingDocs() throws IOException { + // Query vector sits on cluster A (TX) but WHERE state='CA' forces EFFICIENT filtering to + // navigate HNSW toward CA docs. With k=3, a POST-filter implementation would return 0 rows + // (the 3 nearest candidates are all TX, which get filtered out); an efficient-filter + // implementation returns exactly the 3 CA docs {4,5,6}. This asymmetry makes the test + // discriminate between the two filter modes. 
+ JSONObject result = + executeJdbcRequest( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 1.0]', option='k=3,filter_type=efficient') AS v " + + "WHERE v.state = 'CA' " + + "LIMIT 3"); + + assertRowIdsEqual(result, "4", "5", "6"); + } + + // ── Radial happy paths ────────────────────────────────────────────── + + @Test + public void testRadialMaxDistanceReturnsOnlyNearDocs() throws IOException { + // max_distance=1.0 (L2) centered on [1,1] includes all 3 cluster A docs (max L2 ≈ 0.22) + // and excludes cluster B which is ~11 units away. + JSONObject result = + executeJdbcRequest( + "SELECT v._id " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 1.0]', option='max_distance=1.0') AS v " + + "LIMIT 10"); + + assertRowIdsEqual(result, "1", "2", "3"); + } + + @Test + public void testRadialMinScoreReturnsOnlyHighScoreDocs() throws IOException { + // For L2 space, OpenSearch score = 1/(1+distance). Centered on [1,1], cluster A docs + // score ~0.82-1.0 and cluster B scores ~0.08. min_score=0.5 yields exactly {1,2,3}. + JSONObject result = + executeJdbcRequest( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 1.0]', option='min_score=0.5') AS v " + + "LIMIT 10"); + + JSONArray rows = result.getJSONArray("datarows"); + for (int i = 0; i < rows.length(); i++) { + double score = rows.getJSONArray(i).getDouble(1); + assertTrue("Row " + i + " score=" + score + " should be >= 0.5:\n" + result, score >= 0.5); + } + assertRowIdsEqual(result, "1", "2", "3"); + } + + /** Asserts the result's datarows column 0 contains exactly the given ids (as a set). */ + private static void assertRowIdsEqual(JSONObject result, String... 
expectedIds) { + JSONArray rows = result.getJSONArray("datarows"); + assertEquals( + "Expected " + expectedIds.length + " rows:\n" + result, expectedIds.length, rows.length()); + Set expected = new HashSet<>(Arrays.asList(expectedIds)); + Set actual = new HashSet<>(); + for (int i = 0; i < rows.length(); i++) { + actual.add(rows.getJSONArray(i).getString(0)); + } + assertEquals("Row id set mismatch:\n" + result, expected, actual); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExplainIT.java new file mode 100644 index 00000000000..8719189b13a --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchExplainIT.java @@ -0,0 +1,559 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +/** + * Explain-plan integration tests for vectorSearch SQL table function. These tests verify DSL + * push-down shape via _explain. They do NOT require the k-NN plugin since _explain only parses and + * plans the query without executing it against a knn index. + */ +public class VectorSearchExplainIT extends SQLIntegTestCase { + + // Matches WrapperQueryBuilder's base64 payload in explain JSON. The explain output escapes + // quotes as \", so the regex tolerates both \" and " forms around the query key/value. + private static final Pattern WRAPPER_PAYLOAD = + Pattern.compile("\\\\?\"query\\\\?\":\\\\?\"([A-Za-z0-9+/=]+)\\\\?\""); + // Anchored on the surrounding `sourceBuilder=...`, `pitId=` tokens in OpenSearchRequest's + // toString() output. 
Test-only coupling: if that request-string format changes (token renamed, + // pitId removed), this helper breaks even when the DSL shape is still correct. Update the regex + // anchors if that happens. + private static final Pattern SOURCE_BUILDER_JSON = + Pattern.compile("sourceBuilder=(\\{.*?\\}), pitId=", Pattern.DOTALL); + + /** Decodes every base64-encoded wrapper payload in the explain output into its knn JSON. */ + private static List decodeWrapperKnnJsons(String explain) { + List payloads = new ArrayList<>(); + Matcher m = WRAPPER_PAYLOAD.matcher(explain); + while (m.find()) { + payloads.add(new String(Base64.getDecoder().decode(m.group(1)), StandardCharsets.UTF_8)); + } + return payloads; + } + + /** Returns the single wrapper knn JSON, asserting exactly one is present. */ + private static String decodeSoleKnnJson(String explain) { + List payloads = decodeWrapperKnnJsons(explain); + assertEquals( + "Expected exactly one wrapper query payload in explain:\n" + explain, 1, payloads.size()); + return payloads.get(0); + } + + /** Extracts and unescapes the sourceBuilder JSON embedded in the explain request string. */ + private static String extractSourceBuilderJson(String explain) { + Matcher m = SOURCE_BUILDER_JSON.matcher(explain); + assertTrue("Explain should contain sourceBuilder JSON:\n" + explain, m.find()); + return m.group(1).replace("\\\"", "\""); + } + + @Override + protected void init() throws Exception { + // _explain needs the index to exist for field resolution. 
+ loadIndex(Index.ACCOUNT); + } + + private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT; + + // ── Top-k / radial DSL shape ───────────────────────────────────────── + + @Test + public void testExplainTopKProducesKnnQuery() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=5') AS v " + + "LIMIT 5"); + + assertTrue( + "Explain should contain track_scores:\n" + explain, explain.contains("track_scores")); + + // Top-k without WHERE should have the knn at the root, not wrapped in an outer bool. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Top-k without WHERE should not wrap knn in an outer bool:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue( + "knn JSON should contain the vector values:\n" + knnJson, + knnJson.contains("[1.0,2.0,3.0]")); + assertTrue("knn JSON should contain k=5:\n" + knnJson, knnJson.contains("\"k\":5")); + assertFalse( + "Top-k without WHERE should not embed a filter:\n" + knnJson, knnJson.contains("filter")); + } + + @Test + public void testExplainRadialMaxDistanceProducesKnnQuery() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='max_distance=10.5') AS v " + + "LIMIT 100"); + + // Radial without WHERE should have the knn at the root, not wrapped in an outer bool. 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Radial without WHERE should not wrap knn in an outer bool:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue( + "knn JSON should contain the vector values:\n" + knnJson, knnJson.contains("[1.0,2.0]")); + assertTrue( + "knn JSON should contain max_distance=10.5:\n" + knnJson, + knnJson.contains("\"max_distance\":10.5")); + assertFalse( + "Radial without WHERE should not embed a filter:\n" + knnJson, knnJson.contains("filter")); + } + + @Test + public void testExplainRadialMinScoreProducesKnnQuery() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='min_score=0.8') AS v " + + "LIMIT 100"); + + // Radial without WHERE should have the knn at the root, not wrapped in an outer bool. 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Radial without WHERE should not wrap knn in an outer bool:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue( + "knn JSON should contain the vector values:\n" + knnJson, knnJson.contains("[1.0,2.0]")); + assertTrue( + "knn JSON should contain min_score=0.8:\n" + knnJson, + knnJson.contains("\"min_score\":0.8")); + assertFalse( + "Radial without WHERE should not embed a filter:\n" + knnJson, knnJson.contains("filter")); + } + + // ── Default (EFFICIENT) pre-filter DSL shape ──────────────────────── + + @Test + public void testExplainDefaultFilterProducesKnnWithFilter() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 10"); + + // Default (EFFICIENT) shape: WHERE is embedded inside knn.filter, the knn JSON is base64- + // encoded inside a WrapperQueryBuilder, and there is no outer bool/must wrapping. 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Default EFFICIENT mode should not produce bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Default EFFICIENT mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue("knn JSON should contain k=10:\n" + knnJson, knnJson.contains("\"k\":10")); + assertTrue( + "Default EFFICIENT mode must embed filter inside knn:\n" + knnJson, + knnJson.contains("filter")); + assertTrue( + "Default EFFICIENT mode must embed the WHERE predicate inside knn:\n" + knnJson, + knnJson.contains("state")); + } + + @Test + public void testExplainDefaultCompoundPredicateProducesKnnWithFilter() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10') AS v " + + "WHERE v.state = 'TX' AND v.age > 30 " + + "LIMIT 10"); + + // Compound default-mode WHERE must also route through knn.filter: no outer bool/must, and + // both predicate fields embedded inside the knn payload. 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Default EFFICIENT mode should not produce bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Default EFFICIENT mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue("knn JSON should contain k=10:\n" + knnJson, knnJson.contains("\"k\":10")); + assertTrue( + "Compound default EFFICIENT must embed filter inside knn:\n" + knnJson, + knnJson.contains("filter")); + assertTrue( + "Compound default EFFICIENT must embed the state predicate inside knn:\n" + knnJson, + knnJson.contains("state")); + assertTrue( + "Compound default EFFICIENT must embed the age predicate inside knn:\n" + knnJson, + knnJson.contains("age")); + } + + @Test + public void testExplainDefaultRadialWithWhereProducesKnnWithFilter() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='max_distance=10.5') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 100"); + + // Radial + default WHERE must also use the EFFICIENT shape: no outer bool/must, radial + // parameters preserved inside the knn payload, and the WHERE predicate embedded alongside. 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Default EFFICIENT mode should not produce bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Default EFFICIENT mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertTrue( + "knn JSON should contain max_distance=10.5:\n" + knnJson, + knnJson.contains("\"max_distance\":10.5")); + assertTrue( + "Radial default EFFICIENT must embed filter inside knn:\n" + knnJson, + knnJson.contains("filter")); + assertTrue( + "Radial default EFFICIENT must embed the WHERE predicate inside knn:\n" + knnJson, + knnJson.contains("state")); + } + + // ── Sort + LIMIT explain ───────────────────────────────────────────── + + @Test + public void testOrderByScoreDescExplainSucceeds() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v._score DESC " + + "LIMIT 5"); + + assertTrue( + "Explain should succeed with ORDER BY _score DESC:\n" + explain, + explain.contains("wrapper")); + } + + @Test + public void testExplainLimitWithinKSucceeds() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=10') AS v " + + "LIMIT 5"); + + assertTrue("Explain should succeed with LIMIT <= k:\n" + explain, explain.contains("wrapper")); + } + + // ── filter_type explain ───────────────────────────────────────────── + + @Test + public void 
testExplainFilterTypePostProducesBoolQuery() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10,filter_type=post') AS v " + + "WHERE v.state = 'TX' " + + "LIMIT 10"); + + // Explicit filter_type=post must produce the same bool.must=[knn]/bool.filter=[term] shape as + // the default, and the WHERE predicate must NOT leak into the knn payload (that would be + // efficient mode). This is the key false-positive guard: substring-only checks would pass for + // efficient mode too. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertTrue( + "Explain should contain bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertTrue( + "Explain should contain must:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + assertTrue( + "Explain should contain filter:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"filter\"")); + assertTrue( + "Explain should contain the outer state predicate:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"state.keyword\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertTrue( + "knn JSON should target the embedding field:\n" + knnJson, + knnJson.contains("\"embedding\"")); + assertFalse( + "filter_type=post must not embed the WHERE predicate inside knn:\n" + knnJson, + knnJson.contains("state")); + assertFalse( + "filter_type=post must not embed a filter inside knn:\n" + knnJson, + knnJson.contains("filter")); + } + + @Test + public void testExplainFilterTypeEfficientProducesKnnWithFilter() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v " + + 
"WHERE v.state = 'TX' " + + "LIMIT 5"); + + // Efficient mode: knn rebuilt with filter inside, wrapped in WrapperQueryBuilder. + // The knn JSON (including the embedded filter) is base64-encoded inside the wrapper, + // so we verify structure by: (1) no bool/must in plaintext (that would be post-filter shape), + // (2) decode the base64 payload to confirm the filter and predicate field are embedded inside + // the knn query. + String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Efficient mode should not produce bool query (that is post-filter shape):\n" + + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Efficient mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue( + "Efficient mode knn JSON should contain filter:\n" + knnJson, knnJson.contains("filter")); + assertTrue( + "Efficient mode knn JSON should contain the WHERE predicate field:\n" + knnJson, + knnJson.contains("state")); + } + + @Test + public void testEfficientFilterWithOrderByScoreDescSucceeds() throws IOException { + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v " + + "WHERE v.state = 'TX' " + + "ORDER BY v._score DESC " + + "LIMIT 5"); + + // Same efficient-mode shape guarantee as testExplainFilterTypeEfficientProducesKnnWithFilter, + // with an added ORDER BY _score DESC: no outer bool/must, and the WHERE predicate must be + // embedded inside the knn payload (efficient filtering, not post-filter). 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertFalse( + "Efficient mode should not produce bool query (that is post-filter shape):\n" + + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertFalse( + "Efficient mode should not contain must clause:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + + String knnJson = decodeSoleKnnJson(explain); + assertTrue( + "Efficient mode knn JSON should contain filter:\n" + knnJson, knnJson.contains("filter")); + assertTrue( + "Efficient mode knn JSON should contain the WHERE predicate field:\n" + knnJson, + knnJson.contains("state")); + } + + // ── BETWEEN / NOT IN pushdown regression guards ───────────────────── + // These tests lock in the DSL shape currently produced for BETWEEN and NOT IN predicates + // when pushed down through vectorSearch(). They exist to catch silent regressions where a + // change in the v2 FilterQueryBuilder pipeline would fall back to a serialized script query + // instead of the native range/bool shape the cluster can index-accelerate. + + @Test + public void testBetweenPushesAsRange() throws IOException { + // Pin filter_type=post to keep the regression guard aimed at the post-filter serialization + // path: these assertions lock in the outer bool/must/filter shape that only appears when + // WHERE is applied alongside knn rather than embedded under knn.filter. + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10,filter_type=post') AS v " + + "WHERE v.balance BETWEEN 50 AND 200 " + + "LIMIT 10"); + + // BETWEEN is desugared by the analyzer into AND(>=, <=), which FilterQueryBuilder renders as + // two range clauses combined under a bool. The goal here is regression lock-in: ensure the + // pushed filter is native range DSL, not a serialized script query. 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertTrue( + "Explain should contain bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertTrue( + "Explain should contain must clause (knn in scoring context):\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + assertTrue( + "Explain should contain filter clause (WHERE in non-scoring context):\n" + + sourceBuilderJson, + sourceBuilderJson.contains("\"filter\"")); + assertTrue( + "BETWEEN should push as native range DSL:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"range\"")); + assertTrue( + "Range should target balance field:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"balance\"")); + // RangeQueryBuilder serializes inclusive bounds as from/to + include_lower/include_upper. Lock + // both the lower bound (50) and upper bound (200) are present in the pushed DSL. + assertTrue( + "Range should contain lower bound 50:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"from\" : 50") || sourceBuilderJson.contains("\"from\":50")); + assertTrue( + "Range should contain upper bound 200:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"to\" : 200") || sourceBuilderJson.contains("\"to\":200")); + // Script-query fallback sentinel: the CompoundedScriptEngine lang marker must NOT appear when + // BETWEEN is pushed down natively. + assertFalse( + "BETWEEN must not fall back to a serialized script query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"script\"")); + + // POST-filter mode (default): the WHERE predicate must live OUTSIDE the knn payload. 
+ String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertFalse( + "Post-filter mode must not embed the balance predicate inside knn:\n" + knnJson, + knnJson.contains("balance")); + assertFalse( + "Post-filter mode must not embed a range inside knn:\n" + knnJson, + knnJson.contains("range")); + } + + @Test + public void testNotInPushesAsMustNotTerms() throws IOException { + // Pin filter_type=post to keep the regression guard aimed at the post-filter serialization + // path: these assertions lock in the outer bool/must/filter shape that only appears when + // WHERE is applied alongside knn rather than embedded under knn.filter. + String explain = + explainQuery( + "SELECT v._id, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0, 3.0]', option='k=10,filter_type=post') AS v " + + "WHERE v.gender NOT IN ('M', 'F') " + + "LIMIT 10"); + + // v2 analyzer desugars `x NOT IN (a, b)` into `NOT(x = a OR x = b)`. FilterQueryBuilder maps + // NOT to bool.must_not and OR to bool.should, so the pushed DSL is must_not[should[term,term]] + // rather than a single terms clause. The shape we're locking in is: native bool with must_not + // on the keyword subfield, *not* a serialized script query. 
+ String sourceBuilderJson = extractSourceBuilderJson(explain); + assertTrue( + "Explain should contain bool query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"bool\"")); + assertTrue( + "Explain should contain must clause (knn in scoring context):\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must\"")); + assertTrue( + "Explain should contain filter clause (WHERE in non-scoring context):\n" + + sourceBuilderJson, + sourceBuilderJson.contains("\"filter\"")); + assertTrue( + "NOT IN should push as bool.must_not:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"must_not\"")); + // OR-of-equals desugaring means the two literals land in a bool.should of term clauses. + assertTrue( + "NOT IN should contain should clause for OR-of-equals desugaring:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"should\"")); + assertTrue( + "NOT IN should produce term clauses for each literal:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"term\"")); + // Terms target the keyword subfield of gender (text field with .keyword multi-field). + assertTrue( + "NOT IN term clauses should target gender.keyword:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"gender.keyword\"")); + // Both literals must be present in the pushed DSL. + assertTrue( + "NOT IN should contain the 'M' literal:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"M\"")); + assertTrue( + "NOT IN should contain the 'F' literal:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"F\"")); + // Script-query fallback sentinel: native pushdown must not degrade to a serialized script. + assertFalse( + "NOT IN must not fall back to a serialized script query:\n" + sourceBuilderJson, + sourceBuilderJson.contains("\"script\"")); + + // POST-filter mode (default): the WHERE predicate must live OUTSIDE the knn payload. 
+ String knnJson = decodeSoleKnnJson(explain); + assertTrue("knn JSON should contain knn key:\n" + knnJson, knnJson.contains("\"knn\"")); + assertFalse( + "Post-filter mode must not embed the gender predicate inside knn:\n" + knnJson, + knnJson.contains("gender")); + assertFalse( + "Post-filter mode must not embed must_not inside knn:\n" + knnJson, + knnJson.contains("must_not")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java new file mode 100644 index 00000000000..c10b3a219f6 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java @@ -0,0 +1,755 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.hamcrest.Matchers.containsString; + +import java.io.IOException; +import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +/** + * Integration tests for vectorSearch SQL table function — validation and error paths. These tests + * verify that invalid inputs are rejected with clear error messages. Explain-plan DSL shape tests + * live in {@link VectorSearchExplainIT}. 
+ */ +public class VectorSearchIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.ACCOUNT); + } + + private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT; + + // ── Validation error paths ──────────────────────────────────────────── + + @Test + public void testMutualExclusivityRejectsKAndMaxDistance() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=5,max_distance=10') AS v")); + + assertThat(ex.getMessage(), containsString("Only one of")); + } + + @Test + public void testMutualExclusivityRejectsKAndMinScore() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=5,min_score=0.5') AS v")); + + assertThat(ex.getMessage(), containsString("Only one of")); + } + + @Test + public void testKTooLargeRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=10001') AS v")); + + assertThat(ex.getMessage(), containsString("k must be between 1 and 10000")); + } + + @Test + public void testKZeroRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=0') AS v")); + + assertThat(ex.getMessage(), containsString("k must be between 1 and 10000")); + } + + @Test + public void testUnknownOptionKeyRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', 
option='k=5,method.ef_search=100') AS v")); + + assertThat(ex.getMessage(), containsString("Unknown option key")); + } + + @Test + public void testEmptyVectorRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("must not be empty")); + } + + @Test + public void testInvalidFieldNameRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', " + + "field='field\\\"injection', vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid field name")); + } + + @Test + public void testMissingRequiredOptionRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='') AS v")); + + assertThat(ex.getMessage(), containsString("Missing required option")); + } + + @Test + public void testRadialWithoutLimitRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='max_distance=10.5') AS v")); + + assertThat(ex.getMessage(), containsString("LIMIT is required for radial vector search")); + } + + // ── Sort restriction validation ───────────────────────────────────────── + + @Test + public void testOrderByNonScoreFieldRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v.firstname ASC 
" + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("unsupported sort expression")); + } + + @Test + public void testOrderByScoreAscRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v._score ASC " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("_score ASC is not supported")); + } + + // ── filter_type validation ──────────────────────────────────────────── + + @Test + public void testFilterTypeEfficientWithoutWhereRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("filter_type requires a pushdownable WHERE clause")); + } + + @Test + public void testFilterTypePostWithoutWhereRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=post') AS v " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("filter_type requires a pushdownable WHERE clause")); + } + + @Test + public void testInvalidFilterTypeRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='k=5,filter_type=bogus') AS v")); + + assertThat(ex.getMessage(), containsString("filter_type must be one of")); + } + + @Test + public void testGroupByRejects() throws IOException { + ResponseException ex = + expectThrows( 
+ ResponseException.class, + () -> + executeQuery( + "SELECT v.gender, COUNT(*) FROM vectorSearch(table='" + + TEST_INDEX + + "', field='f', vector='[1.0]', option='k=5') AS v GROUP BY v.gender")); + + assertThat( + ex.getMessage(), + containsString("Aggregations are not supported on vectorSearch() relations")); + } + + @Test + public void testBareAggregateRejects() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT COUNT(*) FROM vectorSearch(table='" + + TEST_INDEX + + "', field='f', vector='[1.0]', option='k=5') AS v")); + + assertThat( + ex.getMessage(), + containsString("Aggregations are not supported on vectorSearch() relations")); + } + + // ── OFFSET / WHERE _score / filter_type=efficient script rejection ─── + + @Test + public void testOffsetRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "LIMIT 5 OFFSET 2")); + + assertThat(ex.getMessage(), containsString("OFFSET is not supported on vectorSearch()")); + assertThat(ex.getMessage(), containsString("LIMIT only")); + } + + @Test + public void testScoreInWhereRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "WHERE v._score > 0.5 " + + "LIMIT 5")); + + assertThat(ex.getMessage(), containsString("WHERE on _score is not supported")); + assertThat(ex.getMessage(), containsString("min_score")); + } + + @Test + public void testOrderByScoreDescLimitOffsetRejected() throws IOException { + // The natural user shape pairs sort with pagination: ORDER BY _score DESC LIMIT N OFFSET M. 
+ // The planner's pushDownSort() path can collapse the sort+limit into a top-k size, so OFFSET + // must still be rejected by pushDownLimit when the combined form is used. Without this guard + // the parent builder would push `from: ` and silently shift the top-k window. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v._score DESC " + + "LIMIT 5 OFFSET 2")); + + assertThat(ex.getMessage(), containsString("OFFSET is not supported on vectorSearch()")); + } + + @Test + public void testEfficientModeRejectsScriptPredicate() throws IOException { + // WHERE age + 1 > 30 compiles to a ScriptQueryBuilder under the hood because the outer > + // is applied to an arithmetic expression, not a direct field reference. Efficient mode + // cannot embed script queries under knn.filter, so this must be rejected up front with a + // clear remediation hint instead of a cluster-side failure. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', " + + "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v " + + "WHERE v.age + 1 > 30 " + + "LIMIT 5")); + + assertThat( + ex.getMessage(), containsString("vectorSearch WHERE pre-filtering does not support")); + assertThat(ex.getMessage(), containsString("script queries")); + } + + // ── k-NN plugin capability check ────────────────────────────────────── + // The default integ-test cluster does not have the k-NN plugin installed. Execution-path + // queries against vectorSearch() should therefore fail with the clear "k-NN plugin missing" + // error from KnnPluginCapability, while _explain continues to work because the capability + // probe is deferred to scan open() and does not run during analysis/planning. 
+ + @Test + public void testExecutionWithoutKnnPluginReturnsCapabilityError() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "LIMIT 5")); + + // Lock in the full user-facing sentence, not just loose substrings. The exact wording is + // part of the contract and regressions should fail loudly rather than keep passing on a + // subtly reworded message. + assertThat( + ex.getMessage(), + containsString( + "vectorSearch() requires the k-NN plugin, which is not installed on this cluster.")); + } + + @Test + public void testExplainWithoutKnnPluginStillWorks() throws IOException { + // _explain only parses and plans the query. It must NOT require the k-NN plugin — the + // capability probe is intentionally deferred to scan open() so pluginless clusters can + // still inspect query plans. If this test starts failing with "k-NN plugin not installed", + // the probe has leaked back into an analysis-time path. + String explain = + explainQuery( + "SELECT v._id FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "LIMIT 5"); + + // Assert the scan-operator name, not just "wrapper": the name confirms the plan reached + // the vectorSearch scan builder rather than some other scan shape. + assertThat(explain, containsString("VectorSearchIndexScan")); + assertThat(explain, containsString("wrapper")); + } + + // ── Argument shape validation ───────────────────────────────────────── + + @Test + public void testInvalidTableNameRejected() throws IOException { + // A slash is outside the SAFE_FIELD_NAME regex and is not a valid OpenSearch index character, + // so it should be rejected at the SQL layer before any cluster call is attempted. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='idx/evil', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + } + + @Test + public void testWildcardTableRejectedWithDedicatedMessage() throws IOException { + // Wildcards in a table name fan out to multiple indices, which vectorSearch() does not + // support (top-k semantics, dimension checks, and embedded filter JSON are not defined + // across heterogeneous shards). Surface a dedicated user-facing error instead of the + // generic "must contain only alphanumeric..." fallback. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='sql_vector_*', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + assertThat(ex.getMessage(), containsString("wildcards")); + assertThat(ex.getMessage(), containsString("single concrete index")); + } + + @Test + public void testMultiTargetTableRejectedWithDedicatedMessage() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='idx_a,idx_b', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + assertThat(ex.getMessage(), containsString("multi-target")); + } + + @Test + public void testDuplicateNamedArgRejected() throws IOException { + // Previously this crashed the server with 500 ArrayIndexOutOfBoundsException. Must now + // surface as a clean 400 with a user-facing message. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='a', table='b', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Duplicate argument name")); + } + + @Test + public void testUnknownNamedArgRejected() throws IOException { + // A grammar-legal but unknown name must surface as a clean 400 from the resolver. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(bogus='idx', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Unknown argument name")); + } + + @Test + public void testPositionalArgRejected() throws IOException { + // The real shape a user would hit: `vectorSearch('idx', field=..., vector=..., option=...)`. + // The V2 grammar now accepts this form so the AstBuilder can surface a clean + // SemanticCheckException instead of letting the request fall back to the legacy SQL engine, + // which previously returned 200 with zero rows. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch('idx', field='embedding', " + + "vector='[1.0, 1.0]', option='k=3') AS v LIMIT 3")); + + assertThat(ex.getMessage(), containsString("requires named arguments")); + } + + @Test + public void testCaseInsensitiveDuplicateArgRejected() throws IOException { + // Argument names are normalized to lower-case, so `table` and `TABLE` must be treated as the + // same key and rejected as a duplicate rather than silently keeping one of the two values. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='a', TABLE='b', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Duplicate argument name")); + } + + @Test + public void testTableNameAllRejected() throws IOException { + // `_all` would fan out to every index. The preview contract is a single concrete index or + // alias, so it must be rejected explicitly rather than allowed to route broadly. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='_all', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + } + + @Test + public void testTableNameSingleDotRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='.', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + } + + @Test + public void testTableNameDoubleDotRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='..', field='f', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid table name")); + } + + @Test + public void testMissingRequiredArgRejected() throws IOException { + // Omitting a required named argument (here: `field`) must produce a clean 400 rather than a + // NullPointerException or a legacy-engine fallback. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='a', " + + "vector='[1.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("requires 4 arguments")); + } + + /** + * Users running FROM vectorSearch(...) without an AS alias previously received an opaque parser + * error from the legacy SQL engine fallback. The clearer SemanticCheckException from the v2 + * engine must surface to the user instead. + */ + @Test + public void testVectorSearchRequiresAlias() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT * FROM vectorSearch(" + + "table='t', field='f', vector='[1.0]', option='k=5') " + + "LIMIT 3")); + + String body = ex.getMessage(); + assertThat(body, containsString("requires a table alias")); + assertThat(body, containsString("vectorSearch")); + } + + // Synthetic column collision (metadata vs. user field). + // vectorSearch() exposes synthetic v._id and v._score columns. A user mapping property of the + // same name would collide on the response tuple key. OpenSearch blocks _id at mapping time; + // _score is not blocked, so VectorSearchIndex rejects it at scan-build time. + + @Test + public void testUserMappingWithIdFieldIsRejectedByOpenSearch() throws IOException { + // Locks in OpenSearch's rejection of a user property named _id: without it, v._id could + // collide with a user field at response time. The exact error message belongs to OpenSearch. 
+ String indexName = "vs_collision_id"; + deleteIndexIfExists(indexName); + + Request createIndex = new Request("PUT", "/" + indexName); + createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_id\":{\"type\":\"keyword\"}}}}"); + + expectThrows(ResponseException.class, () -> client().performRequest(createIndex)); + } + + @Test + public void testVectorSearchAgainstIndexWithScoreFieldRejects() throws IOException { + // _explain exercises planning (where the guard runs) without needing the k-NN plugin. + String indexName = "vs_collision_score"; + deleteIndexIfExists(indexName); + + Request createIndex = new Request("PUT", "/" + indexName); + createIndex.setJsonEntity("{\"mappings\":{\"properties\":{\"_score\":{\"type\":\"float\"}}}}"); + client().performRequest(createIndex); + + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT v._score FROM vectorSearch(table='" + + indexName + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "LIMIT 5")); + + assertEquals(400, ex.getResponse().getStatusLine().getStatusCode()); + assertThat(ex.getMessage(), containsString("_score")); + assertThat(ex.getMessage(), containsString("collides")); + } + + @Test + public void testSemicolonSeparatorInVectorRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0;2.0]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("vector=")); + assertThat(ex.getMessage(), containsString("comma-separated")); + } + + @Test + public void testNegativeMinScoreRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='min_score=-0.5') AS v")); + + assertThat(ex.getMessage(), containsString("min_score")); + 
assertThat(ex.getMessage(), containsString("non-negative")); + } + + @Test + public void testNegativeMaxDistanceRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0]', option='max_distance=-1.0') AS v")); + + assertThat(ex.getMessage(), containsString("max_distance")); + assertThat(ex.getMessage(), containsString("non-negative")); + } + + @Test + public void testTrailingCommaInVectorRejected() throws IOException { + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT v._id FROM vectorSearch(table='t', field='f', " + + "vector='[1.0,2.0,]', option='k=5') AS v")); + + assertThat(ex.getMessage(), containsString("Invalid vector component")); + assertThat(ex.getMessage(), containsString("trailing or consecutive commas")); + } + + // ── Alias with multiple backing indices ─────────────────────────────── + // vectorSearch() accepts an alias as `table=`. When the alias points at multiple backing + // indices, planning must accept the alias string instead of treating it as a wildcard or + // multi-target. Execution correctness over compatible knn_vector mappings is a separate + // concern covered by k-NN-enabled tests/follow-up; these tests lock in planning acceptance + // only, via _explain on the default no-kNN cluster. + + @Test + public void testExplainOverAliasWithMultipleBackingIndices() throws IOException { + // Create two indices with identical keyword mappings (no knn_vector, since the plugin is + // not installed) and a shared alias. We only assert the planner accepts the alias; whether + // k-NN accepts the alias at execution is a separate concern tested on a k-NN-enabled + // cluster. + // Randomized names so a stale alias/index left by an aborted prior run of this class does + // not shadow a fresh setup, which is a concrete risk on local reruns. 
+ String suffix = java.util.UUID.randomUUID().toString().replace("-", "").substring(0, 8); + String idx1 = "vector_alias_backing_1_" + suffix; + String idx2 = "vector_alias_backing_2_" + suffix; + String alias = "vector_alias_combined_" + suffix; + try { + createSimpleIndex(idx1); + createSimpleIndex(idx2); + addToAlias(idx1, alias); + addToAlias(idx2, alias); + + String explain = + explainQuery( + "SELECT v._id FROM vectorSearch(table='" + + alias + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v"); + + assertThat(explain, containsString("VectorSearchIndexScan")); + assertThat(explain, containsString(alias)); + } finally { + // Deleting the backing indices removes the alias automatically, but delete the alias + // first for robustness against partial setup failures. + deleteAliasIfExists(alias); + deleteIndexIfExists(idx1); + deleteIndexIfExists(idx2); + } + } + + private void createSimpleIndex(String indexName) throws IOException { + Request create = new Request("PUT", "/" + indexName); + create.setJsonEntity("{\"mappings\":{\"properties\":{\"state\":{\"type\":\"keyword\"}}}}"); + client().performRequest(create); + } + + private void addToAlias(String indexName, String aliasName) throws IOException { + Request req = new Request("POST", "/_aliases"); + req.setJsonEntity( + "{\"actions\":[{\"add\":{\"index\":\"" + + indexName + + "\",\"alias\":\"" + + aliasName + + "\"}}]}"); + client().performRequest(req); + } + + private void deleteIndexIfExists(String indexName) { + try { + client().performRequest(new Request("DELETE", "/" + indexName)); + } catch (IOException ignored) { + // Index does not exist, which is fine. + } + } + + private void deleteAliasIfExists(String aliasName) { + try { + client().performRequest(new Request("DELETE", "/_all/_alias/" + aliasName)); + } catch (IOException ignored) { + // Alias does not exist, which is fine. 
+ } + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchSubqueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchSubqueryIT.java new file mode 100644 index 00000000000..04346f87a76 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchSubqueryIT.java @@ -0,0 +1,306 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.hamcrest.Matchers.containsString; + +import java.io.IOException; +import org.junit.Test; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +/** + * Integration tests for vectorSearch() used inside subqueries. Locks in the rejection of outer + * WHERE on a vectorSearch() subquery, which would otherwise silently yield zero rows because the + * outer predicate is applied only after the k-NN search has already selected top-k documents by + * vector distance. + * + *

    Uses _explain-only plus error-path queries, so the k-NN plugin is not required — the planner + * validation fires during planning, before any k-NN execution. + */ +public class VectorSearchSubqueryIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.ACCOUNT); + } + + private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT; + + @Test + public void testOuterWhereOnSubqueryRejected() throws IOException { + // Without the guard the outer predicate is dropped from the pushed DSL and applied only in + // memory after k-NN returned top-k, which can yield silent zero rows. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "WHERE t.state = 'TX'")); + + assertThat( + ex.getMessage(), + containsString("Outer WHERE on a vectorSearch() subquery is not supported")); + assertThat(ex.getMessage(), containsString("silently yield zero rows")); + } + + @Test + public void testOuterWhereOnSubqueryRejectedWithLimit() throws IOException { + // Same shape with an outer LIMIT — exercises a second planner path (LogicalLimit above + // LogicalFilter above LogicalProject above scan builder). 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "WHERE t.state = 'TX' " + + "LIMIT 3")); + + assertThat( + ex.getMessage(), + containsString("Outer WHERE on a vectorSearch() subquery is not supported")); + } + + @Test + public void testOuterWhereOnSubqueryRejectedExplain() throws IOException { + // The guard must fire during planning, before any k-NN execution — so _explain must also + // return the validation error rather than a silently dropped predicate in the DSL. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "WHERE t.state = 'TX'")); + + assertThat( + ex.getMessage(), + containsString("Outer WHERE on a vectorSearch() subquery is not supported")); + } + + @Test + public void testOuterWhereWithInnerWhereStillRejected() throws IOException { + // Outer WHERE must be rejected even when the subquery already has its own inner WHERE. + // The shape reaches the planner as Filter(outer) -> Project -> Filter(inner) -> Scan, and + // the outer predicate is still separated from the k-NN search by the subquery project + // boundary. Without preserving the project marker across the inner filter, the walker + // would miss this shape and the outer predicate would silently produce zero rows. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + executeQuery( + "SELECT * FROM (SELECT v.firstname, v.state, v.age " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "WHERE v.age > 10) t " + + "WHERE t.state = 'TX'")); + + assertThat( + ex.getMessage(), + containsString("Outer WHERE on a vectorSearch() subquery is not supported")); + } + + @Test + public void testInnerWhereStillWorks() throws IOException { + // Positive control: WHERE directly on vectorSearch() inside the subquery must still plan + // successfully — the rejection is scoped to OUTER filters that cannot reach the push-down + // contract. We use _explain because the default integ-test cluster has no k-NN plugin. + String explain = + explainQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "WHERE v.state = 'TX') t"); + + assertThat(explain, containsString("wrapper")); + // Inner WHERE should push down, so the state predicate appears in the DSL. + assertThat(explain, containsString("state")); + } + + @Test + public void testInnerWhereWithOuterProjectStillWorks() throws IOException { + // Another positive control: the outer layer can still project and limit columns from the + // subquery without the guard firing — only outer WHERE is rejected. 
+ String explain = + explainQuery( + "SELECT t.firstname FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "WHERE v.state = 'TX') t " + + "LIMIT 3"); + + assertThat(explain, containsString("wrapper")); + } + + @Test + public void testSubqueryNoWhereStillWorks() throws IOException { + // Baseline: a subquery with no WHERE anywhere must not be rejected — the guard fires only + // when an outer LogicalFilter sits above a subquery project boundary. + String explain = + explainQuery( + "SELECT * FROM (SELECT v.firstname " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "LIMIT 3"); + + assertThat(explain, containsString("wrapper")); + } + + @Test + public void testInnerOrderByScoreDescInSubqueryAllowed() throws IOException { + // Positive control: inner ORDER BY _score DESC on the vectorSearch() relation inside the + // subquery is the only supported sort, and must continue to plan successfully even when + // wrapped in an outer SELECT. Proves the walker does not over-reject sort shapes that are + // below the subquery Project rather than above it. + String explain = + explainQuery( + "SELECT * FROM (SELECT v.firstname, v._score " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v " + + "ORDER BY v._score DESC) t " + + "LIMIT 3"); + + assertThat(explain, containsString("wrapper")); + } + + @Test + public void testOuterOrderByOnSubqueryRejected() throws IOException { + // Outer ORDER BY over a vectorSearch() subquery would run on a truncated top-k slice rather + // than the full relation, silently reordering only the already-ANN-selected rows. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT * FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "ORDER BY t.state")); + + assertThat( + ex.getMessage(), + containsString("Outer ORDER BY on a vectorSearch() subquery is not supported")); + } + + @Test + public void testOuterOffsetOnSubqueryRejected() throws IOException { + // Outer OFFSET silently drops top-k rows by vector distance. The inner query already caps at + // k and any outer OFFSET shifts that window in an opaque way, so reject it. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT * FROM (SELECT v.firstname " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "LIMIT 3 OFFSET 2")); + + assertThat( + ex.getMessage(), + containsString("Outer OFFSET on a vectorSearch() subquery is not supported")); + } + + @Test + public void testOuterLimitWithoutOffsetOnSubqueryAllowed() throws IOException { + // Positive control: outer LIMIT without OFFSET just caps the row count and must plan without + // error. Locks in the offset==0 boundary of the OFFSET rejection. + String explain = + explainQuery( + "SELECT * FROM (SELECT v.firstname " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "LIMIT 3"); + + assertThat(explain, containsString("wrapper")); + } + + @Test + public void testOuterAggregationOnSubqueryRejected() throws IOException { + // Outer aggregation (here COUNT(*)) over a vectorSearch() subquery would run on the + // truncated top-k slice, producing a count that silently depends on k rather than the full + // population. 
vectorSearch() does not support aggregations, so reject the outer-subquery + // variant with the same subquery-boundary walker that catches outer WHERE. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT COUNT(*) FROM (SELECT v.firstname " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t")); + + assertThat( + ex.getMessage(), + containsString( + "Outer GROUP BY / aggregation / DISTINCT on a vectorSearch() subquery is not" + + " supported")); + } + + @Test + public void testOuterGroupByOnSubqueryRejected() throws IOException { + // GROUP BY rewrites to LogicalAggregation and is caught by the same subquery-boundary walker. + ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT t.state, COUNT(*) FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t " + + "GROUP BY t.state")); + + assertThat( + ex.getMessage(), + containsString( + "Outer GROUP BY / aggregation / DISTINCT on a vectorSearch() subquery is not" + + " supported")); + } + + @Test + public void testOuterDistinctOnSubqueryRejected() throws IOException { + // SELECT DISTINCT rewrites to a LogicalAggregation with empty aggregator list and the select + // items as the group-by list. The subquery-boundary walker must catch this shape too. 
+ ResponseException ex = + expectThrows( + ResponseException.class, + () -> + explainQuery( + "SELECT DISTINCT t.state FROM (SELECT v.firstname, v.state " + + "FROM vectorSearch(table='" + + TEST_INDEX + + "', field='embedding', vector='[1.0, 2.0]', option='k=5') AS v) t")); + + assertThat( + ex.getMessage(), + containsString( + "Outer GROUP BY / aggregation / DISTINCT on a vectorSearch() subquery is not" + + " supported")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index 5da4e5cc794..61ebc9a16e9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -42,6 +42,12 @@ public class MatcherUtils { + /** Absolute tolerance floor for {@link #closeTo} numeric comparisons. */ + private static final double ABSOLUTE_TOLERANCE = 1e-10; + + /** Number of ULPs tolerated by {@link #closeTo} to absorb platform-dependent rounding. */ + private static final int ULP_TOLERANCE_FACTOR = 4; + private static final Logger LOG = LogManager.getLogger(); private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); @@ -302,20 +308,27 @@ protected boolean matchesSafely(JSONArray array) { } public static TypeSafeMatcher closeTo(Object... values) { - final double error = 1e-10; return new TypeSafeMatcher() { @Override protected boolean matchesSafely(JSONArray item) { List expectedValues = new ArrayList<>(Arrays.asList(values)); List actualValues = new ArrayList<>(); item.iterator().forEachRemaining(v -> actualValues.add((Object) v)); - return actualValues.stream() - .allMatch( - v -> - v instanceof Number - ? 
valuesAreClose( - (Number) v, (Number) expectedValues.get(actualValues.indexOf(v))) - : v.equals(expectedValues.get(actualValues.indexOf(v)))); + if (actualValues.size() != expectedValues.size()) { + return false; + } + for (int i = 0; i < actualValues.size(); i++) { + Object actual = actualValues.get(i); + Object expected = expectedValues.get(i); + if (actual instanceof Number && expected instanceof Number) { + if (!valuesAreClose((Number) actual, (Number) expected)) { + return false; + } + } else if (!actual.equals(expected)) { + return false; + } + } + return true; } @Override @@ -323,8 +336,16 @@ public void describeTo(Description description) { description.appendText(Arrays.toString(values)); } + /** + * ULP-aware comparison: tolerates up to {@link #ULP_TOLERANCE_FACTOR} ULPs or {@link + * #ABSOLUTE_TOLERANCE}, whichever is larger. + */ private boolean valuesAreClose(Number v1, Number v2) { - return Math.abs(v1.doubleValue() - v2.doubleValue()) <= error; + double d1 = v1.doubleValue(); + double d2 = v2.doubleValue(); + double diff = Math.abs(d1 - d2); + double ulpTolerance = ULP_TOLERANCE_FACTOR * Math.max(Math.ulp(d1), Math.ulp(d2)); + return diff <= Math.max(ABSOLUTE_TOLERANCE, ulpTolerance); } }; } diff --git a/integ-test/src/test/resources/expectedOutput/aggregation_query_explain.json b/integ-test/src/test/resources/expectedOutput/aggregation_query_explain.json index 9675b2b5beb..b0d0ac12251 100644 --- a/integ-test/src/test/resources/expectedOutput/aggregation_query_explain.json +++ b/integ-test/src/test/resources/expectedOutput/aggregation_query_explain.json @@ -6,8 +6,7 @@ "address", "script", "COUNT" - ], - "excludes" : [ ] + ] }, "stored_fields" : [ "address", diff --git a/integ-test/src/test/resources/expectedOutput/between_query.json b/integ-test/src/test/resources/expectedOutput/between_query.json index e3610f2dc13..1d23735e6e4 100644 --- a/integ-test/src/test/resources/expectedOutput/between_query.json +++ 
b/integ-test/src/test/resources/expectedOutput/between_query.json @@ -4,8 +4,7 @@ "_source" : { "includes" : [ "account_number" - ], - "excludes" : [ ] + ] }, "script_fields" : { "test" : { diff --git a/integ-test/src/test/resources/expectedOutput/calcite/access_struct_subfield_with_item.yaml b/integ-test/src/test/resources/expectedOutput/calcite/access_struct_subfield_with_item.yaml index 42c39458bdf..039b5e1c29f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/access_struct_subfield_with_item.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/access_struct_subfield_with_item.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]]) physical: | EnumerableCalc(expr#0=[{inputs}], expr#1=['dummy-datasource':VARCHAR], expr#2=[GEOIP($t1, $t0)], expr#3=['dummy_sub_field'], expr#4=[ITEM($t2, $t3)], host=[$t0], info=[$t2], info.dummy_sub_field=[$t4]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["host"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["host"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp.yaml index 81138f6fe80..671b99baf94 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp.yaml @@ -10,4 +10,4 @@ calcite: "order" : "asc", 
"missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml index ce84d53f479..29fe56f5ef6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml index ce84d53f479..29fe56f5ef6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml @@ -11,4 
+11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_with_after_timestamp.yaml index 81138f6fe80..671b99baf94 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_with_after_timestamp.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_with_after_timestamp.yaml @@ -10,4 +10,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml index 59e68e48769..94bdbffca0d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml @@ -5,4 +5,4 @@ calcite: LogicalSort(fetch=[10]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + 
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml index 7e14abeeef2..eb47b978a61 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml @@ -10,4 +10,4 @@ calcite: "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml index 13239b869cc..8b1000be45b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml @@ -11,4 +11,4 @@ calcite: "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml index 13239b869cc..8b1000be45b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml @@ -11,4 +11,4 @@ calcite: "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_with_after_timestamp.yaml index 7e14abeeef2..eb47b978a61 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_with_after_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_with_after_timestamp.yaml @@ -10,4 +10,4 @@ calcite: "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, 
pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml index c7db35fbaa1..81d2be57d3e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR)), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, 
message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR)), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml index 7718e89b02f..a73f4f508d2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($34)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},station=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[station, aws.cloudwatch.log_stream], LIMIT->500, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":500,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},station=COUNT()), PROJECT->[station, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->500, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":500,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml index 2ce4f996236..b0808ca93f4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($34)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},country=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[country, aws.cloudwatch.log_stream], LIMIT->50, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":50,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},country=COUNT()), PROJECT->[country, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->50, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":50,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml index b0c896f61fb..c277bb44fb8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, 
LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml index 31cbb3b8d70..06a01e78e87 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '((message:monkey OR message:jackal) OR message:bear)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query', '((message:monkey OR message:jackal) OR message:bear)':VARCHAR)), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"((message:monkey OR message:jackal) OR message:bear)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->query_string(MAP('query', '((message:monkey OR message:jackal) OR message:bear)':VARCHAR)), LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"((message:monkey OR message:jackal) OR message:bear)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml index e1471d87a4e..da8a6cf35f7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 10:00:00':VARCHAR)), query_string(MAP('fields', MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'monkey jackal bear':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(SEARCH($7, Sarg[['2023-01-03 00:00:00':VARCHAR..'2023-01-03 10:00:00':VARCHAR)]:VARCHAR), query_string(MAP('fields', MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'monkey jackal bear':VARCHAR))), LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-03T00:00:00.000Z","to":"2023-01-03T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"query_string":{"query":"monkey jackal bear","fields":["message^1.0"],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(SEARCH($7, Sarg[['2023-01-03 00:00:00':VARCHAR..'2023-01-03 10:00:00':VARCHAR)]:VARCHAR), query_string(MAP('fields', MAP('message':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'monkey jackal bear':VARCHAR))), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-03T00:00:00.000Z","to":"2023-01-03T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"query_string":{"query":"monkey jackal 
bear","fields":["message^1.0"],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml index 27a43886bc4..491ae9557bd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-03T00:00:00.000Z","to":"2023-01-03T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"query_string":{"query":"monkey jackal 
bear","fields":["message^1.0"],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-03T00:00:00.000Z","to":"2023-01-03T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"query_string":{"query":"monkey jackal bear","fields":["message^1.0"],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml index 56c63c5c406..002d0fcac35 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml index ba8b035ab51..2486ac357cf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[AND(=($7, 'systemd'), SEARCH($28, Sarg[[1..100]]))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, process.name, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->AND(=($2, 'systemd'), SEARCH($14, Sarg[[1..100]])), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"term":{"process.name":{"value":"systemd","boost":1.0}}},{"range":{"metrics.size":{"from":1.0,"to":100.0,"include_lower":true,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, process.name, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->AND(=($2, 'systemd'), SEARCH($14, Sarg[[1..100]])), LIMIT->10, PROJECT->[agent, process, log, message, 
tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"term":{"process.name":{"value":"systemd","boost":1.0}}},{"range":{"metrics.size":{"from":1.0,"to":100.0,"include_lower":true,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_big_term_query.yaml index 69dddd2ef14..d2d856a8867 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_big_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_big_term_query.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[SEARCH($28, Sarg[[20..30]])]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->SEARCH($13, Sarg[[20..30]]), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"metrics.size":{"from":20.0,"to":30.0,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->SEARCH($13, Sarg[[20..30]]), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"metrics.size":{"from":20.0,"to":30.0,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_small_term_query.yaml index 612e412b307..82160803187 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_small_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_small_term_query.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[OR(=($34, 'indigodagger'), SEARCH($28, Sarg[[10..20]]))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - 
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, aws.cloudwatch.log_stream, event], FILTER->OR(=($15, 'indigodagger'), SEARCH($13, Sarg[[10..20]])), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"should":[{"term":{"aws.cloudwatch.log_stream":{"value":"indigodagger","boost":1.0}}},{"range":{"metrics.size":{"from":10.0,"to":20.0,"include_lower":true,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, aws.cloudwatch.log_stream, event], FILTER->OR(=($15, 'indigodagger'), SEARCH($13, Sarg[[10..20]])), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"should":[{"term":{"aws.cloudwatch.log_stream":{"value":"indigodagger","boost":1.0}}},{"range":{"metrics.size":{"from":10.0,"to":20.0,"include_lower":true,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, 
pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_disjunction_big_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_disjunction_big_range_small_term_query.yaml index 24cabf88754..a88c0d5e59a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_disjunction_big_range_small_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_disjunction_big_range_small_term_query.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[OR(=($34, 'indigodagger'), SEARCH($28, Sarg[[1..100]]))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, aws.cloudwatch.log_stream, event], FILTER->OR(=($15, 'indigodagger'), SEARCH($13, Sarg[[1..100]])), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"should":[{"term":{"aws.cloudwatch.log_stream":{"value":"indigodagger","boost":1.0}}},{"range":{"metrics.size":{"from":1.0,"to":100.0,"include_lower":true,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, 
aws.cloudwatch.log_stream, event], FILTER->OR(=($15, 'indigodagger'), SEARCH($13, Sarg[[1..100]])), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"should":[{"term":{"aws.cloudwatch.log_stream":{"value":"indigodagger","boost":1.0}}},{"range":{"metrics.size":{"from":1.0,"to":100.0,"include_lower":true,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml index cdf19c603a0..572a7e862ba 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[SEARCH($28, Sarg[[20..200]])]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->SEARCH($13, Sarg[[20..200]]), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"metrics.size":{"from":20.0,"to":200.0,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->SEARCH($13, Sarg[[20..200]]), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"metrics.size":{"from":20.0,"to":200.0,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml index e0b91168f1f..bfdbc25ed34 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-13T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-13T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml index 8af1fc7058d..f33f1aa9193 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml @@ -11,4 +11,4 @@ calcite: "order" : "desc", "missing" : "_last" } - }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-13T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-13T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/scroll.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/scroll.yaml index 59e68e48769..94bdbffca0d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/scroll.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/scroll.yaml @@ -5,4 +5,4 @@ calcite: LogicalSort(fetch=[10]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml index 501c35a492a..37b00d5a092 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"meta.file":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"meta.file":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml index 501c35a492a..37b00d5a092 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"meta.file":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"meta.file":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc.yaml index cbbc5106ec6..56da1bf9a04 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc.yaml @@ -10,4 +10,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"metrics.size":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"metrics.size":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml index 9aa906cc6ca..16178fe703c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : 
"_first" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"log.file.path:\\/var\\/log\\/messages\\/solarshark","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"metrics.size":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"log.file.path:\\/var\\/log\\/messages\\/solarshark","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"metrics.size":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc.yaml index 3f059c7519f..b4080b5fe20 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc.yaml @@ -10,4 +10,4 @@ calcite: "order" : "desc", "missing" : "_last" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"metrics.size":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"metrics.size":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml index b52bb433722..0ec11c19736 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml @@ -11,4 +11,4 @@ calcite: "order" : "desc", "missing" : "_last" } - }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, 
input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"log.file.path:\\/var\\/log\\/messages\\/solarshark","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]},"sort":[{"metrics.size":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"query_string":{"query":"log.file.path:\\/var\\/log\\/messages\\/solarshark","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]},"sort":[{"metrics.size":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml index 21c0d2d0e5d..34c84a0245a 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[=($10, '/var/log/messages/birdknight')]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, log.file.path, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->=($3, '/var/log/messages/birdknight'), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"term":{"log.file.path":{"value":"/var/log/messages/birdknight","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, log.file.path, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->=($3, '/var/log/messages/birdknight'), LIMIT->10, PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"term":{"log.file.path":{"value":"/var/log/messages/birdknight","boost":1.0}}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_null_str.yaml index df5a461ab85..656118ca892 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/chart_null_str.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_null_str.yaml @@ -28,7 +28,7 @@ calcite: EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], gender=[$t1], balance=[$t0], age0=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[balance, gender, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[balance, gender, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$0], dir0=[ASC]) EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], 
$1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) @@ -36,4 +36,4 @@ calcite: EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], expr#11=[IS NOT NULL($t4)], age=[$t4], avg(balance)=[$t10], $condition=[$t11]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], gender=[$t1], balance=[$t0], age0=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[balance, gender, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[balance, gender, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml index ae4cade06e5..f900b2ccbec 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($68)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml index 7a8da847554..aa43e743192 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($31, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel":{"terms":{"field":"MobilePhoneModel","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), 
AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, MobilePhoneModel], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel":{"terms":{"field":"MobilePhoneModel","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml index be24923eeea..a4691fd7e38 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($31, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[u, MobilePhone, MobilePhoneModel], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel|MobilePhone":{"multi_terms":{"terms":[{"field":"MobilePhoneModel"},{"field":"MobilePhone"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), PROJECT->[u, MobilePhone, MobilePhoneModel], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel|MobilePhone":{"multi_terms":{"terms":[{"field":"MobilePhoneModel"},{"field":"MobilePhone"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml index dd4f502bbde..0110be323ee 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml 
index d5c34e6a7f2..7dbe85eb016 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) 
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml index b13cc1a62ca..1c275e53363 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase|SearchEngineID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"SearchEngineID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), PROJECT->[c, SearchEngineID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase|SearchEngineID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"SearchEngineID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml index 3f0fb7644a9..f18539393d7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($84)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), UserID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID":{"terms":{"field":"UserID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), UserID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID":{"terms":{"field":"UserID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml index d9efea667c2..54dfa746d4d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($84), IS NOT NULL($63))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml index c4005b2ea4f..74f1ebab575 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], 
SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], m=[EXTRACT('minute':VARCHAR, $17)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), SORT_AGG_METRICS->[3 DESC LAST], PROJECT->[count(), UserID, m, SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), UserID, m, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q20.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q20.yaml index 0139a468f93..df20d60fd8b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q20.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q20.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[=($84, 435090932899640449)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], 
PushDownContext=[[PROJECT->[UserID], FILTER->=($0, 435090932899640449), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"UserID":{"value":435090932899640449,"boost":1.0}}},"_source":{"includes":["UserID"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[UserID], FILTER->=($0, 435090932899640449), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"UserID":{"value":435090932899640449,"boost":1.0}}},"_source":{"includes":["UserID"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml index a28945e87d0..edd3dabd8d8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT()), PROJECT->[c, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml index f5b8ec9c184..6f6b5056a9f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), 
<>($63, ''), NOT(LIKE($26, '%.google.%', '\')))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), PROJECT->[c, dc(UserID), SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q24.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q24.yaml index 545df273617..f24aabcab1a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q24.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q24.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},"_source":{"includes":["EventDate","URLRegionID","HasGCLID","Income","Interests","Robotness","BrowserLanguage","CounterClass","BrowserCountry","OriginalURL","ClientTimeZone","RefererHash","TraficSourceID","HitColor","RefererRegionID","URLCategoryID","LocalEventTime","EventTime","UTMTerm","AdvEngineID","UserAgentMinor","UserAgentMajor","RemoteIP","Sex","JavaEnable","URLHash","URL","ParamOrderID","OpenstatSourceID","HTTPError","SilverlightVersion3","MobilePhoneModel","SilverlightVersion4","SilverlightVersion1","SilverlightVersion2","IsDownload","IsParameter","CLID","FlashMajor","FlashMinor","UTMMedium","WatchID","DontCountHits","CookieEnable","HID","SocialAction","WindowName","ConnectTiming","PageCharset","IsLink","IsArtifical","JavascriptEnable","ClientEventTime","DNSTiming","CodeVersion","ResponseEndTiming","FUniqID","WindowClientHeight","OpenstatServiceName","UTMContent","HistoryLength","IsOldCounter","MobilePhone","SearchPhrase","FlashMinor2","SearchEngineID","IsEvent","UTMSource","RegionID","OpenstatAdID","UTMCampaign","GoodEvent","IsRefresh","ParamCurrency","Params","ResolutionHeight","ClientIP","FromTag","ParamCurrencyID","ResponseStartTiming","ResolutionWidth","SendTiming","RefererCategoryID","OpenstatCampaignID","UserID","WithHash","UserAgent","ParamPrice","ResolutionDepth","IsMobile","Age","SocialSourceNetworkID","OpenerName","OS","IsNotBounce","Referer","NetMinor","Title","NetMajor","IPNetworkID","FetchTiming","SocialNetwork","SocialSourcePage","CounterID","WindowClientWidth"],"excludes":[]},"sort":[{"EventTime":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},"_source":{"includes":["EventDate","URLRegionID","HasGCLID","Income","Interests","Robotness","BrowserLanguage","CounterClass","BrowserCountry","OriginalURL","ClientTimeZone","RefererHash","TraficSourceID","HitColor","RefererRegionID","URLCategoryID","LocalEventTime","EventTime","UTMTerm","AdvEngineID","UserAgentMinor","UserAgentMajor","RemoteIP","Sex","JavaEnable","URLHash","URL","ParamOrderID","OpenstatSourceID","HTTPError","SilverlightVersion3","MobilePhoneModel","SilverlightVersion4","SilverlightVersion1","SilverlightVersion2","IsDownload","IsParameter","CLID","FlashMajor","FlashMinor","UTMMedium","WatchID","DontCountHits","CookieEnable","HID","SocialAction","WindowName","ConnectTiming","PageCharset","IsLink","IsArtifical","JavascriptEnable","ClientEventTime","DNSTiming","CodeVersion","ResponseEndTiming","FUniqID","WindowClientHeight","OpenstatServiceName","UTMContent","HistoryLength","IsOldCounter","MobilePhone","SearchPhrase","FlashMinor2","SearchEngineID","IsEvent","UTMSource","RegionID","OpenstatAdID","UTMCampaign","GoodEvent","IsRefresh","ParamCurrency","Params","ResolutionHeight","ClientIP","FromTag","ParamCurrencyID","ResponseStartTiming","ResolutionWidth","SendTiming","RefererCategoryID","OpenstatCampaignID","UserID","WithHash","UserAgent","ParamPrice","ResolutionDepth","IsMobile","Age","SocialSourceNetworkID","OpenerName","OS","IsNotBounce","Referer","NetMinor","Title","NetMajor","IPNetworkID","FetchTiming","SocialNetwork","SocialSourcePage","CounterID","WindowClientWidth"]},"sort":[{"EventTime":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q25.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q25.yaml index 612b8bc06f8..38aa2bb563d 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q25.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q25.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, PROJECT->[SearchPhrase], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase"],"excludes":[]},"sort":[{"EventTime":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + }], LIMIT->10, PROJECT->[SearchPhrase], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase"]},"sort":[{"EventTime":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q26.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q26.yaml index 233a58c19c6..aa188a541a5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q26.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q26.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase"],"excludes":[]},"sort":[{"SearchPhrase":{"order":"asc","missing":"_first"}}]}, 
requestedTotalSize=10, pageSize=null, startFrom=0)]) + }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase"]},"sort":[{"SearchPhrase":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q27.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q27.yaml index 1da73eb16c8..8ce3c23e3bd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q27.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q27.yaml @@ -16,4 +16,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10, PROJECT->[SearchPhrase], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase"],"excludes":[]},"sort":[{"EventTime":{"order":"asc","missing":"_first"}},{"SearchPhrase":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + }], LIMIT->10, PROJECT->[SearchPhrase], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase"]},"sort":[{"EventTime":{"order":"asc","missing":"_first"}},{"SearchPhrase":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml index a8ac7eaf9b7..bf40fe857ed 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 
3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml index 5cf8f54b258..81236b33d51 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($4)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml index e9b5c203f20..ccda84ba38a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($41), IS NOT NULL($76))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml index e2fd395e0ec..69dc8c94239 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($26)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml index 44a4218baf5..6ea79300182 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 
DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), PROJECT->[PageViews, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml index cd15e03f941..d4cbbe1fc48 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3},PageViews=COUNT()), PROJECT->[PageViews, Title], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml index c4bc303bfb2..6b85d93899f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), PROJECT->[PageViews, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml index e9eefc046b2..d9eb1a4c263 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml @@ -12,4 +12,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydW
UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydW
UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml index c23839c1674..6482c38bddb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[100], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate], LIMIT->[10 from 100]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"EventDate|URLHash":{"multi_terms":{"terms":[{"field":"EventDate","value_type":"long"},{"field":"URLHash"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 3},PageViews=COUNT()), PROJECT->[PageViews, URLHash, EventDate], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->[10 from 100]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"EventDate|URLHash":{"multi_terms":{"terms":[{"field":"EventDate","value_type":"long"},{"field":"URLHash"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml index 7a7d97c857a..96ee9b2a304 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3, 6},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WindowClientHeight|WindowClientWidth":{"multi_terms":{"terms":[{"field":"WindowClientHeight"},{"field":"WindowClientWidth"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={3, 6},PageViews=COUNT()), PROJECT->[PageViews, WindowClientWidth, WindowClientHeight], SORT_AGG_METRICS->[0 DESC LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"WindowClientHeight|WindowClientWidth":{"multi_terms":{"terms":[{"field":"WindowClientHeight"},{"field":"WindowClientWidth"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml index a71532d4271..05de3cbdcf9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($19, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), AdvEngineID], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($0, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), AdvEngineID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml index 6ea001905ce..5e6bc1617c5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q9.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($68)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, RegionID], 
LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), PROJECT->[u, RegionID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml index 0a8139b1eaa..2a9a55e5250 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml @@ -12,7 +12,7 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableUnion(all=[true]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, 
startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], account_number=[$t2], firstname=[$t3], address=[$t3], balance=[$t0], gender=[$t3], city=[$t3], employer=[$t3], state=[$t3], age=[$t1], email=[$t3], lastname=[$t3]) EnumerableAggregate(group=[{}], balance=[SUM($0)], age=[SUM($1)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml index 0c8b4ec26a2..2660b88eeb2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml @@ -15,8 +15,8 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableUnion(all=[true]) EnumerableCalc(expr#0..10=[{inputs}], 
expr#11=[+($t3, $t8)], expr#12=[null:VARCHAR(13)], proj#0..12=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=['ColTotal':VARCHAR(13)], account_number=[$t2], firstname=[$t3], address=[$t3], balance=[$t0], gender=[$t3], city=[$t3], employer=[$t3], state=[$t3], age=[$t1], email=[$t3], lastname=[$t3], CustomSum=[$t2], all_emp_total=[$t4]) EnumerableAggregate(group=[{}], balance=[SUM($0)], age=[SUM($1)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], LIMIT->5], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml new file mode 100644 index 00000000000..349251ab4fc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_consecutive_sorts_issue_5125.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$1], dir0=[DESC-nulls-last]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(c=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], c=[COUNT()]) + LogicalProject(gender=[$4]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, gender], SORT->[1 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"last","order":"desc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_on_window.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_on_window.yaml index bc2c9debf47..b9c91ab9ff5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_on_window.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_on_window.yaml @@ -11,4 +11,4 @@ calcite: EnumerableAggregate(group=[{0}], 
count()=[COUNT()]) EnumerableCalc(expr#0..1=[{inputs}], expr#2=[false], expr#3=[PATTERN_PARSER($t0, $t1, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], patterns_field=[$t6]) EnumerableWindow(window#0=[window(aggs [pattern($0, $1, $2, $3)])], constants=[[10, 100000, false]]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["address"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["address"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml index 1326030ea7e..9bda406291c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml @@ -15,5 +15,5 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableHashJoin(condition=[=($1, $2)], joinType=[semi]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, 
pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml index 36bf1245a2d..cc79c0dc2f4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml @@ -17,5 +17,5 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..2=[{inputs}], c=[$t1], state=[$t2]) EnumerableHashJoin(condition=[=($0, $2)], joinType=[inner]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), LIMIT->10], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), LIMIT->10], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml index 75389120405..c08c533bc60 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml index be021c55e23..9c41efa9139 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), PROJECT->[sum, state], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml index e60bbe90fdc..b48e10e20c8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[cnt, span(birthdate,1d)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), PROJECT->[cnt, span(birthdate,1d)], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":1}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml index 57132615c41..f2105ce0d3c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum(balance), span(age,5)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), PROJECT->[sum(balance), span(age,5)], SORT_AGG_METRICS->[0 DESC LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":1},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml index 3215115297a..cd0355241fe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(balance), c, dc(employer), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), PROJECT->[sum(balance), c, dc(employer), state], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml index 211aa979ce0..59cd137ca59 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml @@ 
-9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[4 DESC LAST], PROJECT->[sum(balance), count(), d, gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), PROJECT->[sum(balance), count(), d, gender, 
new_state], SORT_AGG_METRICS->[2 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml index 4caf7759fc6..b584249d91a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), gender, state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), gender, state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml index 13d8350c11f..44a51b2171d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), new_gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), new_gender, new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml index 7e010cba2ad..e24043592fe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], 
state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), new_state], SORT_AGG_METRICS->[0 ASC FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json index 1375ff21c2b..50c79eed83f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json @@ -1,6 +1,6 @@ { "calcite": { "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], cnt=[$19])\n LogicalUnion(all=[true])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], cnt=[null:BIGINT])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n 
LogicalProject(account_number=[null:BIGINT], firstname=[null:VARCHAR], address=[null:VARCHAR], birthdate=[null:EXPR_TIMESTAMP VARCHAR], gender=[$0], city=[null:VARCHAR], lastname=[null:VARCHAR], balance=[null:BIGINT], employer=[null:VARCHAR], state=[null:VARCHAR], age=[null:INTEGER], email=[null:VARCHAR], male=[null:BOOLEAN], _id=[null:VARCHAR], _index=[null:VARCHAR], _score=[null:REAL], _maxscore=[null:REAL], _sort=[null:BIGINT], _routing=[null:VARCHAR], cnt=[$1])\n LogicalAggregate(group=[{0}], cnt=[COUNT($1)])\n LogicalProject(gender=[$4], balance=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical":"EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[null:BIGINT], proj#0..13=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=[null:EXPR_TIMESTAMP VARCHAR], expr#5=[null:INTEGER], expr#6=[null:BOOLEAN], account_number=[$t2], firstname=[$t3], address=[$t3], birthdate=[$t4], gender=[$t0], city=[$t3], lastname=[$t3], balance=[$t2], employer=[$t3], state=[$t3], age=[$t5], email=[$t3], male=[$t6], cnt=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical":"EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[null:BIGINT], proj#0..13=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=[null:EXPR_TIMESTAMP VARCHAR], expr#5=[null:INTEGER], expr#6=[null:BOOLEAN], account_number=[$t2], firstname=[$t3], address=[$t3], birthdate=[$t4], gender=[$t0], city=[$t3], lastname=[$t3], balance=[$t2], employer=[$t3], state=[$t3], age=[$t5], email=[$t3], male=[$t6], cnt=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml index dce92478a61..d86a5bf59f5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | EnumerableCalc(expr#0..3=[{inputs}], expr#4=[UNIX_TIMESTAMP($t3)], expr#5=[3600], expr#6=[DIVIDE($t4, $t5)], expr#7=[2], expr#8=[DIVIDE($t6, $t7)], expr#9=[FLOOR($t8)], expr#10=[*($t9, $t7)], expr#11=[*($t10, $t5)], expr#12=[FROM_UNIXTIME($t11)], proj#0..2=[{exprs}], @timestamp=[$t12]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp, @timestamp], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["category","value","timestamp","@timestamp"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp, @timestamp], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["category","value","timestamp","@timestamp"]}}, requestedTotalSize=5, 
pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_bins.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_bins.json index ff327963630..1f5f29a063d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_bins.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_bins.json @@ -1 +1 @@ -{"calcite":{"logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], email=[$8], lastname=[$9], age=[$16])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], age=[WIDTH_BUCKET($8, 3, -(MAX($8) OVER (), MIN($8) OVER ()), MAX($8) OVER ())])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n","physical":"EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[3], expr#14=[-($t11, $t12)], expr#15=[WIDTH_BUCKET($t8, $t13, $t14, $t11)], proj#0..7=[{exprs}], email=[$t9], lastname=[$t10], age=[$t15])\n EnumerableLimit(fetch=[5])\n EnumerableWindow(window#0=[window(aggs [MAX($8), MIN($8)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"}} \ No newline at end of file 
+{"calcite":{"logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], email=[$8], lastname=[$9], age=[$16])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], age=[WIDTH_BUCKET($8, 3, -(MAX($8) OVER (), MIN($8) OVER ()), MAX($8) OVER ())])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n","physical":"EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[3], expr#14=[-($t11, $t12)], expr#15=[WIDTH_BUCKET($t8, $t13, $t14, $t11)], proj#0..7=[{exprs}], email=[$t9], lastname=[$t10], age=[$t15])\n EnumerableLimit(fetch=[5])\n EnumerableWindow(window#0=[window(aggs [MAX($8), MIN($8)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"}} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_minspan.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_minspan.json index aaa807ed1db..064aa294a2d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_minspan.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_minspan.json @@ -1 +1 @@ 
-{"calcite":{"logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], email=[$8], lastname=[$9], age=[$16])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], age=[MINSPAN_BUCKET($8, 5.0E0:DOUBLE, -(MAX($8) OVER (), MIN($8) OVER ()), MAX($8) OVER ())])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n","physical":"EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[5.0E0:DOUBLE], expr#14=[-($t11, $t12)], expr#15=[MINSPAN_BUCKET($t8, $t13, $t14, $t11)], proj#0..7=[{exprs}], email=[$t9], lastname=[$t10], age=[$t15])\n EnumerableLimit(fetch=[5])\n EnumerableWindow(window#0=[window(aggs [MAX($8), MIN($8)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"}} \ No newline at end of file +{"calcite":{"logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], email=[$8], lastname=[$9], age=[$16])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], 
state=[$7], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], age=[MINSPAN_BUCKET($8, 5.0E0:DOUBLE, -(MAX($8) OVER (), MIN($8) OVER ()), MAX($8) OVER ())])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n","physical":"EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[5.0E0:DOUBLE], expr#14=[-($t11, $t12)], expr#15=[MINSPAN_BUCKET($t8, $t13, $t14, $t11)], proj#0..7=[{exprs}], email=[$t9], lastname=[$t10], age=[$t15])\n EnumerableLimit(fetch=[5])\n EnumerableWindow(window#0=[window(aggs [MAX($8), MIN($8)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"}} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_span.yaml index 35b422b93f8..cbfb4539b8a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_span.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_span.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..10=[{inputs}], expr#11=[10], expr#12=[SPAN_BUCKET($t10, $t11)], proj#0..9=[{exprs}], age=[$t12]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, email, lastname, age], 
LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","email","lastname","age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, email, lastname, age], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","email","lastname","age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_start_end.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_start_end.json index 288a9d728e9..b48d47eb068 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_start_end.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_start_end.json @@ -1 +1 @@ -{"calcite":{"logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], gender=[$3], city=[$4], employer=[$5], state=[$6], age=[$7], email=[$8], lastname=[$9], balance=[$16])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], balance=[RANGE_BUCKET($3, MIN($3) OVER (), MAX($3) OVER (), 0, 100001)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n","physical":"EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[0], expr#14=[100001], 
expr#15=[RANGE_BUCKET($t3, $t11, $t12, $t13, $t14)], proj#0..2=[{exprs}], gender=[$t4], city=[$t5], employer=[$t6], state=[$t7], age=[$t8], email=[$t9], lastname=[$t10], balance=[$t15])\n EnumerableLimit(fetch=[5])\n EnumerableWindow(window#0=[window(aggs [MIN($3), MAX($3)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"}} \ No newline at end of file +{"calcite":{"logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], gender=[$3], city=[$4], employer=[$5], state=[$6], age=[$7], email=[$8], lastname=[$9], balance=[$16])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], balance=[RANGE_BUCKET($3, MIN($3) OVER (), MAX($3) OVER (), 0, 100001)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n","physical":"EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[0], expr#14=[100001], expr#15=[RANGE_BUCKET($t3, $t11, $t12, $t13, $t14)], proj#0..2=[{exprs}], gender=[$t4], city=[$t5], employer=[$t6], state=[$t7], age=[$t8], email=[$t9], lastname=[$t10], balance=[$t15])\n EnumerableLimit(fetch=[5])\n EnumerableWindow(window#0=[window(aggs [MIN($3), MAX($3)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"}} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml index c7b818113f5..4a9a143cba3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000, PROJECT->[age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000, PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml index 2f9e75af3dd..e8e9ac1f4f2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml @@ -6,4 +6,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age=[$t0], age2=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml index 695e6c17c7c..5aa37ee3296 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age=[$t0], age2=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj.yaml index 56183ad9939..a4fbf3b7fef 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj.yaml @@ -12,10 +12,10 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableMergeJoin(condition=[=($13, $15)], joinType=[inner]) EnumerableCalc(expr#0..12=[{inputs}], expr#13=['(?^[A-Z])'], expr#14=['initial'], expr#15=[REX_EXTRACT($t6, $t13, $t14)], proj#0..12=[{exprs}], initial=[$t15]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[REX_EXTRACT($6, '(?^[A-Z])', 'initial') ASCENDING NULLS_LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?^[A-Z])","initial"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[REX_EXTRACT($6, '(?^[A-Z])', 'initial') ASCENDING NULLS_LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?^[A-Z])","initial"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000, SORT->[{ "firstname" : { "order" : "asc", "missing" : "_last" } - }]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"firstname":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"firstname":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml index 25e1e7f9956..2b4eeb2fec8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml @@ -17,5 +17,5 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableMergeJoin(condition=[=($0, $7)], joinType=[left]) EnumerableCalc(expr#0=[{inputs}], expr#1=['(?^[A-Z])'], expr#2=['lastname'], expr#3=[REX_EXTRACT($t0, $t1, $t2)], lastname=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[lastname], LIMIT->10000, SORT_EXPR->[REX_EXTRACT($0, '(?^[A-Z])', 'lastname') ASCENDING NULLS_LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["lastname"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?^[A-Z])","lastname"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[lastname], LIMIT->10000, SORT_EXPR->[REX_EXTRACT($0, '(?^[A-Z])', 'lastname') ASCENDING NULLS_LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["lastname"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?^[A-Z])","lastname"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[6]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"lastname":{"terms":{"field":"lastname","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"lastname"},{"field":"account_number"},{"field":"firstname"},{"field":"address"},{"field":"birthdate"},{"field":"gender"},{"field":"city"},{"field":"balance"},{"field":"employer"},{"field":"state"},{"field":"age"},{"field":"email"},{"field":"male"}]}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml index be22682ff85..d80ebc5735b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age2=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml index 8aa536681d5..31efd3c688c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[-($t13, $t10)], proj#0..14=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[-(+($10, $7), $10) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCsHsKICAib3AiOiB7CiAgICAibmFtZSI6ICItIiwKICAgICJraW5kIjogIk1JTlVTIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiKyIsCiAgICAgICAgImtpbmQiOiAiUExVUyIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgIC
AgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0KICBdLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9Cn0=\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0,0],"DIGESTS":["age","balance","age"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[-(+($10, $7), $10) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCsHsKICAib3AiOiB7CiAgICAibmFtZSI6ICItIiwKICAgICJraW5kIjogIk1JTlVTIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiKyIsCiAgICAgICAgImtpbmQiOiAiUExVUyIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0KICBdLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9Cn0=\"}","lang":"opensearch_compounded_scrip
t","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0,0],"DIGESTS":["age","balance","age"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml index d9726a2beb1..3fd07a9682e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml @@ -14,4 +14,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_command.yaml index 16179925565..e722544f6d7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_command.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_command.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0=[{inputs}], 
expr#1=[AUTO($t0)], balance=[$t1]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_ctime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_ctime.yaml new file mode 100644 index 00000000000..dd3c53dc0da --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_ctime.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(ts=[CTIME(1066507633)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1066507633], expr#20=[CTIME($t19)], ts=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m"}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_dur2sec.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_dur2sec.yaml new file mode 100644 index 00000000000..fdbe6f1e8b7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_dur2sec.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], 
type=[QUERY_SIZE_LIMIT]) + LogicalProject(d=[DUR2SEC('01:23:45':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['01:23:45':VARCHAR], expr#20=[DUR2SEC($t19)], d=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m"}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mktime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mktime.yaml new file mode 100644 index 00000000000..a817226a708 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mktime.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(d=[MKTIME('10/18/2003 20:07:13':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['10/18/2003 20:07:13':VARCHAR], expr#20=[MKTIME($t19)], d=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m"}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mstime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mstime.yaml new file mode 100644 index 00000000000..43cc390ac77 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_mstime.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(t=[MSTIME('03:45.5':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['03:45.5':VARCHAR], expr#20=[MSTIME($t19)], t=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m"}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_multiple.yaml index a4940d90124..d947eaa667d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_multiple.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_multiple.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[AUTO($t0)], expr#3=[NUM($t1)], balance=[$t2], age=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance","age"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_with_alias.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_with_alias.yaml index 91340ada0ba..2a60c68f6eb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_with_alias.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_with_alias.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0=[{inputs}], expr#1=[AUTO($t0)], balance_num=[$t1]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml index 1948dfba6c3..3978b92e393 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml @@ -1,12 +1,13 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) 
+ LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5 ORDER BY $1 NULLS FIRST, $3 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"gender"},{"field":"age"},{"field":"state"}],"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableTopK(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"gender"},{"field":"age"},{"field":"state"}],"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}},"sort":[{"gender":{"order":"asc","missing":"_first"}},{"state":{"order":"desc","missing":"_last"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml index b04fef7e1bc..9f99d5ec747 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4_alternative.yaml @@ -1,12 +1,13 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5 ORDER BY $1 NULLS FIRST, $3 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"gender"},{"field":"age"},{"field":"state"}],"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableTopK(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2))], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"gender"},{"field":"age"},{"field":"state"}],"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}},"sort":[{"gender":{"order":"asc","missing":"_first"}},{"state":{"order":"desc","missing":"_last"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml index f61eced9dea..dceae301b9c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1.yaml @@ -1,11 +1,11 @@ calcite: logical: | LogicalSystemLimit(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5 ORDER BY $4 NULLS FIRST, $5 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml index c17eb382c17..9545f4220e0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr_complex1_alternative.yaml @@ -1,11 +1,11 @@ calcite: logical: | LogicalSystemLimit(sort0=[$4], sort1=[$5], 
dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)]) - LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) - LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$4], sort1=[$5], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5 ORDER BY $4 NULLS FIRST, $5 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml index 99ab0811bda..653bfff5686 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml @@ -9,4 +9,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t1)], expr#5=[1], expr#6=[<=($t3, $t5)], expr#7=[OR($t4, $t6)], proj#0..2=[{exprs}], $condition=[$t7]) EnumerableWindow(window#0=[window(partition {1} rows 
between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml index c2c1e672087..e7f26a14a96 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml @@ -10,4 +10,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) EnumerableWindow(window#0=[window(partition {11} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($11)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"email","boost":1.0}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($11)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"email","boost":1.0}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml index 93d488e69de..e523be80bed 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4.yaml @@ -1,12 +1,12 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)]) - LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) - LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], 
new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1 NULLS FIRST, $3 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"gender"},{"field":"state"},{"field":"account_number"},{"field":"age"}],"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[1 ASC FIRST, 3 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"gender"},{"field":"state"},{"field":"account_number"},{"field":"age"}],"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestam
p": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}},"sort":[{"gender":{"order":"asc","missing":"_first"}},{"state":{"order":"desc","missing":"_last"}}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml index 8cc9db58e9e..616d50a732a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_with_expr4_alternative.yaml @@ -1,12 +1,12 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) - LogicalFilter(condition=[<=($6, 2)]) - LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)]) - LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) - LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + 
LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5]) + LogicalFilter(condition=[<=($6, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1 NULLS FIRST, $3 DESC NULLS LAST)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"gender"},{"field":"state"},{"field":"account_number"},{"field":"age"}],"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), SORT->[1 ASC FIRST, 3 DESC LAST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"desc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"gender"},{"field":"state"},{"field":"account_number"},{"field":"age"}],"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestam
p": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}},"sort":[{"gender":{"order":"asc","missing":"_first"}},{"state":{"order":"desc","missing":"_last"}}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_ignored.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_ignored.yaml new file mode 100644 index 00000000000..9ba323d8834 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_ignored.yaml @@ -0,0 +1,7 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, 
requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_multiple.yaml new file mode 100644 index 00000000000..3f959b32cc0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_multiple.yaml @@ -0,0 +1,18 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{ + "age" : { + "order" : "desc", + "missing" : "_last" + } + }, { + "firstname.keyword" : { + "order" : "asc", + "missing" : "_first" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]},"sort":[{"age":{"order":"desc","missing":"_last"}},{"firstname.keyword":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_single.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_single.yaml new file mode 100644 index 
00000000000..2d012145cf1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_single.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], dir0=[DESC-nulls-last]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{ + "age" : { + "order" : "desc", + "missing" : "_last" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]},"sort":[{"age":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.yaml index 4d371b4d427..f1f0a196da4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[SCALAR_MAX($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], new=[$t15]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml index cc83bbd8f71..7ff2572fdc8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[SCALAR_MIN($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], new=[$t15]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_avg.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_avg.json index 7e8dc10e046..62b362b8ce8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_avg.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_avg.json @@ -1,6 +1,6 @@ { "calcite": { "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[APPROX_DISTINCT_COUNT($7) OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical":"EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(aggs [APPROX_DISTINCT_COUNT($7)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical":"EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(aggs [APPROX_DISTINCT_COUNT($7)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_dc.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_dc.json index 4d7b5f07c6a..990887a7e0b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_dc.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_dc.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(aggs [DISTINCT_COUNT_APPROX($7)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(aggs [DISTINCT_COUNT_APPROX($7)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_distinct_count.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_distinct_count.json index 6ffdb5d51eb..6f98fbbb7bb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_distinct_count.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_distinct_count.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": 
"EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(partition {4} aggs [DISTINCT_COUNT_APPROX($7)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(partition {4} aggs [DISTINCT_COUNT_APPROX($7)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest.json index 60030435c34..55b47a1b15b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY 
$1)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(partition {1} aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"created_at\",\"server\",\"@timestamp\",\"message\",\"level\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(partition {1} aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"created_at\",\"server\",\"@timestamp\",\"message\",\"level\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest_custom_time.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest_custom_time.json index 53d9934df12..09bf10d4933 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest_custom_time.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest_custom_time.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4)], latest_message=[ARG_MAX($3, $0) OVER 
(PARTITION BY $4)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(partition {4} aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"created_at\",\"server\",\"@timestamp\",\"message\",\"level\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(partition {4} aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"created_at\",\"server\",\"@timestamp\",\"message\",\"level\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest_no_group.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest_no_group.json index 5524ad54abf..5186005b2e2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest_no_group.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_earliest_latest_no_group.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[ARG_MIN($3, $2) OVER ()], latest_message=[ARG_MAX($3, $2) OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_logs]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"created_at\",\"server\",\"@timestamp\",\"message\",\"level\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableWindow(window#0=[window(aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"created_at\",\"server\",\"@timestamp\",\"message\",\"level\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_null_bucket.yaml index ae969892eeb..56208b16368 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_null_bucket.yaml @@ -8,4 +8,4 @@ calcite: EnumerableCalc(expr#0..12=[{inputs}], expr#13=[null:BIGINT], expr#14=[CASE($t11, $t12, $t13)], proj#0..10=[{exprs}], count()=[$t14]) EnumerableWindow(window#0=[window(partition {7} aggs [COUNT()])]) EnumerableCalc(expr#0..10=[{inputs}], expr#11=[IS NOT NULL($t7)], proj#0..11=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, 
employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_exists_correlated_subquery.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_exists_correlated_subquery.yaml index 05dd3778b8f..3830e23a87b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_exists_correlated_subquery.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_exists_correlated_subquery.yaml @@ -15,7 +15,7 @@ calcite: EnumerableCalc(expr#0..3=[{inputs}], id=[$t1], name=[$t0], salary=[$t2]) CalciteEnumerableTopK(sort0=[$2], dir0=[DESC-nulls-last], fetch=[10000]) EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id, salary]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id","salary"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id, salary]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id","salary"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableAggregate(group=[{0}]) EnumerableCalc(expr#0=[{inputs}], expr#1=[true], expr#2=[$cor0], expr#3=[$t2.id], expr#4=[=($t3, $t0)], i=[$t1], $condition=[$t4]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[PROJECT->[name, uid], FILTER->=($0, 'Tom'), LIMIT->10000, PROJECT->[uid]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"name":{"value":"Tom","boost":1.0}}},"_source":{"includes":["uid"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[PROJECT->[name, uid], FILTER->=($0, 'Tom'), LIMIT->10000, PROJECT->[uid]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"name":{"value":"Tom","boost":1.0}}},"_source":{"includes":["uid"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_exists_uncorrelated_subquery.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_exists_uncorrelated_subquery.yaml index f54be57fefd..a48b3de1d05 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_exists_uncorrelated_subquery.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_exists_uncorrelated_subquery.yaml @@ -14,7 +14,7 @@ calcite: EnumerableCalc(expr#0..3=[{inputs}], id=[$t1], name=[$t0], salary=[$t2]) CalciteEnumerableTopK(sort0=[$2], dir0=[DESC-nulls-last], fetch=[10000]) EnumerableNestedLoopJoin(condition=[true], joinType=[inner]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], 
PushDownContext=[[PROJECT->[name, id, salary]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id","salary"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id, salary]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id","salary"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableAggregate(group=[{0}]) EnumerableCalc(expr#0=[{inputs}], expr#1=[true], i=[$t1]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[PROJECT->[name], FILTER->=($0, 'Tom'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"name":{"value":"Tom","boost":1.0}}},"_source":{"includes":["name"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[PROJECT->[name], FILTER->=($0, 'Tom'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"name":{"value":"Tom","boost":1.0}}},"_source":{"includes":["name"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_push.yaml index d35c47cf5cb..e153d0d2ed6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_push.yaml @@ -5,4 +5,4 @@ calcite: LogicalProject(age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_smaller_than_head_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_smaller_than_head_push.yaml index 7c80ddf56df..4184398d377 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_smaller_than_head_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_smaller_than_head_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalSort(fetch=[100]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->100, LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->100, LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_with_head_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_with_head_push.yaml index ba828e445b5..6fc43071004 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_with_head_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_fetch_size_with_head_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalSort(fetch=[3]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->3, LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":3,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=3, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->3, LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":3,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=3, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_field_format.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_field_format.yaml index 202f594bef0..8999b30a8e3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_field_format.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_field_format.yaml @@ -6,5 +6,5 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..10=[{inputs}], expr#11=['$':VARCHAR], expr#12=['commas':VARCHAR], expr#13=[TOSTRING($t3, $t12)], expr#14=[||($t11, $t13)], proj#0..10=[{exprs}], formatted_balance=[$t14]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_fillnull_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_fillnull_push.json index 9bdc21993e9..f52b5d94b74 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_fillnull_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_fillnull_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[COALESCE($8, -1)], balance=[COALESCE($3, -1)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], expr#2=[-1], expr#3=[COALESCE($t0, $t2)], expr#4=[COALESCE($t1, $t2)], age=[$t3], balance=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age, balance], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\",\"balance\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableCalc(expr#0..1=[{inputs}], expr#2=[-1], expr#3=[COALESCE($t0, $t2)], expr#4=[COALESCE($t1, $t2)], age=[$t3], balance=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age, balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\",\"balance\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_fillnull_value_syntax.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_fillnull_value_syntax.yaml index e677364b7c5..821fc66471a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_fillnull_value_syntax.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_fillnull_value_syntax.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[0], expr#3=[COALESCE($t0, $t2)], expr#4=[COALESCE($t1, $t2)], age=[$t3], balance=[$t4]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age, balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age, balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"]}}, requestedTotalSize=10000, 
pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_false.yaml index a3ee761cba9..65431476812 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_false.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_false.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[NOT($12)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->NOT($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":false,"boost":1.0}}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->NOT($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":false,"boost":1.0}}},"_source":{"includes":["firstname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_not_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_not_true.yaml index faf38c14bc5..651a9d676ce 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_not_true.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_not_true.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[IS NOT TRUE($12)]) 
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->IS NOT TRUE($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->IS NOT TRUE($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_true.yaml index 3a698fd5cb8..3e713bf9c10 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_true.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_boolean_only_true.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[$12]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->$1, PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":true,"boost":1.0}}},"_source":{"includes":["firstname"],"excludes":[]}}, 
requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->$1, PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":true,"boost":1.0}}},"_source":{"includes":["firstname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_compare_ip.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_compare_ip.json index bb62be7b990..b454206df71 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_compare_ip.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_compare_ip.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(host=[$0])\n LogicalFilter(condition=[GREATER_IP($0, IP('1.1.1.1':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], FILTER->GREATER_IP($0, IP('1.1.1.1':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"host\":{\"from\":\"1.1.1.1\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"host\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], FILTER->GREATER_IP($0, IP('1.1.1.1':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"host\":{\"from\":\"1.1.1.1\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"host\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_compare_ipv6_swapped.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_compare_ipv6_swapped.json index 8ebbbbe885d..163dfb9fcdf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_compare_ipv6_swapped.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_compare_ipv6_swapped.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(host=[$0])\n LogicalFilter(condition=[LTE_IP(IP('::ffff:1234':VARCHAR), $0)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], FILTER->LTE_IP(IP('::ffff:1234':VARCHAR), $0), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"host\":{\"from\":\"::ffff:1234\",\"to\":null,\"include_lower\":true,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"host\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], FILTER->LTE_IP(IP('::ffff:1234':VARCHAR), $0), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"host\":{\"from\":\"::ffff:1234\",\"to\":null,\"include_lower\":true,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"host\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_cost_w_pushdown.txt b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_cost_w_pushdown.txt index 009d1b113da..adf2732011e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_cost_w_pushdown.txt +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_cost_w_pushdown.txt @@ -1 +1 @@ -CalciteEnumerableIndexScan(table=[[OpenSearch, test]], PushDownContext=[[PROJECT->[name, age], FILTER->=($1, 20), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"term\":{\"age\":{\"value\":20,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]): rowcount = 1500.0, cumulative cost = {2698.2000000000003 rows, 0.0 cpu, 0.0 io} +CalciteEnumerableIndexScan(table=[[OpenSearch, test]], PushDownContext=[[PROJECT->[name, age], FILTER->=($1, 20), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"term\":{\"age\":{\"value\":20,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"age\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]): rowcount = 1500.0, cumulative cost = {2698.2000000000003 rows, 0.0 cpu, 0.0 io} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_function_script_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_function_script_push.yaml index f72238d95a0..5cde543c213 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_function_script_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_function_script_push.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[AND(=(CHAR_LENGTH($1), 5), =(ABS($8), 32), =($3, 39225))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, balance, age], SCRIPT->AND(=(CHAR_LENGTH($0), 5), =(ABS($2), 32), =($1, 39225)), PROJECT->[firstname, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0,2],"DIGESTS":["firstname.keyword",5]}},"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQB9nsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkFCUyIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["age",32]}},"boost":1.0}},{"term":{"balance":{"value":39225,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, balance, age], SCRIPT->AND(=(CHAR_LENGTH($0), 5), =(ABS($2), 32), =($1, 39225)), PROJECT->[firstname, age], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["firstname.keyword",5]}},"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQB9nsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkFCUyIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0,2],"DIGESTS":["age",32]}},"boost":1.0}},{"term":{"balance":{"value":39225,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml index 11bf9baa46c..62ec55f1eec 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[>($8, 30)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], FILTER->AND(SEARCH($1, Sarg[(30..40)]), >($0, 10000)), PROJECT->[age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"range":{"age":{"from":30.0,"to":40.0,"include_lower":false,"include_upper":false,"boost":1.0}}},{"range":{"balance":{"from":10000,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], FILTER->AND(SEARCH($1, Sarg[(30..40)]), >($0, 10000)), PROJECT->[age], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"range":{"age":{"from":30.0,"to":40.0,"include_lower":false,"include_upper":false,"boost":1.0}}},{"range":{"balance":{"from":10000,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["age"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml index 55951816ff7..2c7b9b19f27 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(yyyy-MM-dd=[$83]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[yyyy-MM-dd], FILTER->SEARCH($0, Sarg[('2016-12-08':VARCHAR..'2018-11-09':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"yyyy-MM-dd":{"from":"2016-12-08","to":"2018-11-09","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["yyyy-MM-dd"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[yyyy-MM-dd], FILTER->SEARCH($0, Sarg[('2016-12-08':VARCHAR..'2018-11-09':VARCHAR)]:VARCHAR), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"yyyy-MM-dd":{"from":"2016-12-08","to":"2018-11-09","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["yyyy-MM-dd"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml index faf6a3764c5..7719e378887 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(custom_time=[$49]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[custom_time], FILTER->SEARCH($0, Sarg[('12:00:00.123456789':VARCHAR..'19:00:00.123456789':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"custom_time":{"from":"12:00:00.123456789","to":"19:00:00.123456789","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["custom_time"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[custom_time], FILTER->SEARCH($0, Sarg[('12:00:00.123456789':VARCHAR..'19:00:00.123456789':VARCHAR)]:VARCHAR), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"custom_time":{"from":"12:00:00.123456789","to":"19:00:00.123456789","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["custom_time"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml index e0a3fc8a7d3..dabc1fdfb8c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":false,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, 
balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":false,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml index 78ae3956b4a..6fe7ca4d064 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml index 422c7769140..4d6f5d36577 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_false.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml index cd51bb8a61a..48505d0bb67 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_query_string_with_boolean_not_true.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), IS NOT TRUE($1)), PROJECT->[firstname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), IS NOT TRUE($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.yaml index a4e8e998ffd..efe9c77fddd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[CIDRMATCH($0, '0.0.0.0/24':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], SCRIPT->CIDRMATCH($0, '0.0.0.0/24':VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCRnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSURSTUFUQ0giLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ1ZHQiOiAiRVhQUl9JUCIsCiAgICAgICAgInR5cGUiOiAiT1RIRVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJPT0xFQU4iLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["host","0.0.0.0/24"]}},"boost":1.0}},"_source":{"includes":["host"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], SCRIPT->CIDRMATCH($0, '0.0.0.0/24':VARCHAR), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCRnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSURSTUFUQ0giLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ1ZHQiOiAiRVhQUl9JUCIsCiAgICAgICAgInR5cGUiOiAiT1RIRVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJPT0xFQU4iLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["host","0.0.0.0/24"]}},"boost":1.0}},"_source":{"includes":["host"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.yaml index 519377d3dde..fbec63d8c6f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[AND(=($1, 'Amber'), =(-($8, 2), 30))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, age], SCRIPT->AND(=($0, 'Amber'), =(-($1, 2), 30)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"term":{"firstname.keyword":{"value":"Amber","boost":1.0}}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCwnsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIi0iLAogICAgICAgICJraW5kIjogIk1JTlVTIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["age",2,30]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, age], SCRIPT->AND(=($0, 'Amber'), =(-($1, 2), 30)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"term":{"firstname.keyword":{"value":"Amber","boost":1.0}}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCwnsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIi0iLAogICAgICAgICJraW5kIjogIk1JTlVTIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["age",2,30]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push_diff.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push_diff.json index e45019fc4de..5ee40b27a8d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push_diff.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push_diff.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n 
LogicalProject(firstname=[$1])\n LogicalFilter(condition=[<>($1, '')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname], FILTER-><>($0, ''), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"exists\":{\"field\":\"firstname\",\"boost\":1.0}}],\"must_not\":[{\"term\":{\"firstname.keyword\":{\"value\":\"\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"firstname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname], FILTER-><>($0, ''), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"exists\":{\"field\":\"firstname\",\"boost\":1.0}}],\"must_not\":[{\"term\":{\"firstname.keyword\":{\"value\":\"\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"firstname\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_then_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_then_limit_push.yaml index 802867dfc5a..14b0c78f38a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_then_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_then_limit_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[>($8, 30)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[age], FILTER->>($0, 30), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->>($0, 30), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_w_pushdown.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_w_pushdown.json index c99e6b323e2..ecaf7af0005 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_w_pushdown.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_w_pushdown.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], age=[$1])\n LogicalFilter(condition=[=($1, 20)])\n CalciteLogicalIndexScan(table=[[OpenSearch, test]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, test]], PushDownContext=[[PROJECT->[name, age], FILTER->=($1, 20), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"term\":{\"age\":{\"value\":20,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, test]], PushDownContext=[[PROJECT->[name, age], FILTER->=($1, 20), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"term\":{\"age\":{\"value\":20,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"age\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_graphlookup.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_graphlookup.yaml new file mode 100644 index 00000000000..d0b83104961 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_graphlookup.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(name=[$0], reportsTo=[$1], id=[$2], reportingHierarchy=[$9]) + LogicalGraphLookup(fromField=[reportsTo], toField=[name], outputField=[reportingHierarchy], depthField=[null], maxDepth=[0], bidirectional=[false]) + LogicalSort(fetch=[100]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_graph_employees]]) + LogicalProject(name=[$0], reportsTo=[$1], id=[$2]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_graph_employees]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..9=[{inputs}], proj#0..2=[{exprs}], reportingHierarchy=[$t9]) + CalciteEnumerableGraphLookup(fromField=[reportsTo], toField=[name], outputField=[reportingHierarchy], depthField=[null], maxDepth=[0], bidirectional=[false]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_graph_employees]], PushDownContext=[[LIMIT->100], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":100,"timeout":"1m"}, requestedTotalSize=100, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_graph_employees]], PushDownContext=[[PROJECT->[name, reportsTo, id]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","reportsTo","id"]}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_graphlookup_top_level.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_graphlookup_top_level.yaml new file mode 100644 index 00000000000..4a5caa6b904 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_graphlookup_top_level.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalGraphLookup(fromField=[reportsTo], toField=[name], outputField=[reportingHierarchy], depthField=[null], maxDepth=[0], bidirectional=[false], startValues=[[Eliot]]) + LogicalValues(tuples=[[]]) + LogicalProject(name=[$0], reportsTo=[$1], id=[$2]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_graph_employees]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableGraphLookup(fromField=[reportsTo], toField=[name], outputField=[reportingHierarchy], depthField=[null], maxDepth=[0], bidirectional=[false], startValues=[[Eliot]]) + EnumerableValues(tuples=[[]]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_graph_employees]], PushDownContext=[[PROJECT->[name, reportsTo, id]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","reportsTo","id"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml index 48d5bb74b55..a7454e0978c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_osd_format.yaml @@ -4,4 +4,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], 
state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"pre_tags":[""],"post_tags":[""],"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"pre_tags":[""],"post_tags":[""],"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]},"highlight":{"pre_tags":[""],"post_tags":[""],"fragment_size":2147483647,"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml index 151c1fb7986..6fb0a9280c2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_single_term.yaml @@ -4,4 +4,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"Holmes":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fields":{"Holmes":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[Holmes], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]},"highlight":{"fields":{"Holmes":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml index 5d0351ae136..c8438414438 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_wildcard.yaml @@ -4,4 +4,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _highlight=[$17]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname, _highlight], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml 
index fbfce05e1fc..1fecf2574b2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_highlight_with_filter.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[>($8, 30)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","highlight":{"fields":{"*":{}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[firstname, age, _highlight], FILTER->>($1, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["firstname","age"],"excludes":[]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[HIGHLIGHT->[*], PROJECT->[firstname, age, _highlight], FILTER->>($1, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["firstname","age"]},"highlight":{"fields":{"*":{}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_in_correlated_subquery.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_in_correlated_subquery.yaml index 18cd0c5147a..23bd0d6df69 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_in_correlated_subquery.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_in_correlated_subquery.yaml @@ -16,7 +16,7 @@ calcite: CalciteEnumerableTopK(sort0=[$2], dir0=[DESC-nulls-last], fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[=($t0, $t3)], proj#0..3=[{exprs}], $condition=[$t4]) EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id, salary]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id","salary"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id, salary]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id","salary"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableAggregate(group=[{0}]) EnumerableCalc(expr#0..1=[{inputs}], expr#2=[$cor0], expr#3=[$t2.id], expr#4=[=($t3, $t1)], proj#0..1=[{exprs}], $condition=[$t4]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[PROJECT->[name, uid], FILTER->=($0, 'Tom'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"name":{"value":"Tom","boost":1.0}}},"_source":{"includes":["name","uid"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[PROJECT->[name, uid], FILTER->=($0, 'Tom'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"name":{"value":"Tom","boost":1.0}}},"_source":{"includes":["name","uid"]}}, requestedTotalSize=10000, pageSize=null, 
startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_in_uncorrelated_subquery.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_in_uncorrelated_subquery.yaml index ae5e866620b..f5a15b26c25 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_in_uncorrelated_subquery.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_in_uncorrelated_subquery.yaml @@ -13,5 +13,5 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], id=[$t1], name=[$t0], salary=[$t2]) CalciteEnumerableTopK(sort0=[$2], dir0=[DESC-nulls-last], fetch=[10000]) EnumerableHashJoin(condition=[=($1, $3)], joinType=[semi]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id, salary]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id","salary"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[PROJECT->[uid], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["uid"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id, salary]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id","salary"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[PROJECT->[uid], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["uid"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No 
newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml index d262a4af76d..642d64b4236 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1))), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiVFJJTSIsCiAgICAgICAgICAgICJraW5kIjogIlRSSU0iLAogICAgICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsa
XRlcmFsIjogIkJPVEgiLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiU1lNQk9MIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfQogICAgICAgICAgXQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0],"DIGESTS":["firstname.keyword"," ","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1))), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiVFJJTSIsCiAgICAgICAgICAgICJraW5kIjogIlRSSU0iLAogICAgICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsaXRlcmFsIjogIkJPVEgiLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiU1lNQk9MIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfQogICAgICAgICAgXQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0,2,0],"DIGESTS":["firstname.keyword"," ","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml index df97332ae98..c4f104e6462 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[OR(IS NULL($1), IS EMPTY($1))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY($1)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC13sKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["firstname.keyword","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY($1)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC13sKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["firstname.keyword","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml index 06215911134..e7603f5643d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[OR(=($4, 'M'), IS NULL($1), IS EMPTY($1))]) 
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), =($4, 'M'), IS EMPTY($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQEtnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIEVNUFRZIiwKICAgICAgICAia2luZCI6ICJPVEhFUiIsCiAgICAgICAgInN5bnRheCI6ICJQT1NURklYIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiV
kFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,0],"DIGESTS":["firstname.keyword","gender.keyword","M","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), =($4, 'M'), IS EMPTY($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQEtnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgI
CAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIEVNUFRZIiwKICAgICAgICAia2luZCI6ICJPVEhFUiIsCiAgICAgICAgInN5bnRheCI6ICJQT1NURklYIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,0],"DIGESTS":["firstname.keyword","gender.keyword","M","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isnull_or_others.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_isnull_or_others.json index e4d43e1690e..7d756737e99 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isnull_or_others.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isnull_or_others.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[OR(IS NULL($1), =($4, 'M'))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], 
FILTER->OR(IS NULL($1), =($4, 'M')), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"should\":[{\"bool\":{\"must_not\":[{\"exists\":{\"field\":\"firstname\",\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},{\"term\":{\"gender.keyword\":{\"value\":\"M\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->OR(IS NULL($1), =($4, 'M')), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"should\":[{\"bool\":{\"must_not\":[{\"exists\":{\"field\":\"firstname\",\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},{\"term\":{\"gender.keyword\":{\"value\":\"M\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_issue_5114_sort_expr_head_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_issue_5114_sort_expr_head_push.yaml index 84d91a3885f..29dd36819a0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_issue_5114_sort_expr_head_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_issue_5114_sort_expr_head_push.yaml @@ -6,4 +6,4 @@ 
calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], a=[RAND()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number], SORT_EXPR->[RAND() ASCENDING NULLS_FIRST], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQAbnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSQU5EIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[],"DIGESTS":[]}},"type":"number","order":"asc"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number], SORT_EXPR->[RAND() ASCENDING NULLS_FIRST], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQAbnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSQU5EIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[],"DIGESTS":[]}},"type":"number","order":"asc"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml index a7dcf0e6bf2..94a464c2f93 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml @@ -20,5 +20,5 @@ calcite: "order" : "asc", "missing" : "_last" } - }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[0]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"firstname"},{"field":"address"},{"field":"birthdate"},{"field":"gender"},{"field":"city"},{"field":"lastname"},{"field":"balance"},{"field":"employer"},{"field":"state"},{"field":"age"},{"field":"email"},{"field":"male"}]}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields.yaml index 12259a8e5ae..ef4a5a293e7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields.yaml @@ -17,10 +17,10 @@ calcite: "order" : "asc", "missing" : "_last" } - }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number"]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000, SORT->[{ "account_number" : { "order" : "asc", "missing" : "_last" } - }]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml index 45cd7328f1c..21e65fcce30 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml @@ -17,4 +17,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableHashJoin(condition=[=($0, $13)], joinType=[inner]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"account_number":{"terms":{"field":"account_number","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"account_number"},{"field":"firstname"},{"field":"address"},{"field":"birthdate"},{"field":"gender"},{"field":"city"},{"field":"lastname"},{"field":"balance"},{"field":"employer"},{"field":"state"},{"field":"age"},{"field":"email"},{"field":"male"}]}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_ilike_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_ilike_function.yaml index 3a891dc6bc4..0651ff30dbd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_ilike_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_ilike_function.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.yaml index f76e6520ae5..98b5bbb2f34 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[LIKE($1, '%mbe%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, 
firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->LIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->LIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function_case_insensitive.yaml index 3a891dc6bc4..0651ff30dbd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function_case_insensitive.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function_case_insensitive.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, 
email, lastname], FILTER->ILIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10_5_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10_5_push.yaml index dee2d92c7a3..13fde3b8dc3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10_5_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10_5_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalSort(fetch=[10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->10, LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->10, LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10_filter_5_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10_filter_5_push.yaml index 21f46d7f96a..e812d4f7e63 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10_filter_5_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10_filter_5_push.yaml @@ -10,4 +10,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableLimit(fetch=[5]) EnumerableCalc(expr#0=[{inputs}], expr#1=[30], expr#2=[>($t0, $t1)], age=[$t0], $condition=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->10], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->10], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10from1_10from2_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10from1_10from2_push.yaml index e17b61abc78..a3241998851 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10from1_10from2_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_10from1_10from2_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalSort(offset=[1], 
fetch=[10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->[10 from 1], LIMIT->[10 from 2], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":3,"size":8,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=8, pageSize=null, startFrom=3)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->[10 from 1], LIMIT->[10 from 2], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":3,"size":8,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=8, pageSize=null, startFrom=3)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_5_10_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_5_10_push.yaml index 7b936024c74..2371eada6db 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_5_10_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_5_10_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalSort(fetch=[5]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5, LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5, LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_offsets_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_offsets_push.yaml index 1a5003d8a2b..de91009aa7e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_offsets_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_offsets_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalSort(offset=[1], fetch=[10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->[10 from 1], LIMIT->[5 from 2], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":3,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=3)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->[10 from 1], LIMIT->[5 from 2], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":3,"size":5,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=3)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_push.yaml index 09636db30ad..690c3ce24e7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_push.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0=[{inputs}], expr#1=[30], expr#2=[-($t0, $t1)], ageMinus=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_filter_push.yaml index aed8746d78b..ae87c16c4bb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_filter_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_filter_push.yaml @@ -8,4 +8,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0=[{inputs}], expr#1=[30], expr#2=[>($t0, $t1)], age=[$t0], $condition=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.yaml index 7ab0399e85f..d1cb11da7c1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.yaml @@ -11,4 +11,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_list_aggregation.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_list_aggregation.json index 1b824c29814..763bab772c4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_list_aggregation.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_list_aggregation.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], age_list=[LIST($0)])\n LogicalProject(age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableAggregate(group=[{}], age_list=[LIST($0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableAggregate(group=[{}], age_list=[LIST($0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_merge_join_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_merge_join_sort_push.yaml index 7c5f95fba77..56fff3a727f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_merge_join_sort_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_merge_join_sort_push.yaml @@ -16,10 +16,10 @@ calcite: "order" : "asc", "missing" : "_last" } - }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000, SORT->[{ "account_number" : { "order" : "asc", "missing" : "_last" } - }]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json index 46216cff058..c745b7bc963 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_fields_relevance_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[simple_query_string(MAP('fields', MAP('name':VARCHAR, 4.0E0:DOUBLE, 'email':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'gmail':VARCHAR), MAP('default_operator', 'or':VARCHAR), MAP('analyzer', 'english':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], 
FILTER->simple_query_string(MAP('fields', MAP('name':VARCHAR, 4.0E0:DOUBLE, 'email':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'gmail':VARCHAR), MAP('default_operator', 'or':VARCHAR), MAP('analyzer', 'english':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"simple_query_string\":{\"query\":\"gmail\",\"fields\":[\"name^4.0\",\"email^1.0\"],\"analyzer\":\"english\",\"flags\":-1,\"default_operator\":\"or\",\"analyze_wildcard\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->simple_query_string(MAP('fields', MAP('name':VARCHAR, 4.0E0:DOUBLE, 'email':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'gmail':VARCHAR), MAP('default_operator', 'or':VARCHAR), MAP('analyzer', 'english':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"simple_query_string\":{\"query\":\"gmail\",\"fields\":[\"name^4.0\",\"email^1.0\"],\"analyzer\":\"english\",\"flags\":-1,\"default_operator\":\"or\",\"analyze_wildcard\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_sort_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_sort_push.json index c2fa77194f5..a1b819bea2b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_sort_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multi_sort_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$3], sort1=[$4], sort2=[$0], dir0=[DESC-nulls-last], dir1=[DESC-nulls-last], dir2=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4])\n LogicalSort(sort0=[$3], sort1=[$4], sort2=[$0], dir0=[DESC-nulls-last], dir1=[DESC-nulls-last], dir2=[ASC-nulls-first])\n LogicalSort(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first], dir2=[ASC-nulls-first], dir3=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender], SORT->[{\n \"balance\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"gender.keyword\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\"],\"excludes\":[]},\"sort\":[{\"balance\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"gender.keyword\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"account_number\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": 
"EnumerableLimit(fetch=[10000])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender], SORT->[{\n \"balance\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"gender.keyword\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\"]},\"sort\":[{\"balance\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"gender.keyword\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"account_number\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_multisearch_basic.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_multisearch_basic.yaml index 8fe5241ced4..ce18b5229db 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_multisearch_basic.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multisearch_basic.yaml @@ -17,6 +17,6 @@ calcite: EnumerableAggregate(group=[{0}], count=[COUNT()]) EnumerableUnion(all=[true]) EnumerableCalc(expr#0=[{inputs}], expr#1=['young':VARCHAR], age_group=[$t1]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER-><($0, 30)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"age":{"from":null,"to":30,"include_lower":true,"include_upper":false,"boost":1.0}}},"_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[age], FILTER-><($0, 30)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"age":{"from":null,"to":30,"include_lower":true,"include_upper":false,"boost":1.0}}},"_source":{"includes":["age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0=[{inputs}], expr#1=['adult':VARCHAR], age_group=[$t1]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->>=($0, 30)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->>=($0, 30)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_multisearch_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_multisearch_timestamp.yaml index 92358db2bf8..cc0102e73ba 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_multisearch_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multisearch_timestamp.yaml @@ -18,10 +18,10 @@ calcite: "order" : "desc", "missing" : "_first" } - }], LIMIT->5], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"terms":{"category":["A","B"],"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + }], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"terms":{"category":["A","B"],"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"]},"sort":[{"@timestamp":{"order":"desc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data2]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->SEARCH($1, Sarg['E':VARCHAR, 'F':VARCHAR]:VARCHAR), SORT->[{ "@timestamp" : { "order" : "desc", "missing" : "_first" } - }], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"terms":{"category":["E","F"],"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"terms":{"category":["E","F"],"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"]},"sort":[{"@timestamp":{"order":"desc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml index ff54f066772..c387d566e2c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml @@ -9,4 +9,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableAggregate(group=[{0, 1}], age=[ARRAY_AGG($2) FILTER $3]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t2)], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["state","city","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml index c07f8d5f063..7d92ea3387e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml @@ -13,7 +13,7 @@ calcite: EnumerableCalc(expr#0..5=[{inputs}], proj#0..3=[{exprs}], skills_arr=[$t5]) EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{4}]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[1], expr#5=[2], expr#6=[3], expr#7=[array($t4, $t5, $t6)], proj#0..3=[{exprs}], skills_arr=[$t7]) - CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]], PushDownContext=[[PROJECT->[skills, skills_int, skills_not_array, username]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["skills","skills_int","skills_not_array","username"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + 
CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]], PushDownContext=[[PROJECT->[skills, skills_int, skills_not_array, username]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["skills","skills_int","skills_not_array","username"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableUncollect EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.skills_arr], skills_arr=[$t2]) EnumerableValues(tuples=[[{ 0 }]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_dedup_not_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_dedup_not_push.yaml index c9f8b484aed..5ad37732b07 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_dedup_not_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nested_agg_dedup_not_push.yaml @@ -10,4 +10,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..5=[{inputs}], expr#6=[1], expr#7=[<=($t5, $t6)], proj#0..1=[{exprs}], id=[$t3], age=[$t4], $condition=[$t7]) EnumerableWindow(window#0=[window(partition {2} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, address.city, id, age], FILTER->IS NOT NULL($2)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"nested":{"query":{"exists":{"field":"address.city","boost":1.0}},"path":"address","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["name","address","address.city","id","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, address.city, id, age], FILTER->IS NOT NULL($2)], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"nested":{"query":{"exists":{"field":"address.city","boost":1.0}},"path":"address","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["name","address","address.city","id","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml index e522ceb639e..b5b06f8f635 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableCalc(expr#0..2=[{inputs}], expr#3=[array($t0, $t1)], expr#4=[ARRAY_COMPACT($t3)], expr#5=[' '], expr#6=[ARRAY_JOIN($t4, $t5)], expr#7=['':VARCHAR], expr#8=[COALESCE($t6, $t7)], proj#0..2=[{exprs}], location=[$t8]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state","city","age"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_not_between_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_not_between_push.yaml index beeec6867f9..c4a88cdad84 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_not_between_push.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_not_between_push.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[SEARCH($10, Sarg[(-∞..30), (39..+∞)])]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($10, Sarg[(-∞..30), (39..+∞)]), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"should":[{"range":{"age":{"from":null,"to":30.0,"include_lower":true,"include_upper":false,"boost":1.0}}},{"range":{"age":{"from":39.0,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($10, Sarg[(-∞..30), (39..+∞)]), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"should":[{"range":{"age":{"from":null,"to":30.0,"include_lower":true,"include_upper":false,"boost":1.0}}},{"range":{"age":{"from":39.0,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml index 862a45dc617..b8e0f7ed6d5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml @@ -2,20 +2,21 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) - LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) - LogicalFilter(condition=[IS NOT NULL($2)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) - LogicalProject(avg_age=[$2], state=[$0], city=[$1]) - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) - LogicalProject(state=[$7], city=[$5], age=[$8]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]) + LogicalFilter(condition=[IS NOT NULL($2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) + LogicalProject(avg_age=[$2], state=[$0], city=[$1]) + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) + LogicalProject(state=[$7], city=[$5], age=[$8]) + LogicalFilter(condition=[>($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]) - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]) - EnumerableWindow(window#0=[window(partition {0} rows between 
UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]) + CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.json index 6235593dc6f..6daddb8ee76 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]): rowcount = 56.25, cumulative cost = {165681.25 rows, 105156.5471810663 cpu, 0.0 io}, id = 9401\n LogicalProject(age2=[$2]): rowcount = 56.25, cumulative cost = {165625.0 rows, 104256.5471810663 cpu, 0.0 io}, id = 9400\n LogicalFilter(condition=[<=($3, 1)]): rowcount = 56.25, cumulative cost = {165568.75 rows, 104200.2971810663 cpu, 0.0 io}, id = 9398\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]): rowcount = 225.0, cumulative cost = {165512.5 rows, 103975.2971810663 cpu, 0.0 io}, id = 9397\n LogicalFilter(condition=[IS NOT NULL($2)]): rowcount = 225.0, cumulative cost = {165287.5 rows, 103075.2971810663 cpu, 0.0 io}, id = 9396\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]): rowcount = 500.0, cumulative cost = {165062.5 rows, 102575.2971810663 cpu, 0.0 io}, id = 9395\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]): rowcount = 500.0, cumulative cost 
= {164562.5 rows, 101075.2971810663 cpu, 0.0 io}, id = 9393\n LogicalProject(avg_age=[$2], state=[$0], city=[$1]): rowcount = 500.0, cumulative cost = {164062.5 rows, 26500.0 cpu, 0.0 io}, id = 9392\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]): rowcount = 500.0, cumulative cost = {163562.5 rows, 25000.0 cpu, 0.0 io}, id = 9391\n LogicalProject(state=[$7], city=[$5], age=[$8]): rowcount = 5000.0, cumulative cost = {163000.0 rows, 25000.0 cpu, 0.0 io}, id = 9390\n LogicalFilter(condition=[>($8, 30)]): rowcount = 5000.0, cumulative cost = {158000.0 rows, 10000.0 cpu, 0.0 io}, id = 9389\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]): rowcount = 10000.0, cumulative cost = {153000.0 rows, 0.0 cpu, 0.0 io}, id = 9388\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]): rowcount = 225.0, cumulative cost = {2981.25 rows, 7950.0 cpu, 0.0 io}, id = 11254\n EnumerableLimit(fetch=[10000]): rowcount = 225.0, cumulative cost = {2756.25 rows, 7275.0 cpu, 0.0 io}, id = 11246\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]): rowcount = 225.0, cumulative cost = {2531.25 rows, 7050.0 cpu, 0.0 io}, id = 11250\n EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]): rowcount = 450.0, cumulative cost = {2306.25 rows, 3900.0 cpu, 0.0 io}, id = 11242\n EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]): rowcount = 450.0, cumulative cost = {1856.25 rows, 3000.0 cpu, 0.0 io}, id = 11258\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 500.0, cumulative cost = {1406.25 rows, 0.0 cpu, 0.0 io}, id = 10962\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]): rowcount = 56.25, cumulative cost = {165293.75 rows, 36190.23815546412 cpu, 0.0 io}, id = 4117\n LogicalProject(age2=[$2]): rowcount = 56.25, cumulative cost = {165237.5 rows, 35290.23815546412 cpu, 0.0 io}, id = 4116\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]): rowcount = 56.25, cumulative cost = {165181.25 rows, 35233.98815546412 cpu, 0.0 io}, id = 4115\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2]): rowcount = 56.25, cumulative cost = {165125.0 rows, 29793.75 cpu, 0.0 io}, id = 4114\n LogicalFilter(condition=[<=($3, 1)]): rowcount = 56.25, cumulative cost = {165068.75 rows, 29625.0 cpu, 0.0 io}, id = 4113\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]): rowcount = 225.0, cumulative cost = {165012.5 rows, 29400.0 cpu, 0.0 io}, id = 4112\n LogicalFilter(condition=[IS NOT NULL($2)]): rowcount = 225.0, cumulative cost = {164787.5 rows, 28500.0 cpu, 0.0 io}, id = 4111\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]): rowcount = 500.0, cumulative cost = {164562.5 rows, 28000.0 cpu, 0.0 io}, id = 4110\n LogicalProject(avg_age=[$2], state=[$0], city=[$1]): 
rowcount = 500.0, cumulative cost = {164062.5 rows, 26500.0 cpu, 0.0 io}, id = 4106\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]): rowcount = 500.0, cumulative cost = {163562.5 rows, 25000.0 cpu, 0.0 io}, id = 4105\n LogicalProject(state=[$7], city=[$5], age=[$8]): rowcount = 5000.0, cumulative cost = {163000.0 rows, 25000.0 cpu, 0.0 io}, id = 4104\n LogicalFilter(condition=[>($8, 30)]): rowcount = 5000.0, cumulative cost = {158000.0 rows, 10000.0 cpu, 0.0 io}, id = 4103\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]): rowcount = 10000.0, cumulative cost = {153000.0 rows, 0.0 cpu, 0.0 io}, id = 4102\n", + "physical": "EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]): rowcount = 225.0, cumulative cost = {3201.75 rows, 39096.942171903866 cpu, 0.0 io}, id = 4910\n CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]): rowcount = 225.0, cumulative cost = {2976.75 rows, 38196.942171903866 cpu, 0.0 io}, id = 4902\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]): rowcount = 225.0, cumulative cost = {2531.25 rows, 8950.0 cpu, 0.0 io}, id = 4906\n EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]): rowcount = 450.0, cumulative cost = {2306.25 rows, 4900.0 cpu, 0.0 io}, id = 4898\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]): rowcount = 450.0, cumulative cost = {1856.25 rows, 4000.0 cpu, 0.0 io}, id = 4914\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#4218:LogicalAggregate.NONE.[](input=RelSubset#4186,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 500.0, cumulative cost = {1406.25 rows, 0.0 cpu, 0.0 io}, id = 4505\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.yaml index 63b402833fc..c760955d27b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_cost.yaml @@ -1,21 +1,22 @@ calcite: logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]): rowcount = 56.25, cumulative cost = {165681.25 rows, 105156.5471810663 cpu, 0.0 io}, id = 7529 - LogicalProject(age2=[$2]): rowcount = 56.25, cumulative cost = {165625.0 rows, 104256.5471810663 cpu, 0.0 io}, id = 7528 - LogicalFilter(condition=[<=($3, 1)]): rowcount = 56.25, cumulative cost = {165568.75 rows, 104200.2971810663 cpu, 0.0 io}, id = 7526 - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]): rowcount = 225.0, cumulative cost = {165512.5 rows, 103975.2971810663 cpu, 0.0 io}, id = 7525 - LogicalFilter(condition=[IS NOT NULL($2)]): rowcount = 225.0, cumulative cost = {165287.5 rows, 103075.2971810663 cpu, 0.0 io}, id = 7524 - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]): rowcount = 500.0, 
cumulative cost = {165062.5 rows, 102575.2971810663 cpu, 0.0 io}, id = 7523 - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]): rowcount = 500.0, cumulative cost = {164562.5 rows, 101075.2971810663 cpu, 0.0 io}, id = 7521 - LogicalProject(avg_age=[$2], state=[$0], city=[$1]): rowcount = 500.0, cumulative cost = {164062.5 rows, 26500.0 cpu, 0.0 io}, id = 7520 - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]): rowcount = 500.0, cumulative cost = {163562.5 rows, 25000.0 cpu, 0.0 io}, id = 7519 - LogicalProject(state=[$7], city=[$5], age=[$8]): rowcount = 5000.0, cumulative cost = {163000.0 rows, 25000.0 cpu, 0.0 io}, id = 7518 - LogicalFilter(condition=[>($8, 30)]): rowcount = 5000.0, cumulative cost = {158000.0 rows, 10000.0 cpu, 0.0 io}, id = 7517 - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]): rowcount = 10000.0, cumulative cost = {153000.0 rows, 0.0 cpu, 0.0 io}, id = 7516 + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]): rowcount = 56.25, cumulative cost = {165293.75 rows, 36190.23815546412 cpu, 0.0 io}, id = 3303 + LogicalProject(age2=[$2]): rowcount = 56.25, cumulative cost = {165237.5 rows, 35290.23815546412 cpu, 0.0 io}, id = 3302 + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]): rowcount = 56.25, cumulative cost = {165181.25 rows, 35233.98815546412 cpu, 0.0 io}, id = 3301 + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]): rowcount = 56.25, cumulative cost = {165125.0 rows, 29793.75 cpu, 0.0 io}, id = 3300 + LogicalFilter(condition=[<=($3, 1)]): rowcount = 56.25, cumulative cost = {165068.75 rows, 29625.0 cpu, 0.0 io}, id = 3299 + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]): rowcount = 225.0, cumulative cost = {165012.5 rows, 29400.0 cpu, 0.0 io}, id = 3298 + LogicalFilter(condition=[IS NOT NULL($2)]): rowcount = 225.0, cumulative cost = {164787.5 rows, 28500.0 cpu, 0.0 io}, id = 3297 + LogicalProject(avg_age=[$0], 
state=[$1], age2=[+($0, 2)]): rowcount = 500.0, cumulative cost = {164562.5 rows, 28000.0 cpu, 0.0 io}, id = 3296 + LogicalProject(avg_age=[$2], state=[$0], city=[$1]): rowcount = 500.0, cumulative cost = {164062.5 rows, 26500.0 cpu, 0.0 io}, id = 3292 + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]): rowcount = 500.0, cumulative cost = {163562.5 rows, 25000.0 cpu, 0.0 io}, id = 3291 + LogicalProject(state=[$7], city=[$5], age=[$8]): rowcount = 5000.0, cumulative cost = {163000.0 rows, 25000.0 cpu, 0.0 io}, id = 3290 + LogicalFilter(condition=[>($8, 30)]): rowcount = 5000.0, cumulative cost = {158000.0 rows, 10000.0 cpu, 0.0 io}, id = 3289 + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]): rowcount = 10000.0, cumulative cost = {153000.0 rows, 0.0 cpu, 0.0 io}, id = 3288 physical: | - EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]): rowcount = 225.0, cumulative cost = {2981.25 rows, 7950.0 cpu, 0.0 io}, id = 9382 - EnumerableLimit(fetch=[10000]): rowcount = 225.0, cumulative cost = {2756.25 rows, 7275.0 cpu, 0.0 io}, id = 9374 - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]): rowcount = 225.0, cumulative cost = {2531.25 rows, 7050.0 cpu, 0.0 io}, id = 9378 - EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]): rowcount = 450.0, cumulative cost = {2306.25 rows, 3900.0 cpu, 0.0 io}, id = 9370 - EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]): rowcount = 450.0, cumulative cost = {1856.25 rows, 3000.0 cpu, 0.0 io}, id = 9386 - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 500.0, cumulative cost = {1406.25 rows, 0.0 cpu, 0.0 io}, id = 9090 + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]): rowcount = 225.0, cumulative cost = {3201.75 rows, 39096.942171903866 cpu, 0.0 io}, id = 4096 + CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]): rowcount = 225.0, cumulative cost = {2976.75 rows, 38196.942171903866 cpu, 0.0 io}, id = 4088 + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]): rowcount = 225.0, cumulative cost = {2531.25 rows, 8950.0 cpu, 0.0 io}, id = 4092 + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]): rowcount = 450.0, cumulative cost = {2306.25 rows, 4900.0 cpu, 0.0 io}, id = 4084 + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]): rowcount = 450.0, cumulative cost = {1856.25 rows, 4000.0 cpu, 0.0 io}, id = 4100 + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#3404:LogicalAggregate.NONE.[](input=RelSubset#3372,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 500.0, cumulative cost = {1406.25 rows, 0.0 cpu, 0.0 io}, id = 3691 diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.json index 5ce93804ca4..45ec4243593 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.json @@ -1,7 +1,7 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0])\n EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3])\n 
EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n", - "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable = v1stashed.scan();\n final org.apache.calcite.linq4j.AbstractEnumerable source = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while 
(inputEnumerator.moveNext()) {\n if ((Double) inputEnumerator.current() != null) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n return (Double) inputEnumerator.current() == null ? null : Double.valueOf(((Double) inputEnumerator.current()).doubleValue() + (double) 2);\n }\n\n };\n }\n\n };\n int prevStart;\n int prevEnd;\n final java.util.Comparator comparator = new java.util.Comparator(){\n public int compare(Double v0, Double v1) {\n int c;\n return 0;\n }\n\n public int compare(Object o0, Object o1) {\n return this.compare((Double) o0, (Double) o1);\n }\n\n };\n final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap();\n source.foreach(new org.apache.calcite.linq4j.function.Function1() {\n public Object apply(Double v) {\n Double key = v;\n multiMap.putMulti(key, v);\n return null;\n }\n public Object apply(Object v) {\n return apply(\n (Double) v);\n }\n }\n );\n final java.util.Iterator iterator = multiMap.arrays(comparator);\n final java.util.ArrayList _list = new java.util.ArrayList(\n multiMap.size());\n Long a0w0 = (Long) null;\n while (iterator.hasNext()) {\n final Object[] _rows = (Object[]) iterator.next();\n prevStart = -1;\n prevEnd = 2147483647;\n for (int i = 0; i < _rows.length; (++i)) {\n if (i != prevEnd) {\n int actualStart = i < prevEnd ? 
0 : prevEnd + 1;\n prevEnd = i;\n a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue());\n }\n _list.add(new Object[] {\n (Double) _rows[i],\n a0w0});\n }\n }\n multiMap.clear();\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable0 = org.apache.calcite.linq4j.Linq4j.asEnumerable(_list);\n final org.apache.calcite.linq4j.AbstractEnumerable child = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable0.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[1]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Object input_value = current[0];\n final Object input_value0 = current[1];\n return new Object[] {\n input_value,\n input_value0};\n }\n\n static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue();\n };\n }\n\n };\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 = child.take(10000);\n return new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable1.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public 
boolean moveNext() {\n return inputEnumerator.moveNext();\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n return (Double) ((Object[]) inputEnumerator.current())[0];\n }\n\n };\n }\n\n };\n}\n\n\npublic Class getElementType() {\n return java.lang.Double.class;\n}\n\n\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age2=[$2])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1])\n CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4])\n EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#2590:LogicalAggregate.NONE.[](input=RelSubset#2558,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n", + "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable = v1stashed.scan();\n final org.apache.calcite.linq4j.AbstractEnumerable source = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if ((Double) ((Object[]) inputEnumerator.current())[1] != null) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Double input_value0 = (Double) current[1];\n return new Object[] {\n current[0],\n input_value0 == null ? 
null : Double.valueOf(input_value0.doubleValue() + (double) 2)};\n }\n\n };\n }\n\n };\n int prevStart;\n int prevEnd;\n final java.util.Comparator comparator = new java.util.Comparator(){\n public int compare(Object[] v0, Object[] v1) {\n final int c;\n c = org.apache.calcite.runtime.Utilities.compareNullsFirst((Double) v0[1], (Double) v1[1]);\n if (c != 0) {\n return c;\n }\n return 0;\n }\n\n public int compare(Object o0, Object o1) {\n return this.compare((Object[]) o0, (Object[]) o1);\n }\n\n };\n final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap();\n source.foreach(new org.apache.calcite.linq4j.function.Function1() {\n public Object apply(Object[] v) {\n Double key = (Double) v[1];\n multiMap.putMulti(key, v);\n return null;\n }\n public Object apply(Object v) {\n return apply(\n (Object[]) v);\n }\n }\n );\n final java.util.Iterator iterator = multiMap.arrays(comparator);\n final java.util.ArrayList _list = new java.util.ArrayList(\n multiMap.size());\n Long a0w0 = (Long) null;\n while (iterator.hasNext()) {\n final Object[] _rows = (Object[]) iterator.next();\n prevStart = -1;\n prevEnd = 2147483647;\n for (int i = 0; i < _rows.length; (++i)) {\n final Object[] row = (Object[]) _rows[i];\n if (i != prevEnd) {\n int actualStart = i < prevEnd ? 
0 : prevEnd + 1;\n prevEnd = i;\n a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue());\n }\n _list.add(new Object[] {\n row[0],\n row[1],\n a0w0});\n }\n }\n multiMap.clear();\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable0 = org.apache.calcite.linq4j.Linq4j.asEnumerable(_list);\n final org.apache.calcite.linq4j.AbstractEnumerable child = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable0.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[2]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Object input_value = current[0];\n final Object input_value0 = current[1];\n final Object input_value1 = current[2];\n return new Object[] {\n input_value,\n input_value0,\n input_value1};\n }\n\n static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue();\n };\n }\n\n };\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 = org.apache.calcite.linq4j.EnumerableDefaults.orderBy(child, new org.apache.calcite.linq4j.function.Function1() {\n public String apply(Object[] v) {\n return v[0] == null ? 
null : v[0].toString();\n }\n public Object apply(Object v) {\n return apply(\n (Object[]) v);\n }\n }\n , org.apache.calcite.linq4j.function.Functions.nullsComparator(true, false), 0, 10000);\n return new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable1.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n return inputEnumerator.moveNext();\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n return (Double) ((Object[]) inputEnumerator.current())[1];\n }\n\n };\n }\n\n };\n}\n\n\npublic Class getElementType() {\n return java.lang.Double.class;\n}\n\n\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.yaml index 89137cfc835..f91ea4ba4d8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_extended.yaml @@ -2,23 +2,24 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) - LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) - LogicalFilter(condition=[IS NOT NULL($2)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) - LogicalProject(avg_age=[$2], state=[$0], city=[$1]) - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) - LogicalProject(state=[$7], city=[$5], age=[$8]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalSort(sort0=[$1], 
dir0=[ASC-nulls-first]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]) + LogicalFilter(condition=[IS NOT NULL($2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) + LogicalProject(avg_age=[$2], state=[$0], city=[$1]) + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) + LogicalProject(state=[$7], city=[$5], age=[$8]) + LogicalFilter(condition=[>($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]) - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]) - EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..2=[{inputs}], 
age2=[$t1]) + CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#1776:LogicalAggregate.NONE.[](input=RelSubset#1744,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) extended: |+ public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) { final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get("v1stashed"); @@ -33,7 +34,7 @@ calcite: public boolean moveNext() { while (inputEnumerator.moveNext()) { - if ((Double) inputEnumerator.current() != null) { + if ((Double) ((Object[]) inputEnumerator.current())[1] != null) { return true; } } @@ -45,7 +46,11 @@ calcite: } public Object current() { - return (Double) inputEnumerator.current() == null ? 
null : Double.valueOf(((Double) inputEnumerator.current()).doubleValue() + (double) 2); + final Object[] current = (Object[]) inputEnumerator.current(); + final Double input_value0 = (Double) current[1]; + return new Object[] { + current[0], + input_value0 == null ? null : Double.valueOf(input_value0.doubleValue() + (double) 2)}; } }; @@ -55,26 +60,30 @@ calcite: int prevStart; int prevEnd; final java.util.Comparator comparator = new java.util.Comparator(){ - public int compare(Double v0, Double v1) { - int c; + public int compare(Object[] v0, Object[] v1) { + final int c; + c = org.apache.calcite.runtime.Utilities.compareNullsFirst((Double) v0[1], (Double) v1[1]); + if (c != 0) { + return c; + } return 0; } public int compare(Object o0, Object o1) { - return this.compare((Double) o0, (Double) o1); + return this.compare((Object[]) o0, (Object[]) o1); } }; final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap(); source.foreach(new org.apache.calcite.linq4j.function.Function1() { - public Object apply(Double v) { - Double key = v; + public Object apply(Object[] v) { + Double key = (Double) v[1]; multiMap.putMulti(key, v); return null; } public Object apply(Object v) { return apply( - (Double) v); + (Object[]) v); } } ); @@ -87,13 +96,15 @@ calcite: prevStart = -1; prevEnd = 2147483647; for (int i = 0; i < _rows.length; (++i)) { + final Object[] row = (Object[]) _rows[i]; if (i != prevEnd) { int actualStart = i < prevEnd ? 
0 : prevEnd + 1; prevEnd = i; a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue()); } _list.add(new Object[] { - (Double) _rows[i], + row[0], + row[1], a0w0}); } } @@ -109,7 +120,7 @@ calcite: public boolean moveNext() { while (inputEnumerator.moveNext()) { - if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[1]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) { + if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[2]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) { return true; } } @@ -124,9 +135,11 @@ calcite: final Object[] current = (Object[]) inputEnumerator.current(); final Object input_value = current[0]; final Object input_value0 = current[1]; + final Object input_value1 = current[2]; return new Object[] { input_value, - input_value0}; + input_value0, + input_value1}; } static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue(); @@ -134,7 +147,16 @@ calcite: } }; - final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 = child.take(10000); + final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 = org.apache.calcite.linq4j.EnumerableDefaults.orderBy(child, new org.apache.calcite.linq4j.function.Function1() { + public String apply(Object[] v) { + return v[0] == null ? 
null : v[0].toString(); + } + public Object apply(Object v) { + return apply( + (Object[]) v); + } + } + , org.apache.calcite.linq4j.function.Functions.nullsComparator(true, false), 0, 10000); return new org.apache.calcite.linq4j.AbstractEnumerable(){ public org.apache.calcite.linq4j.Enumerator enumerator() { return new org.apache.calcite.linq4j.Enumerator(){ @@ -152,7 +174,7 @@ calcite: } public Object current() { - return (Double) ((Object[]) inputEnumerator.current())[0]; + return (Double) ((Object[]) inputEnumerator.current())[1]; } }; @@ -164,4 +186,4 @@ calcite: public Class getElementType() { return java.lang.Double.class; - } + } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.json index 96e070954ec..5a3103178bb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.json @@ -1,5 +1,5 @@ { "calcite": { - "logical": "LogicalSystemLimit\n LogicalProject\n LogicalFilter\n LogicalProject\n LogicalFilter\n LogicalProject\n LogicalSort\n LogicalProject\n LogicalAggregate\n LogicalProject\n LogicalFilter\n CalciteLogicalIndexScan\n" + "logical": "LogicalSystemLimit\n LogicalProject\n LogicalSort\n LogicalProject\n LogicalFilter\n LogicalProject\n LogicalFilter\n LogicalProject\n LogicalProject\n LogicalAggregate\n LogicalProject\n LogicalFilter\n CalciteLogicalIndexScan\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.yaml index f58ffa560a7..c6b8c57b0f0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_simple.yaml @@ -2,13 +2,14 @@ calcite: logical: | 
LogicalSystemLimit LogicalProject - LogicalFilter + LogicalSort LogicalProject LogicalFilter LogicalProject - LogicalSort + LogicalFilter LogicalProject - LogicalAggregate - LogicalProject - LogicalFilter - CalciteLogicalIndexScan + LogicalProject + LogicalAggregate + LogicalProject + LogicalFilter + CalciteLogicalIndexScan diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.json index a50b0baa104..1874fc81ec7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0])\n EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3])\n EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age2=[$2])\n LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1])\n CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4])\n EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and 
CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.yaml index 862a45dc617..b8e0f7ed6d5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output_standard.yaml @@ -2,20 +2,21 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) - LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) - LogicalFilter(condition=[IS NOT NULL($2)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) - LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) - LogicalProject(avg_age=[$2], state=[$0], city=[$1]) - LogicalAggregate(group=[{0, 1}], 
avg_age=[AVG($2)]) - LogicalProject(state=[$7], city=[$5], age=[$8]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]) + LogicalFilter(condition=[IS NOT NULL($2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) + LogicalProject(avg_age=[$2], state=[$0], city=[$1]) + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) + LogicalProject(state=[$7], city=[$5], age=[$8]) + LogicalFilter(condition=[>($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..1=[{inputs}], age2=[$t0]) - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]) - EnumerableWindow(window#0=[window(partition {0} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], $0=[$t2], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state], SORT->[1 ASC FIRST], PROJECT->[avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]) + CalciteEnumerableTopK(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[2], expr#3=[+($t1, $t2)], expr#4=[IS NOT NULL($t1)], state=[$t0], age2=[$t3], $condition=[$t4]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[state, avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_isnull.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_isnull.json index 590f5e273b1..ac6f4f5a213 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_isnull.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_isnull.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], address=[$1], id=[$7], age=[$8])\n LogicalFilter(condition=[AND(IS NULL($1), =($0, 'david'))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t1)], proj#0..3=[{exprs}], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, id, age], FILTER->=($0, 'david')], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"david\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"name\",\"address\",\"id\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t1)], proj#0..3=[{exprs}], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, id, age], FILTER->=($0, 'david')], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"david\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"name\",\"address\",\"id\",\"age\"]}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push.json index 79719af6823..1b6a24e00bc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8], address=[$2])\n LogicalFilter(condition=[AND(OR(=($7, 'Seattle'), <($8, 10)), >=($8, 1), =($2, '880 Holmes Lane'))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=['880 Holmes Lane':VARCHAR], expr#3=[=($t0, $t2)], age=[$t1], address=[$t0], $condition=[$t3])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address, state, age], FILTER->AND(OR(=($1, 'Seattle'), <($2, 10)), >=($2, 1)), PROJECT->[address, age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"state.keyword\":{\"value\":\"Seattle\",\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":null,\"to\":10,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},{\"range\":{\"age\":{\"from\":1,\"to\":null,\"include_lower\":true,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"address\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=['880 Holmes Lane':VARCHAR], 
expr#3=[=($t0, $t2)], age=[$t1], address=[$t0], $condition=[$t3])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address, state, age], FILTER->AND(OR(=($1, 'Seattle'), <($2, 10)), >=($2, 1)), PROJECT->[address, age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"state.keyword\":{\"value\":\"Seattle\",\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":null,\"to\":10,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},{\"range\":{\"age\":{\"from\":1,\"to\":null,\"include_lower\":true,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"address\",\"age\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push2.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push2.json index 1f9a6fac9a5..aec685a1848 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push2.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_push2.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8], address=[$2])\n LogicalFilter(condition=[AND(OR(=($2, '671 Bristol Street'), <($8, 10)), OR(>=($8, 10), =($2, '880 Holmes Lane')))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=['671 Bristol Street':VARCHAR], expr#3=[=($t0, $t2)], expr#4=[10], expr#5=[<($t1, $t4)], expr#6=[OR($t3, $t5)], expr#7=[>=($t1, $t4)], expr#8=['880 Holmes Lane':VARCHAR], expr#9=[=($t0, $t8)], expr#10=[OR($t7, $t9)], expr#11=[AND($t6, 
$t10)], age=[$t1], address=[$t0], $condition=[$t11])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address, age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"address\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=['671 Bristol Street':VARCHAR], expr#3=[=($t0, $t2)], expr#4=[10], expr#5=[<($t1, $t4)], expr#6=[OR($t3, $t5)], expr#7=[>=($t1, $t4)], expr#8=['880 Holmes Lane':VARCHAR], expr#9=[=($t0, $t8)], expr#10=[OR($t7, $t9)], expr#11=[AND($t6, $t10)], age=[$t1], address=[$t0], $condition=[$t11])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address, age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"address\",\"age\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_script_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_script_push.json index 470b1f467cb..50c9467c4b8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_script_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_partial_filter_script_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(firstname=[$1], age=[$8], address=[$2])\n LogicalFilter(condition=[AND(=($2, '671 Bristol Street'), =(-($8, 2), 30))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=['671 Bristol Street':VARCHAR], expr#4=[=($t1, $t3)], 
firstname=[$t0], age=[$t2], address=[$t1], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, address, age], SCRIPT->=(-($2, 2), 30)], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBVnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AnJ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiPSIsCiAgICAia2luZCI6ICJFUVVBTFMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICItIiwKICAgICAgICAia2luZCI6ICJNSU5VUyIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAyLAogICAgICAgICAgIm5hbWUiOiAiJDIiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAzMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAA3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhL
nR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgALeHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEXQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABh4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABoAAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+AAx+cQB+ABB0AAZTVFJJTkd+cQB+ABR0AAdLZXl3b3JkcQB+ABl4dAAHYWRkcmVzc3NxAH4ACnEAfgAScQB+ABVxAH4AGXNxAH4AAAAAAAN3BAAAAAB4dAADYWdlfnEAfgAQdAAETE9OR3h4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"firstname\",\"address\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=['671 Bristol Street':VARCHAR], expr#4=[=($t1, $t3)], firstname=[$t0], age=[$t2], address=[$t1], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, address, age], SCRIPT->=(-($2, 2), 30)], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBVnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AnJ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiPSIsCiAgICAia2luZCI6ICJFUVVBTFMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICItIiwKICAgICAgICAia2luZCI6ICJNSU5VUyIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAyLAogICAgICAgICAgIm5hbWUiOiAiJDIiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAzMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAA3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3Nx
bC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgALeHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEXQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABh4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABoAAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+AAx+cQB+ABB0AAZTVFJJTkd+cQB+ABR0AAdLZXl3b3JkcQB+ABl4dAAHYWRkcmVzc3NxAH4ACnEAfgAScQB+ABVxAH4AGXNxAH4AAAAAAAN3BAAAAAB4dAADYWdlfnEAfgAQdAAETE9OR3h4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"firstname\",\"address\",\"age\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_brain_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_brain_agg_push.yaml index 0b2d4584804..004a6cb24b1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_brain_agg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_brain_agg_push.yaml @@ -15,7 +15,7 @@ calcite: EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) EnumerableAggregate(group=[{}], patterns_field=[pattern($0, $1, $2, $3)]) EnumerableCalc(expr#0=[{inputs}], expr#1=[10], expr#2=[100000], expr#3=[true], proj#0..3=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[email]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["email"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[email]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["email"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableUncollect EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.patterns_field], patterns_field=[$t2]) EnumerableValues(tuples=[[{ 0 }]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml index 71da341abfc..ccf7e71efa3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..10=[{inputs}], expr#11=[Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR], expr#12=[SEARCH($t9, $t11)], expr#13=['':VARCHAR], expr#14=['[a-zA-Z0-9]+':VARCHAR], expr#15=['<*>':VARCHAR], expr#16=[REGEXP_REPLACE($t9, $t14, $t15)], expr#17=[CASE($t12, $t13, $t16)], proj#0..10=[{exprs}], patterns_field=[$t17]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_prevent_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_prevent_limit_push.yaml index e7019b44d7d..24497570ab4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_prevent_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_prevent_limit_push.yaml @@ -7,4 +7,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1], fetch=[1]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) 
\ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_regex.yaml index 5d76ec9ab64..ae2fd24eb28 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_regex.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[REGEXP_CONTAINS($10, '^[A-Z][a-z]+$':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->REGEXP_CONTAINS($10, '^[A-Z][a-z]+$':VARCHAR), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBkXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["lastname.keyword","^[A-Z][a-z]+$"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->REGEXP_CONTAINS($10, '^[A-Z][a-z]+$':VARCHAR), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBkXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["lastname.keyword","^[A-Z][a-z]+$"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_negated.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_negated.yaml index 77cedbc871d..5645058561e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_negated.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_negated.yaml @@ -6,4 +6,4 @@ calcite: LogicalFilter(condition=[NOT(REGEXP_CONTAINS($10, '.*son$':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, 
state, age, email, lastname], SCRIPT->NOT(REGEXP_CONTAINS($10, '.*son$':VARCHAR)), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCWXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJOT1QiLAogICAgImtpbmQiOiAiTk9UIiwKICAgICJzeW50YXgiOiAiUFJFRklYIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX0NPTlRBSU5TIiwKICAgICAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["lastname.keyword",".*son$"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->NOT(REGEXP_CONTAINS($10, '.*son$':VARCHAR)), LIMIT->5, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCWXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJOT1QiLAogICAgImtpbmQiOiAiTk9UIiwKICAgICJzeW50YXgiOiAiUFJFRklYIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX0NPTlRBSU5TIiwKICAgICAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["lastname.keyword",".*son$"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_eval.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_eval.json index bcaaec137b6..b780a1f3361 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_eval.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_eval.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(has_hello=[REGEXP_CONTAINS($0, 'hello':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]])\n", - "physical": 
"EnumerableCalc(expr#0=[{inputs}], expr#1=['hello':VARCHAR], expr#2=[REGEXP_CONTAINS($t0, $t1)], has_hello=[$t2])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]], PushDownContext=[[PROJECT->[name], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"name\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableCalc(expr#0=[{inputs}], expr#1=['hello':VARCHAR], expr#2=[REGEXP_CONTAINS($t0, $t1)], has_hello=[$t2])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]], PushDownContext=[[PROJECT->[name], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"name\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.yaml index 664719d5411..635fa3de1d2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[REGEXP_CONTAINS($0, 'hello':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]], PushDownContext=[[PROJECT->[name], SCRIPT->REGEXP_CONTAINS($0, 'hello':VARCHAR), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBkXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2],"DIGESTS":["name","hello"]}},"boost":1.0}},"_source":{"includes":["name"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]], PushDownContext=[[PROJECT->[name], SCRIPT->REGEXP_CONTAINS($0, 'hello':VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBkXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2],"DIGESTS":["name","hello"]}},"boost":1.0}},"_source":{"includes":["name"]}}, 
requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml index 7e951ffecf8..65dc5163f7f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_command.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0=[{inputs}], expr#1=['IL':VARCHAR], expr#2=['Illinois':VARCHAR], expr#3=[REPLACE($t0, $t1, $t2)], state=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_wildcard.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_wildcard.yaml index 784b05c63a5..26879d8a7e7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_wildcard.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_replace_wildcard.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0=[{inputs}], expr#1=['^\Q\E(.*?)\QL\E$':VARCHAR], expr#2=['STATE_IL':VARCHAR], expr#3=[REGEXP_REPLACE($t0, $t1, $t2)], state=[$t3]) - 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_ignored.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_ignored.yaml new file mode 100644 index 00000000000..eb89e742a1f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_ignored.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.yaml new file mode 100644 index 00000000000..ada4ac4514a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.yaml @@ -0,0 +1,18 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{ + "age" : { + "order" : "asc", + "missing" : "_first" + } + }, { + "firstname.keyword" : { + "order" : "desc", + "missing" : "_last" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]},"sort":[{"age":{"order":"asc","missing":"_first"}},{"firstname.keyword":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.yaml new file mode 100644 index 00000000000..f85ea43a051 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + 
LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], dir0=[ASC-nulls-first]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{ + "age" : { + "order" : "asc", + "missing" : "_first" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_with_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_with_timestamp.yaml new file mode 100644 index 00000000000..828e7250873 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_with_timestamp.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[DESC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3]) + LogicalSort(sort0=[$0], dir0=[DESC], fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], SORT->[{ + "@timestamp" : { + "order" : "desc", + "missing" : "_first" + } + }], LIMIT->5, 
LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["@timestamp","category","value","timestamp"]},"sort":[{"@timestamp":{"order":"desc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_rex.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_rex.yaml index 2c18345f7fb..b0ce9c715b3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_rex.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_rex.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..10=[{inputs}], expr#11=['(?^[A-Z])'], expr#12=['initial'], expr#13=[REX_EXTRACT($t10, $t11, $t12)], proj#0..10=[{exprs}], initial=[$t13]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_multi_range.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_multi_range.json index b9b6fc9c2b7..ac98d9192c3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_multi_range.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_multi_range.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalFilter(condition=[SEARCH($8, Sarg[0, [1..10], (20..30)])])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->SEARCH($0, Sarg[0, [1..10], (20..30)]), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"should\":[{\"term\":{\"age\":{\"value\":0.0,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":1.0,\"to\":10.0,\"include_lower\":true,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":20.0,\"to\":30.0,\"include_lower\":false,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->SEARCH($0, Sarg[0, [1..10], (20..30)]), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"should\":[{\"term\":{\"age\":{\"value\":0.0,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":1.0,\"to\":10.0,\"include_lower\":true,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":20.0,\"to\":30.0,\"include_lower\":false,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.yaml index f6debc48fcc..bdea2e2833f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_single_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[SEARCH($8, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->SEARCH($0, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":1.0,"to":10.0,"include_lower":true,"include_upper":false,"boost":1.0}}},"_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], FILTER->SEARCH($0, Sarg[[1.0:DECIMAL(11, 1)..10:DECIMAL(11, 1))]:DECIMAL(11, 1)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":1.0,"to":10.0,"include_lower":true,"include_upper":false,"boost":1.0}}},"_source":{"includes":["age"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.yaml index cfb07502429..2d861e60248 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR)), <($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[['2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, 
state, age, email, male], FILTER->SEARCH($3, Sarg[['2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_correlated_subquery_in_select.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_correlated_subquery_in_select.yaml index 027e7e40c43..7fde9c0d05e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_correlated_subquery_in_select.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_correlated_subquery_in_select.yaml @@ -13,7 +13,7 @@ calcite: EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t3)], expr#5=[0:BIGINT], expr#6=[CASE($t4, $t5, $t3)], id=[$t1], name=[$t0], count_dept=[$t6]) EnumerableLimit(fetch=[10000]) EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($1, $2)], joinType=[left]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0..2=[{inputs}], 
expr#3=[IS NOT NULL($t2)], expr#4=[0], expr#5=[CASE($t3, $t2, $t4)], uid=[$t0], count(name)=[$t5]) EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[left]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0})], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"id":{"terms":{"field":"id","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_correlated_subquery_in_where.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_correlated_subquery_in_where.yaml index 42e5e196667..73eb8a9ca2f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_correlated_subquery_in_where.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_correlated_subquery_in_where.yaml @@ -13,6 +13,6 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..1=[{inputs}], id=[$t1], name=[$t0]) EnumerableHashJoin(condition=[=($1, $2)], joinType=[semi]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0..1=[{inputs}], expr#2=[=($t0, $t1)], proj#0..1=[{exprs}], 
$condition=[$t2]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},max(uid)=MAX($1))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"uid","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"uid1":{"terms":{"field":"uid","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"max(uid)":{"max":{"field":"uid"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_uncorrelated_subquery_in_select.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_uncorrelated_subquery_in_select.yaml index 70fcf1c804d..9ce6c6a42d4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_uncorrelated_subquery_in_select.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_uncorrelated_subquery_in_select.yaml @@ -11,5 +11,5 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableNestedLoopJoin(condition=[true], joinType=[left]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[FILTER->IS NOT NULL($0), 
AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},count(name)=COUNT($0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"name","boost":1.0}},"track_total_hits":2147483647}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_uncorrelated_subquery_in_where.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_uncorrelated_subquery_in_where.yaml index a14c422cfb5..042787a458b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_uncorrelated_subquery_in_where.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_scalar_uncorrelated_subquery_in_where.yaml @@ -13,5 +13,5 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..2=[{inputs}], name=[$t0]) EnumerableNestedLoopJoin(condition=[>($1, +($2, 999))], joinType=[inner]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_worker]], PushDownContext=[[PROJECT->[name, id]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["name","id"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_work_information]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},count(name)=COUNT($0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"name","boost":1.0}},"track_total_hits":2147483647}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json index 85494ecb282..a6f59992d19 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_basic_text.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'ERROR':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'ERROR':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'ERROR':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json index 1883a3503fc..59db7160b0e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_numeric_comparison.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'severityNumber:>15':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'severityNumber:>15':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": 
"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'severityNumber:>15':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json index 835dfca0835..077d79196e7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_wildcard_star.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(spanId=[$0], traceId=[$1], @timestamp=[$2], instrumentationScope=[$3], severityText=[$7], resource=[$8], flags=[$23], attributes=[$24], droppedAttributesCount=[$162], severityNumber=[$163], time=[$164], body=[$165])\n LogicalFilter(condition=[query_string(MAP('query', 'severityText:ERR*':VARCHAR))])\n 
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'severityText:ERR*':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[spanId, traceId, @timestamp, instrumentationScope, severityText, resource, flags, attributes, droppedAttributesCount, severityNumber, time, body], FILTER->query_string(MAP('query', 'severityText:ERR*':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json index fb6ef8e36ff..1389bca6d19 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json @@ -1,6 +1,6 @@ { "calcite":{ "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical":"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], 
FILTER->match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"birthdate\":{\"query\":\"\\\"2016\\\\-12\\\\-08 00\\\\:00\\\\:00.000000000\\\"\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical":"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"birthdate\":{\"query\":\"\\\"2016\\\\-12\\\\-08 00\\\\:00\\\\:00.000000000\\\"\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_push.json index 
779add87f49..066b678a960 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$10], age2=[$19])\n LogicalSort(sort0=[$19], dir0=[ASC-nulls-first])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, 2)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], age=[$t0], age2=[$t2])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], age=[$t0], age2=[$t2])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff 
--git a/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_pushdown_for_smj.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_pushdown_for_smj.yaml index 3c6bf5d725d..d0e66b1d14f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_pushdown_for_smj.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_pushdown_for_smj.yaml @@ -18,11 +18,11 @@ calcite: "order" : "asc", "missing" : "_last" } - }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"age":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0..12=[{inputs}], expr#13=[20], expr#14=[-($t7, $t13)], proj#0..12=[{exprs}], $f13=[$t14]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000, SORT->[{ "balance" : { "order" : "asc", "missing" : "_last" } - }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"balance":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + 
}]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"balance":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_single_expr_output_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_single_expr_output_push.json index ff07a6cddf6..0a36ec4648d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_single_expr_output_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_simple_sort_expr_single_expr_output_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(b=[$19])\n LogicalSort(sort0=[$19], dir0=[ASC-nulls-first])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], b=[+($7, 1)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableCalc(expr#0=[{inputs}], expr#1=[1], expr#2=[+($t0, $t1)], b=[$t2])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance], SORT->[{\n \"balance\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"balance\"],\"excludes\":[]},\"sort\":[{\"balance\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, 
pageSize=null, startFrom=0)])\n" + "physical": "EnumerableCalc(expr#0=[{inputs}], expr#1=[1], expr#2=[+($t0, $t1)], b=[$t2])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance], SORT->[{\n \"balance\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"balance\"]},\"sort\":[{\"balance\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json index f9f084b81a9..248a0a42695 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_single_field_relevance_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalFilter(condition=[match(MAP('field', $9), MAP('query', '*@gmail.com':VARCHAR), MAP('boost', '1.0':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->match(MAP('field', $9), MAP('query', '*@gmail.com':VARCHAR), MAP('boost', '1.0':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"email\":{\"query\":\"*@gmail.com\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->match(MAP('field', $9), MAP('query', '*@gmail.com':VARCHAR), MAP('boost', '1.0':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"email\":{\"query\":\"*@gmail.com\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_skip_script_encoding.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_skip_script_encoding.yaml index 7938c9ea602..de78240cbea 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_skip_script_encoding.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_skip_script_encoding.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[AND(=($2, '671 Bristol Street'), =(-($8, 
2), 30))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, address, age], SCRIPT->AND(=($1, '671 Bristol Street'), =(-($2, 2), 30)), PROJECT->[firstname, age, address], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"{\\n \\\"op\\\": {\\n \\\"name\\\": \\\"=\\\",\\n \\\"kind\\\": \\\"EQUALS\\\",\\n \\\"syntax\\\": \\\"BINARY\\\"\\n },\\n \\\"operands\\\": [\\n {\\n \\\"dynamicParam\\\": 0,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"VARCHAR\\\",\\n \\\"nullable\\\": true,\\n \\\"precision\\\": -1\\n }\\n },\\n {\\n \\\"dynamicParam\\\": 1,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"VARCHAR\\\",\\n \\\"nullable\\\": true,\\n \\\"precision\\\": -1\\n }\\n }\\n ]\\n}\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2],"DIGESTS":["address","671 Bristol Street"]}},"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"{\\n \\\"op\\\": {\\n \\\"name\\\": \\\"=\\\",\\n \\\"kind\\\": \\\"EQUALS\\\",\\n \\\"syntax\\\": \\\"BINARY\\\"\\n },\\n \\\"operands\\\": [\\n {\\n \\\"op\\\": {\\n \\\"name\\\": \\\"-\\\",\\n \\\"kind\\\": \\\"MINUS\\\",\\n \\\"syntax\\\": \\\"BINARY\\\"\\n },\\n \\\"operands\\\": [\\n {\\n \\\"dynamicParam\\\": 0,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"BIGINT\\\",\\n \\\"nullable\\\": true\\n }\\n },\\n {\\n \\\"dynamicParam\\\": 1,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"BIGINT\\\",\\n \\\"nullable\\\": true\\n }\\n }\\n ],\\n \\\"type\\\": {\\n \\\"type\\\": \\\"BIGINT\\\",\\n \\\"nullable\\\": true\\n }\\n },\\n {\\n \\\"dynamicParam\\\": 2,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"BIGINT\\\",\\n \\\"nullable\\\": true\\n }\\n }\\n 
]\\n}\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["age",2,30]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age","address"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, address, age], SCRIPT->AND(=($1, '671 Bristol Street'), =(-($2, 2), 30)), PROJECT->[firstname, age, address], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"{\\n \\\"op\\\": {\\n \\\"name\\\": \\\"=\\\",\\n \\\"kind\\\": \\\"EQUALS\\\",\\n \\\"syntax\\\": \\\"BINARY\\\"\\n },\\n \\\"operands\\\": [\\n {\\n \\\"dynamicParam\\\": 0,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"VARCHAR\\\",\\n \\\"nullable\\\": true,\\n \\\"precision\\\": -1\\n }\\n },\\n {\\n \\\"dynamicParam\\\": 1,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"VARCHAR\\\",\\n \\\"nullable\\\": true,\\n \\\"precision\\\": -1\\n }\\n }\\n ]\\n}\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2],"DIGESTS":["address","671 Bristol Street"]}},"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"{\\n \\\"op\\\": {\\n \\\"name\\\": \\\"=\\\",\\n \\\"kind\\\": \\\"EQUALS\\\",\\n \\\"syntax\\\": \\\"BINARY\\\"\\n },\\n \\\"operands\\\": [\\n {\\n \\\"op\\\": {\\n \\\"name\\\": \\\"-\\\",\\n \\\"kind\\\": \\\"MINUS\\\",\\n \\\"syntax\\\": \\\"BINARY\\\"\\n },\\n \\\"operands\\\": [\\n {\\n \\\"dynamicParam\\\": 0,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"BIGINT\\\",\\n \\\"nullable\\\": true\\n }\\n },\\n {\\n \\\"dynamicParam\\\": 1,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"BIGINT\\\",\\n \\\"nullable\\\": true\\n }\\n }\\n ],\\n \\\"type\\\": {\\n \\\"type\\\": \\\"BIGINT\\\",\\n \\\"nullable\\\": 
true\\n }\\n },\\n {\\n \\\"dynamicParam\\\": 2,\\n \\\"type\\\": {\\n \\\"type\\\": \\\"BIGINT\\\",\\n \\\"nullable\\\": true\\n }\\n }\\n ]\\n}\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["age",2,30]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age","address"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml index 616e24c3b71..08e75fbbdeb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[1], expr#15=[+($t7, $t14)], proj#0..13=[{exprs}], balance2=[$t15]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST, balance ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}},{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST, balance ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBQ3sKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}},{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_count_push.yaml index ea09a6f976f..e15b04d089c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_count_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_count_push.yaml @@ -10,4 +10,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + }], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_desc_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_desc_push.json index 204daa39ab0..569c99c0492 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_desc_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_desc_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8], firstname=[$1])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, age], SORT->[{\n \"age\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], PROJECT->[age, firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\",\"firstname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, age], SORT->[{\n \"age\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], PROJECT->[age, firstname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\",\"firstname\"]},\"sort\":[{\"age\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_pass_through_join_then_pushdown.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_pass_through_join_then_pushdown.yaml index f3ba3bd9694..ff6310f74dd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_pass_through_join_then_pushdown.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_pass_through_join_then_pushdown.yaml @@ -13,10 +13,10 @@ calcite: CalciteEnumerableTopK(sort0=[$13], dir0=[ASC-nulls-first], fetch=[10000]) EnumerableMergeJoin(condition=[=($13, $15)], joinType=[left]) EnumerableCalc(expr#0..12=[{inputs}], expr#13=['(?^[A-Z])'], expr#14=['initial'], expr#15=[REX_EXTRACT($t6, $t13, $t14)], proj#0..12=[{exprs}], initial=[$t15]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[REX_EXTRACT($6, '(?^[A-Z])', 'initial') ASCENDING NULLS_LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?^[A-Z])","initial"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[REX_EXTRACT($6, '(?^[A-Z])', 'initial') ASCENDING NULLS_LAST]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?^[A-Z])","initial"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000, SORT->[{ "firstname" : { "order" : "asc", "missing" : "_last" } - }]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"firstname":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"]},"sort":[{"firstname":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_push.json index 7b75e10b17d..d3334d019d0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], FILTER->>($0, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, 
requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], FILTER->>($0, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_rename_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_rename_push.json index 6281b1825b6..7d50a16c139 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_rename_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_rename_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(alias=[$17])\n LogicalSort(sort0=[$17], dir0=[ASC-nulls-first])\n LogicalProject(account_number=[$0], name=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], alias=[$1])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname], SORT->[{\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"firstname\"],\"excludes\":[]},\"sort\":[{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname], SORT->[{\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"firstname\"]},\"sort\":[{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_then_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_then_limit_push.yaml index ea09a6f976f..e15b04d089c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_then_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_then_limit_push.yaml @@ -10,4 +10,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + }], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json index c9959ce01da..621fb8da46b 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalSort(sort0=[$17], dir0=[ASC-nulls-first])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], $f17=[SAFE_CAST($8)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], SORT_EXPR->[SAFE_CAST($0) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXQBHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTQUZFX0NBU1QiLAogICAgImtpbmQiOiAiU0FGRV9DQVNUIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXSwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfQp9\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"MISSING_MAX\":false,\"utcTimestamp\":*,\"SOURCES\":[0],\"DIGESTS\":[\"age\"]}},\"type\":\"number\",\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], SORT_EXPR->[SAFE_CAST($0) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"]},\"sort\":[{\"_script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXQBHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTQUZFX0NBU1QiLAogICAgImtpbmQiOiAiU0FGRV9DQVNUIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXSwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfQp9\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"MISSING_MAX\":false,\"utcTimestamp\":*,\"SOURCES\":[0],\"DIGESTS\":[\"age\"]}},\"type\":\"number\",\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_dc.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_dc.yaml index 9dd91501bf8..e6ecd590f0d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_dc.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_dc.yaml @@ -6,4 +6,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml index d464ea5b6d8..0a280b77dfb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml @@ -11,4 +11,4 @@ calcite: CalciteEnumerableTopK(sort0=[$11], dir0=[ASC], fetch=[10000]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml index b8ddae83a2e..2d6062c1148 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml @@ -11,4 +11,4 @@ calcite: CalciteEnumerableTopK(sort0=[$5], dir0=[ASC], fetch=[10000]) EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml index 86666bdaf8a..8a7612054c8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml @@ -11,4 +11,4 @@ calcite: CalciteEnumerableTopK(sort0=[$5], dir0=[ASC], fetch=[10000]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_no_group.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_no_group.yaml index f17643ab804..76bf29a6032 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_no_group.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_no_group.yaml @@ -6,4 +6,4 @@ 
calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml index b1b492f44a7..124539b9d4c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml @@ -3,29 +3,19 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], 
_maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalAggregate(group=[{}], avg_age=[AVG($0)]) - LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], avg_age=[AVG($20)]) + LogicalJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), IS NOT DISTINCT FROM($4, $19))], joinType=[left]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_gender__=[$4], __r_age__=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t18]) - EnumerableLimit(fetch=[10000]) - EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) - EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[1], expr#13=[-($t11, $t12)], expr#14=[IS 
NULL($t4)], proj#0..11=[{exprs}], $f12=[$t13], $f15=[$t14]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[0], expr#21=[=($t19, $t20)], expr#22=[null:BIGINT], expr#23=[CASE($t21, $t22, $t18)], expr#24=[CAST($t23):DOUBLE], expr#25=[/($t24, $t19)], proj#0..10=[{exprs}], avg_age=[$t25]) + CalciteEnumerableTopK(sort0=[$17], dir0=[ASC], fetch=[10000]) + EnumerableAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], agg#0=[$SUM0($20)], agg#1=[COUNT($20)]) + EnumerableNestedLoopJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), IS NOT DISTINCT FROM($4, $19))], joinType=[left]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..2=[{inputs}], __r_seq__=[$t2], __r_gender__=[$t0], __r_age__=[$t1]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], avg_age=[$t11]) - EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) - EnumerableNestedLoopJoin(condition=[AND(>=($6, $2), <=($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], 
joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2, 3}]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[-($t1, $t2)], expr#4=[IS NULL($t0)], proj#0..1=[{exprs}], $f12=[$t3], $f15=[$t4]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml index 24425578af2..a1cf6ae00e9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml @@ -3,29 +3,21 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], 
age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalAggregate(group=[{}], avg_age=[AVG($0)]) - LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), =($4, $cor0.gender))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], avg_age=[AVG($20)]) + LogicalJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), =($4, $19))], joinType=[left]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_gender__=[$4], __r_age__=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) - 
CalciteEnumerableTopK(sort0=[$11], dir0=[ASC], fetch=[10000]) - EnumerableMergeJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) - EnumerableSort(sort0=[$4], sort1=[$11], sort2=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[1], expr#13=[-($t11, $t12)], proj#0..11=[{exprs}], $f12=[$t13]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[0], expr#21=[=($t19, $t20)], expr#22=[null:BIGINT], expr#23=[CASE($t21, $t22, $t18)], expr#24=[CAST($t23):DOUBLE], expr#25=[/($t24, $t19)], proj#0..10=[{exprs}], avg_age=[$t25]) + CalciteEnumerableTopK(sort0=[$17], dir0=[ASC], fetch=[10000]) + EnumerableAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], agg#0=[$SUM0($20)], agg#1=[COUNT($20)]) + EnumerableMergeJoin(condition=[AND(=($4, $19), >=($18, -($17, 1)), <=($18, $17))], joinType=[left]) + EnumerableSort(sort0=[$4], dir0=[ASC]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) - EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) - EnumerableHashJoin(condition=[AND(=($0, $3), >=($5, $2), <=($5, $1))], joinType=[inner]) - 
EnumerableAggregate(group=[{0, 1, 2}]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[-($t1, $t2)], proj#0..1=[{exprs}], $f12=[$t3]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], __r_seq__=[$t2], __r_gender__=[$t0], __r_age__=[$t1]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml index c4b49653614..d52457d6671 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml @@ -12,4 +12,4 @@ calcite: EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($8), COUNT($8)])]) EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t4)], proj#0..12=[{exprs}]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml index fa6654252f4..72f8f4d6ca7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml @@ -22,7 +22,7 @@ calcite: EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], 
window#1=[window(rows between UNBOUNDED PRECEDING and $15 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) EnumerableCalc(expr#0..11=[{inputs}], expr#12=[34], expr#13=[>($t8, $t12)], expr#14=[1], expr#15=[0], expr#16=[CASE($t13, $t14, $t15)], expr#17=[25], expr#18=[<($t8, $t17)], expr#19=[CASE($t18, $t14, $t15)], expr#20=[IS NULL($t4)], proj#0..11=[{exprs}], __reset_before_flag__=[$t16], __reset_after_flag__=[$t19], $14=[$t20]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]) EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], avg_age=[$t11]) EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) @@ -32,9 +32,9 @@ calcite: EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs 
[$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], expr#11=[IS NULL($t0)], gender=[$t0], __stream_seq__=[$t2], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10], $4=[$t11]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], proj#0..2=[{exprs}], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, 
age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml index 7be62745372..42b50e7eb5f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml @@ -22,7 +22,7 @@ calcite: EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) EnumerableCalc(expr#0..11=[{inputs}], expr#12=[34], expr#13=[>($t8, $t12)], expr#14=[1], expr#15=[0], expr#16=[CASE($t13, $t14, $t15)], expr#17=[25], expr#18=[<($t8, $t17)], expr#19=[CASE($t18, $t14, $t15)], proj#0..11=[{exprs}], __reset_before_flag__=[$t16], __reset_after_flag__=[$t19]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]) EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) @@ -32,9 +32,9 @@ calcite: EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], gender=[$t0], __stream_seq__=[$t2], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], proj#0..2=[{exprs}], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_take_negative.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_take_negative.yaml index 10bdbc0d25c..49ba42abed2 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_take_negative.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_take_negative.yaml @@ -10,4 +10,4 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], take(employer, 0)=[$t1], take(balance, -2)=[$t2], gender=[$t0]) EnumerableAggregate(group=[{0}], take(employer, 0)=[TAKE($1, $3)], take(balance, -2)=[TAKE($2, $4)]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[-2], proj#0..4=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[gender, employer, balance]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","employer","balance"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[gender, employer, balance]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","employer","balance"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_ilike_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_ilike_function.yaml index 59afad59f00..0b13885f73f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_ilike_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_ilike_function.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], 
SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCB3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJJTElLRSIsCiAgICAia2luZCI6ICJMSUtFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCB3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJJTElLRSIsCiAgICAia2luZCI6ICJMSUtFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml index ba876eb3370..9ca9c104e89 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[LIKE($2, '%Holmes%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->LIKE($2, 
'%Holmes%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCBnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMSUtFIiwKICAgICJraW5kIjogIkxJS0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->LIKE($2, '%Holmes%', '\'), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCBnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMSUtFIiwKICAgICJraW5kIjogIkxJS0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function_case_insensitive.yaml index 59afad59f00..0b13885f73f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function_case_insensitive.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function_case_insensitive.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, 
gender, city, employer, state, age, email, lastname], SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCB3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJJTElLRSIsCiAgICAia2luZCI6ICJMSUtFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCB3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJJTElLRSIsCiAgICAia2luZCI6ICJMSUtFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml index b4acb2b0530..4f14b591721 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml @@ -28,7 +28,7 @@ calcite: EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], avg(cpu_usage)=[$t8]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t0, $t3, $t4)], host=[$t1], cpu_usage=[$t2], 
@timestamp0=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host, cpu_usage], FILTER->AND(IS NOT NULL($0), IS NOT NULL($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"cpu_usage","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp","host","cpu_usage"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host, cpu_usage], FILTER->AND(IS NOT NULL($0), IS NOT NULL($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"cpu_usage","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp","host","cpu_usage"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$0], dir0=[ASC]) EnumerableCalc(expr#0..2=[{inputs}], host=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) @@ -36,4 +36,4 @@ calcite: EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], avg(cpu_usage)=[$t8]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], @timestamp0=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host, cpu_usage], FILTER->AND(IS NOT NULL($0), IS NOT NULL($2)), PROJECT->[host, cpu_usage, @timestamp], FILTER->IS NOT NULL($0)], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"cpu_usage","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"host","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host, cpu_usage], FILTER->AND(IS NOT NULL($0), IS NOT NULL($2)), PROJECT->[host, cpu_usage, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"cpu_usage","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"host","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_count.yaml index 9e462061605..6e68f5335d1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_count.yaml @@ -27,9 +27,9 @@ calcite: EnumerableCalc(expr#0..2=[{inputs}], @timestamp=[$t1], host=[$t0], count()=[$t2]) EnumerableAggregate(group=[{0, 1}], count()=[COUNT()]) EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=['m'], expr#4=[SPAN($t0, $t2, $t3)], host=[$t1], @timestamp0=[$t4]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host], FILTER->IS NOT NULL($0)], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"_source":{"includes":["@timestamp","host"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"_source":{"includes":["@timestamp","host"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$0], dir0=[ASC]) EnumerableCalc(expr#0..2=[{inputs}], host=[$t0], $1=[$t2]) EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableAggregate(group=[{0}], __grand_total__=[COUNT()]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host], FILTER->IS NOT NULL($0), PROJECT->[host, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"filter":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"host","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["host","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host], FILTER->IS NOT NULL($0), PROJECT->[host, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"filter":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"host","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["host","@timestamp"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_top_k_then_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_top_k_then_sort_push.yaml index cba5870494c..b8deccc2e12 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_top_k_then_sort_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_top_k_then_sort_push.yaml @@ -13,4 +13,4 @@ calcite: "order" : "asc", "missing" : "_first" } - }], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]},"sort":[{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file + }], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"]},"sort":[{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml index 80409e6f717..d0a2f80d866 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml @@ -3,9 +3,9 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4]) LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) - LogicalProject(value=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) + LogicalProject(_value_transpose_=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], 
$f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) LogicalFilter(condition=[IS NOT NULL($19)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], _value_transpose_=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)]) LogicalJoin(condition=[true], joinType=[inner]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], 
lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()]) LogicalSort(fetch=[5]) @@ -14,9 +14,9 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) - EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], value=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], 
expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], _value_transpose_=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59]) EnumerableNestedLoopJoin(condition=[true], joinType=[inner]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0=[{inputs}], expr#1=[Sarg['account_number', 'address':CHAR(14), 'age':CHAR(14), 'balance':CHAR(14), 'city':CHAR(14), 'email':CHAR(14), 'employer':CHAR(14), 'firstname':CHAR(14), 'gender':CHAR(14), 'lastname':CHAR(14), 'state':CHAR(14)]:CHAR(14)], expr#2=[SEARCH($t0, $t1)], column_names=[$t0], $condition=[$t2]) EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_push.yaml index 683bfe610cd..bc2ece48ccf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_push.yaml @@ -10,4 +10,4 @@ calcite: EnumerableCalc(expr#0..3=[{inputs}], expr#4=[1], expr#5=[>($t1, $t4)], expr#6=[CAST($t3):DOUBLE NOT NULL], expr#7=[/($t2, $t6)], expr#8=[null:NULL], expr#9=[CASE($t5, $t7, $t8)], ageTrend=[$t9]) EnumerableWindow(window#0=[window(rows between $1 PRECEDING and CURRENT ROW aggs [COUNT(), $SUM0($0), 
COUNT($0)])], constants=[[1]]) EnumerableCalc(expr#0=[{inputs}], expr#1=[IS NOT NULL($t0)], age=[$t0], $condition=[$t1]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_sort_push.yaml index 94265227c8e..354c7f74fbe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_sort_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_sort_push.yaml @@ -16,4 +16,4 @@ calcite: "order" : "asc", "missing" : "_last" } - }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["age"]},"sort":[{"age":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_union.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_union.yaml new file mode 100644 index 00000000000..3ae4928bf42 --- /dev/null +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_union.yaml @@ -0,0 +1,20 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(gender=[$4]) + LogicalUnion(all=[true]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[<($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[>=($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], gender=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableUnion(all=[true]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age], FILTER-><($1, 30), PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"age":{"from":null,"to":30,"include_lower":true,"include_upper":false,"boost":1.0}}},"_source":{"includes":["gender"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age], FILTER->>=($1, 30), PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":true,"include_upper":true,"boost":1.0}}},"_source":{"includes":["gender"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_values_aggregation.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_values_aggregation.json index 1dd0bc574f0..895c3673294 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_values_aggregation.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_values_aggregation.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalAggregate(group=[{}], age_values=[VALUES($0)])\n LogicalProject(age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableAggregate(group=[{}], age_values=[VALUES($0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableAggregate(group=[{}], age_values=[VALUES($0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_computed_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_computed_nested.yaml index ca9d3fdd9fd..249509e9a17 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/filter_computed_nested.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_computed_nested.yaml @@ -6,4 +6,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_deep_nested]]) physical: | EnumerableCalc(expr#0=[{inputs}], expr#1=[CHAR_LENGTH($t0)], proj#0..1=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]], PushDownContext=[[PROJECT->[projects.name], SCRIPT->>(CHAR_LENGTH($0), 29), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICI8IiwKICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgICAgICJraW5kIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":[29,"projects.name"]}},"boost":1.0}},"path":"projects","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["projects.name"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]], PushDownContext=[[PROJECT->[projects.name], SCRIPT->>(CHAR_LENGTH($0), 29), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICI8IiwKICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgICAgICJraW5kIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":[29,"projects.name"]}},"boost":1.0}},"path":"projects","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["projects.name"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_multiple_nested_cascaded_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_multiple_nested_cascaded_range.yaml index ef0cf5d9813..1d9ec67bfb1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/filter_multiple_nested_cascaded_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_multiple_nested_cascaded_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[AND(SEARCH($4, Sarg[[4..6)]), =($6, 'The Shining'))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]], PushDownContext=[[PROJECT->[author, 
author.books.reviews.rating, author.books.title], FILTER->AND(SEARCH($1, Sarg[[4..6)]), =($2, 'The Shining')), PROJECT->[author], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"nested":{"query":{"range":{"author.books.reviews.rating":{"from":4.0,"to":6.0,"include_lower":true,"include_upper":false,"boost":1.0}}},"path":"author.books.reviews","ignore_unmapped":false,"score_mode":"none","boost":1.0}},{"nested":{"query":{"term":{"author.books.title":{"value":"The Shining","boost":1.0}}},"path":"author.books","ignore_unmapped":false,"score_mode":"none","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["author"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]], PushDownContext=[[PROJECT->[author, author.books.reviews.rating, author.books.title], FILTER->AND(SEARCH($1, Sarg[[4..6)]), =($2, 'The Shining')), PROJECT->[author], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"nested":{"query":{"range":{"author.books.reviews.rating":{"from":4.0,"to":6.0,"include_lower":true,"include_upper":false,"boost":1.0}}},"path":"author.books.reviews","ignore_unmapped":false,"score_mode":"none","boost":1.0}},{"nested":{"query":{"term":{"author.books.title":{"value":"The Shining","boost":1.0}}},"path":"author.books","ignore_unmapped":false,"score_mode":"none","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["author"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_term.yaml index 68c8800a968..678a4d094d7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_term.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_term.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[=($3, 'New york city')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, address.city, id, age], FILTER->=($2, 'New york city'), PROJECT->[name, address, id, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"term":{"address.city.keyword":{"value":"New york city","boost":1.0}}},"path":"address","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["name","address","id","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, address.city, id, age], FILTER->=($2, 'New york city'), PROJECT->[name, address, id, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"term":{"address.city.keyword":{"value":"New york city","boost":1.0}}},"path":"address","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["name","address","id","age"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_terms.yaml index 44169558a1d..4e96b7e3ed3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_terms.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[SEARCH($3, Sarg['Miami':VARCHAR, 'san diego':VARCHAR]:VARCHAR)]) 
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, address.city, id, age], FILTER->SEARCH($2, Sarg['Miami':VARCHAR, 'san diego':VARCHAR]:VARCHAR), PROJECT->[name, address, id, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"terms":{"address.city.keyword":["Miami","san diego"],"boost":1.0}},"path":"address","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["name","address","id","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, address.city, id, age], FILTER->SEARCH($2, Sarg['Miami':VARCHAR, 'san diego':VARCHAR]:VARCHAR), PROJECT->[name, address, id, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"terms":{"address.city.keyword":["Miami","san diego"],"boost":1.0}},"path":"address","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["name","address","id","age"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_root_and_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_root_and_nested.yaml index 06868a06e8f..d5f13851d4f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/filter_root_and_nested.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_root_and_nested.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[AND(=($7, 'Seattle'), >(CHAR_LENGTH($3), 29))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]]) physical: | - 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]], PushDownContext=[[PROJECT->[accounts, projects, projects.name, city, city.name, account], SCRIPT->AND(=($4, 'Seattle'), >(CHAR_LENGTH($2), 29)), PROJECT->[accounts, projects, city, account], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"term":{"city.name":{"value":"Seattle","boost":1.0}}},{"nested":{"query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICI8IiwKICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgICAgICJraW5kIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":[29,"projects.name"]}},"boost":1.0}},"path":"projects","ignore_unmapped":false,"score_mode":"none","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["accounts","projects","city","account"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]], PushDownContext=[[PROJECT->[accounts, projects, projects.name, city, city.name, account], SCRIPT->AND(=($4, 'Seattle'), >(CHAR_LENGTH($2), 29)), PROJECT->[accounts, projects, city, account], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"term":{"city.name":{"value":"Seattle","boost":1.0}}},{"nested":{"query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICI8IiwKICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgICAgICJraW5kIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":[29,"projects.name"]}},"boost":1.0}},"path":"projects","ignore_unmapped":false,"score_mode":"none","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["accounts","projects","city","account"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_absolute_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_absolute_time_range.yaml index a3bcf11b79e..3f92b202711 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_absolute_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_absolute_time_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=2022\-12\-10T13\:11\:04Z) AND (@timestamp:<=2025\-09\-03T15\:10\:00Z)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=2022\-12\-10T13\:11\:04Z) AND (@timestamp:<=2025\-09\-03T15\:10\:00Z)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=2022\\-12\\-10T13\\:11\\:04Z) AND (@timestamp:<=2025\\-09\\-03T15\\:10\\:00Z)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=2022\-12\-10T13\:11\:04Z) AND (@timestamp:<=2025\-09\-03T15\:10\:00Z)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=2022\\-12\\-10T13\\:11\\:04Z) AND (@timestamp:<=2025\\-09\\-03T15\\:10\\:00Z)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/search_with_chained_time_modifier.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_chained_time_modifier.yaml index 85578283b39..d29edc6189b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_chained_time_modifier.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_chained_time_modifier.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=now\-3d\/d\-2h\+10m) AND (@timestamp:<=now\-1d\+1y\/M)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3d\/d\-2h\+10m) AND (@timestamp:<=now\-1d\+1y\/M)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3d\\/d\\-2h\\+10m) AND (@timestamp:<=now\\-1d\\+1y\\/M)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3d\/d\-2h\+10m) AND (@timestamp:<=now\-1d\+1y\/M)':VARCHAR)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3d\\/d\\-2h\\+10m) AND (@timestamp:<=now\\-1d\\+1y\\/M)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_numeric_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_numeric_time_range.yaml index 464a0e108fd..d1f3ec37cbf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_numeric_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_numeric_time_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=1000) AND 
(@timestamp:<=1754020061123.456)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=1000) AND (@timestamp:<=1754020061123.456)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_range.yaml index 10badf7a8af..89368cbb44c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_range.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=now\-3M) AND (@timestamp:<=now\+30d)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3M) AND (@timestamp:<=now\+30d)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M) AND (@timestamp:<=now\\+30d)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3M) AND (@timestamp:<=now\+30d)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M) AND (@timestamp:<=now\\+30d)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_snap.yaml b/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_snap.yaml 
index 04b2b245ef1..5a4aef6d8d9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_snap.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/search_with_relative_time_snap.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[query_string(MAP('query', '(@timestamp:>=now\-3M\/y) AND (@timestamp:<=now)':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3M\/y) AND (@timestamp:<=now)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M\\/y) AND (@timestamp:<=now)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], FILTER->query_string(MAP('query', '(@timestamp:>=now\-3M\/y) AND (@timestamp:<=now)':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"query_string":{"query":"(@timestamp:>=now\\-3M\\/y) AND 
(@timestamp:<=now)","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},"_source":{"includes":["@timestamp","category","value","timestamp"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_ctime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_ctime.yaml new file mode 100644 index 00000000000..e93685e279d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_ctime.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(ts=[CTIME(1066507633)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1066507633], expr#20=[CTIME($t19)], ts=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_dur2sec.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_dur2sec.yaml new file mode 100644 index 00000000000..3f01b02a624 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_dur2sec.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(d=[DUR2SEC('01:23:45':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['01:23:45':VARCHAR], 
expr#20=[DUR2SEC($t19)], d=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mktime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mktime.yaml new file mode 100644 index 00000000000..2367ea48feb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mktime.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(d=[MKTIME('10/18/2003 20:07:13':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['10/18/2003 20:07:13':VARCHAR], expr#20=[MKTIME($t19)], d=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mstime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mstime.yaml new file mode 100644 index 00000000000..9bd873d1b3e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_mstime.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(t=[MSTIME('03:45.5':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['03:45.5':VARCHAR], expr#20=[MSTIME($t19)], t=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_ignored.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_ignored.yaml new file mode 100644 index 00000000000..79f52ecc188 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_ignored.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_multiple.yaml new file mode 100644 index 00000000000..7ec67fd0fbb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_multiple.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_single.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_single.yaml new file mode 100644 index 00000000000..2ff219d2464 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_single.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], dir0=[DESC-nulls-last]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$8], dir0=[DESC-nulls-last]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml index 4cb67a380a5..42e82eca514 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_output.yaml @@ -2,22 +2,24 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age2=[$2]) - LogicalFilter(condition=[<=($3, 1)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2)]) - LogicalFilter(condition=[IS NOT NULL($2)]) - LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) - LogicalSort(sort0=[$1], 
dir0=[ASC-nulls-first]) - LogicalProject(avg_age=[$2], state=[$0], city=[$1]) - LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) - LogicalProject(state=[$7], city=[$5], age=[$8]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $1 NULLS FIRST)]) + LogicalFilter(condition=[IS NOT NULL($2)]) + LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)]) + LogicalProject(avg_age=[$2], state=[$0], city=[$1]) + LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)]) + LogicalProject(state=[$7], city=[$5], age=[$8]) + LogicalFilter(condition=[>($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], age2=[$t1], $condition=[$t4]) - EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[2], expr#11=[+($t9, $t10)], expr#12=[IS NOT NULL($t8)], state=[$t1], age2=[$t11], $condition=[$t12]) - EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) - EnumerableAggregate(group=[{5, 7}], agg#0=[$SUM0($8)], agg#1=[COUNT($8)]) - EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[>($t8, $t17)], proj#0..16=[{exprs}], $condition=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..2=[{inputs}], age2=[$t1]) + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) + 
EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) + EnumerableWindow(window#0=[window(partition {1} order by [1 ASC-nulls-first] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[2], expr#11=[+($t9, $t10)], expr#12=[IS NOT NULL($t8)], state=[$t1], age2=[$t11], $condition=[$t12]) + EnumerableAggregate(group=[{5, 7}], agg#0=[$SUM0($8)], agg#1=[COUNT($8)]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[>($t8, $t17)], proj#0..16=[{exprs}], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_ignored.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_ignored.yaml new file mode 100644 index 00000000000..0fb2d7e597d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_ignored.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + EnumerableLimit(fetch=[5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.yaml new file mode 100644 index 00000000000..8044fe03969 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.yaml new file mode 100644 index 00000000000..85acd7a9d54 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], dir0=[ASC-nulls-first]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$8], 
dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_with_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_with_timestamp.yaml new file mode 100644 index 00000000000..e3095d13abc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_with_timestamp.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[DESC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3]) + LogicalSort(sort0=[$0], dir0=[DESC], fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) + EnumerableLimit(fetch=[5]) + EnumerableSort(sort0=[$0], dir0=[DESC]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml index 522e7922e68..c56cd5d1bce 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml @@ -3,30 +3,20 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalCorrelate(correlation=[$cor0], joinType=[left], 
requiredColumns=[{4, 17}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalAggregate(group=[{}], avg_age=[AVG($0)]) - LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], avg_age=[AVG($20)]) + LogicalJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), IS NOT DISTINCT FROM($4, $19))], joinType=[left]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_gender__=[$4], __r_age__=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t18]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[0], expr#21=[=($t19, $t20)], expr#22=[null:BIGINT], 
expr#23=[CASE($t21, $t22, $t18)], expr#24=[CAST($t23):DOUBLE], expr#25=[/($t24, $t19)], proj#0..10=[{exprs}], avg_age=[$t25]) EnumerableLimit(fetch=[10000]) - EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) - EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], expr#20=[IS NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $f12=[$t19], $f15=[$t20]) + EnumerableSort(sort0=[$17], dir0=[ASC]) + EnumerableAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], agg#0=[$SUM0($20)], agg#1=[COUNT($20)]) + EnumerableNestedLoopJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), IS NOT DISTINCT FROM($4, $19))], joinType=[left]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]) - EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], avg_age=[$t11]) - EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) - EnumerableNestedLoopJoin(condition=[AND(>=($6, $2), <=($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2, 3}]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], expr#20=[IS NULL($t4)], gender=[$t4], __stream_seq__=[$t17], $f12=[$t19], $f15=[$t20]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..17=[{inputs}], 
gender=[$t4], age=[$t8], $2=[$t17]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + EnumerableCalc(expr#0..17=[{inputs}], __r_seq__=[$t17], __r_gender__=[$t4], __r_age__=[$t8]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml index a0634448b5e..d72bf7b429f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml @@ -3,29 +3,22 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalAggregate(group=[{}], avg_age=[AVG($0)]) - LogicalProject(age=[$8]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), =($4, 
$cor0.gender))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], avg_age=[AVG($20)]) + LogicalJoin(condition=[AND(>=($18, -($17, 1)), <=($18, $17), =($4, $19))], joinType=[left]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_gender__=[$4], __r_age__=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableCalc(expr#0..19=[{inputs}], expr#20=[0], expr#21=[=($t19, $t20)], expr#22=[null:BIGINT], expr#23=[CASE($t21, $t22, $t18)], expr#24=[CAST($t23):DOUBLE], expr#25=[/($t24, $t19)], proj#0..10=[{exprs}], avg_age=[$t25]) EnumerableLimit(fetch=[10000]) - EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) - EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $f12=[$t19]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..4=[{inputs}], 
expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) - EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) - EnumerableHashJoin(condition=[AND(=($0, $3), >=($5, $2), <=($5, $1))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2}]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], gender=[$t4], __stream_seq__=[$t17], $f12=[$t19]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..17=[{inputs}], gender=[$t4], age=[$t8], $2=[$t17]) + EnumerableSort(sort0=[$17], dir0=[ASC]) + EnumerableAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}], agg#0=[$SUM0($20)], agg#1=[COUNT($20)]) + EnumerableMergeJoin(condition=[AND(=($4, $19), >=($18, -($17, 1)), <=($18, $17))], joinType=[left]) + EnumerableSort(sort0=[$4], dir0=[ASC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..17=[{inputs}], __r_seq__=[$t17], __r_gender__=[$t4], __r_age__=[$t8]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_union.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_union.yaml new file mode 100644 index 00000000000..22a9bb6b5bd --- /dev/null +++ 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_union.yaml @@ -0,0 +1,22 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(gender=[$4]) + LogicalUnion(all=[true]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[<($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[>=($8, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], gender=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableUnion(all=[true]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[<($t8, $t17)], gender=[$t4], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[30], expr#18=[>=($t8, $t17)], gender=[$t4], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/explainIT_format_not_pretty.json b/integ-test/src/test/resources/expectedOutput/explainIT_format_not_pretty.json index 59b723343ae..7350fb185b1 100644 --- a/integ-test/src/test/resources/expectedOutput/explainIT_format_not_pretty.json +++ b/integ-test/src/test/resources/expectedOutput/explainIT_format_not_pretty.json @@ -1 +1 @@ -{"from":0,"size":200,"_source":{"includes":["firstname"],"excludes":[]}} 
+{"from":0,"size":200,"_source":{"includes":["firstname"]}} diff --git a/integ-test/src/test/resources/expectedOutput/explainIT_format_pretty.json b/integ-test/src/test/resources/expectedOutput/explainIT_format_pretty.json index 925f4ec1eee..d35e1bcfe66 100644 --- a/integ-test/src/test/resources/expectedOutput/explainIT_format_pretty.json +++ b/integ-test/src/test/resources/expectedOutput/explainIT_format_pretty.json @@ -4,7 +4,6 @@ "_source" : { "includes" : [ "firstname" - ], - "excludes" : [ ] + ] } } diff --git a/integ-test/src/test/resources/expectedOutput/nested_loop_join_explain.json b/integ-test/src/test/resources/expectedOutput/nested_loop_join_explain.json index 74943bdc255..9517b40f2ce 100644 --- a/integ-test/src/test/resources/expectedOutput/nested_loop_join_explain.json +++ b/integ-test/src/test/resources/expectedOutput/nested_loop_join_explain.json @@ -74,7 +74,6 @@ }, "from": 0, "_source": { - "excludes": [], "includes": [ "firstname", "lastname", @@ -113,7 +112,6 @@ }, "from": 0, "_source": { - "excludes": [], "includes": [ "dog_name", "holdersName" diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml index c2b045b4600..009e8340e82 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml @@ -9,7 +9,7 @@ root: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ - ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"\ - }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + 
,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]},\"sort\"\ + :[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml index 0e6f5549a56..ff068f90755 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml @@ -14,7 +14,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\":\"\ - asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + aws\",\"event\"]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml index 0e6f5549a56..ff068f90755 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml @@ -14,7 +14,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ 
,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\":\"\ - asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + aws\",\"event\"]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml index c2b045b4600..009e8340e82 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml @@ -9,7 +9,7 @@ root: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ - ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"\ - }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]},\"sort\"\ + :[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml index 28d71ecf09e..0da2e4f09ca 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml @@ -9,6 +9,6 @@ root: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ - ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml index 16f1fcfa33a..f2f4109ba90 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml @@ -9,7 +9,7 @@ root: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ - ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]},\"sort\":[{\"@timestamp\":{\"order\":\"desc\",\"missing\":\"_last\"\ - }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]},\"sort\"\ + :[{\"@timestamp\":{\"order\":\"desc\",\"missing\":\"_last\"}}]}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml 
b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml index 8f8424df6b7..c438832c466 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml @@ -14,7 +14,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\":\"\ - desc\",\"missing\":\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + aws\",\"event\"]},\"sort\":[{\"@timestamp\":{\"order\":\"desc\",\"missing\"\ + :\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml index 8f8424df6b7..c438832c466 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml @@ -14,7 +14,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\":\"\ - desc\",\"missing\":\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + 
aws\",\"event\"]},\"sort\":[{\"@timestamp\":{\"order\":\"desc\",\"missing\"\ + :\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml index 16f1fcfa33a..f2f4109ba90 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml @@ -9,7 +9,7 @@ root: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ - ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]},\"sort\":[{\"@timestamp\":{\"order\":\"desc\",\"missing\":\"_last\"\ - }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]},\"sort\"\ + :[{\"@timestamp\":{\"order\":\"desc\",\"missing\":\"_last\"}}]}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml index 1ec8b25e62b..a6fa716f048 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml @@ -19,6 +19,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}}],\"adjust_pure_negative\"\ :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"agent\",\"process\",\"\ log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\",\"\ - 
data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml index aa99c0c8636..af694705af0 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml @@ -15,6 +15,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + aws\",\"event\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml index 069db10a79d..d15160e320a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml @@ -20,6 +20,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}}],\"adjust_pure_negative\"\ :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"agent\",\"process\",\"\ log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\",\"\ - data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - 
:[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml index e3ba107ee04..c149f54c9a7 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml @@ -20,7 +20,7 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}}],\"adjust_pure_negative\"\ :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"agent\",\"process\",\"\ log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\",\"\ - data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"\ - }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]},\"sort\"\ + :[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml index 4406c961892..2757f0ba071 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml @@ -14,6 +14,6 @@ root: boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\"\ 
:{\"includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\"\ ,\"input\",\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\"\ - ,\"aws\",\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + ,\"aws\",\"event\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml index 44ffe43c5c7..9d54944387c 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml @@ -16,6 +16,5 @@ root: adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"\ agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"\ @timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\"\ - ,\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + ,\"event\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml index c17a49a4c2e..a4f89937e38 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml @@ -13,6 +13,6 @@ root: 
:30,\"include_lower\":true,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\"\ :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"agent\",\"process\",\"\ log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\",\"\ - data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml index d04271ee003..b9029ecfc0c 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml @@ -16,6 +16,5 @@ root: adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"\ agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"\ @timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\"\ - ,\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + ,\"event\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml index c0d63da12bf..85e7cec48d1 100644 --- 
a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml @@ -16,6 +16,5 @@ root: adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"\ agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"\ @timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\"\ - ,\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + ,\"event\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml index 5d49015ec62..5827e11a9f8 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml @@ -14,6 +14,5 @@ root: adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"\ agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"\ @timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\"\ - ,\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + ,\"event\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml index 0681c881ce9..cd1b0d08470 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml @@ -14,7 +14,6 @@ root: 
boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\"\ :{\"includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\"\ ,\"input\",\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\"\ - ,\"aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\"\ - :\"asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + ,\"aws\",\"event\"]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml index 1b9d3f2c246..9d65f44f438 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml @@ -14,7 +14,6 @@ root: boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\"\ :{\"includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\"\ ,\"input\",\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\"\ - ,\"aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\"\ - :\"desc\",\"missing\":\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + ,\"aws\",\"event\"]},\"sort\":[{\"@timestamp\":{\"order\":\"desc\",\"missing\"\ + :\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml index 28d71ecf09e..0da2e4f09ca 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml @@ -9,6 +9,6 @@ root: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ - ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml index 926319755f8..a42fb3fcb47 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml @@ -14,7 +14,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"meta.file\":{\"order\":\"\ - asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + aws\",\"event\"]},\"sort\":[{\"meta.file\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml index 
926319755f8..a42fb3fcb47 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml @@ -14,7 +14,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"meta.file\":{\"order\":\"\ - asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + aws\",\"event\"]},\"sort\":[{\"meta.file\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml index 10228036927..8e8f5aa28b8 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml @@ -9,7 +9,7 @@ root: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ - ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]},\"sort\":[{\"metrics.size\":{\"order\":\"asc\",\"missing\":\"_first\"\ - }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]},\"sort\"\ + :[{\"metrics.size\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, pitId=*,\ + \ cursorKeepAlive=null, 
searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml index 1925583a9e0..a3ab2fe7238 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml @@ -15,7 +15,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"metrics.size\":{\"order\"\ - :\"asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + aws\",\"event\"]},\"sort\":[{\"metrics.size\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml index acf47c582d9..4ff698cea4a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml @@ -9,7 +9,7 @@ root: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ - ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]},\"sort\":[{\"metrics.size\":{\"order\":\"desc\",\"missing\":\"_last\"\ - }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, 
searchResponse=null)" - children: [] \ No newline at end of file + ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"]},\"sort\"\ + :[{\"metrics.size\":{\"order\":\"desc\",\"missing\":\"_last\"}}]}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml index 00b13e06628..7ec9222cf65 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml @@ -15,7 +15,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"metrics.size\":{\"order\"\ - :\"desc\",\"missing\":\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" - children: [] \ No newline at end of file + aws\",\"event\"]},\"sort\":[{\"metrics.size\":{\"order\":\"desc\",\"missing\"\ + :\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml index 56f97a7481e..75fb21c2430 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml @@ -11,6 +11,6 @@ root: value\":\"/var/log/messages/birdknight\",\"boost\":1.0}}},\"_source\":{\"\ includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\"\ ,\"input\",\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\"\ - 
,\"aws\",\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + ,\"aws\",\"event\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q20.yaml b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q20.yaml index 243190d4116..993948b5629 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q20.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q20.yaml @@ -8,6 +8,6 @@ root: request: "OpenSearchQueryRequest(indexName=hits, sourceBuilder={\"from\":0,\"\ size\":10000,\"timeout\":\"1m\",\"query\":{\"term\":{\"UserID\":{\"value\"\ :435090932899640449,\"boost\":1.0}}},\"_source\":{\"includes\":[\"UserID\"\ - ],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + ]}}, needClean=true, searchDone=false, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q24.yaml b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q24.yaml index da2a8eae916..fb426a073fd 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q24.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q24.yaml @@ -46,7 +46,7 @@ root: ,\"IsMobile\",\"Age\",\"SocialSourceNetworkID\",\"OpenerName\",\"OS\",\"\ IsNotBounce\",\"Referer\",\"NetMinor\",\"Title\",\"NetMajor\",\"IPNetworkID\"\ ,\"FetchTiming\",\"SocialNetwork\",\"SocialSourcePage\",\"CounterID\",\"\ - WindowClientWidth\"],\"excludes\":[]},\"sort\":[{\"EventTime\":{\"order\"\ + WindowClientWidth\"]},\"sort\":[{\"EventTime\":{\"order\"\ :\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git 
a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q25.yaml b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q25.yaml index 07c3979ea39..c9378edd2f4 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q25.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q25.yaml @@ -19,7 +19,7 @@ root: :{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\ \":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IAMW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLlJlZmVyZW5jZUV4cHJlc3Npb26rRO9cEgeF1gIABEwABGF0dHJ0ABJMamF2YS9sYW5nL1N0cmluZztMAAVwYXRoc3EAfgABTAAHcmF3UGF0aHEAfgALTAAEdHlwZXEAfgAFeHB0AAxTZWFyY2hQaHJhc2VzcgAaamF2YS51dGlsLkFycmF5cyRBcnJheUxpc3TZpDy+zYgG0gIAAVsAAWF0ABNbTGphdmEvbGFuZy9PYmplY3Q7eHB1cgATW0xqYXZhLmxhbmcuU3RyaW5nO63SVufpHXtHAgAAeHAAAAABcQB+AA1xAH4ADX5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABlNUUklOR3NyAC9vcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5MaXRlcmFsRXhwcmVzc2lvbkVCLfCMx4IkAgABTAAJZXhwclZhbHVldAApTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTt4cHNyAC1vcmcub3BlbnNlYXJjaC5zcWwuZGF0YS5tb2RlbC5FeHByU3RyaW5nVmFsdWUAQTIlc4kOEwIAAUwABXZhbHVlcQB+AAt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2B
hREigIAAHhwdAAAeHNyADNvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbk5hbWULqDhNzvZnlwIAAUwADGZ1bmN0aW9uTmFtZXEAfgALeHB0AAIhPXEAfgAJc3IAIWphdmEubGFuZy5pbnZva2UuU2VyaWFsaXplZExhbWJkYW9h0JQsKTaFAgAKSQAOaW1wbE1ldGhvZEtpbmRbAAxjYXB0dXJlZEFyZ3NxAH4AD0wADmNhcHR1cmluZ0NsYXNzdAARTGphdmEvbGFuZy9DbGFzcztMABhmdW5jdGlvbmFsSW50ZXJmYWNlQ2xhc3NxAH4AC0wAHWZ1bmN0aW9uYWxJbnRlcmZhY2VNZXRob2ROYW1lcQB+AAtMACJmdW5jdGlvbmFsSW50ZXJmYWNlTWV0aG9kU2lnbmF0dXJlcQB+AAtMAAlpbXBsQ2xhc3NxAH4AC0wADmltcGxNZXRob2ROYW1lcQB+AAtMABNpbXBsTWV0aG9kU2lnbmF0dXJlcQB+AAtMABZpbnN0YW50aWF0ZWRNZXRob2RUeXBlcQB+AAt4cAAAAAZ1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAABc3EAfgAhAAAABnVxAH4AJAAAAAFzcQB+ACEAAAAGdXEAfgAkAAAAAHZyAElvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5vcGVyYXRvci5wcmVkaWNhdGUuQmluYXJ5UHJlZGljYXRlT3BlcmF0b3JzAAAAAAAAAAAAAAB4cHQAPW9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb250AAVhcHBseXQAOChMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDspTGphdmEvbGFuZy9PYmplY3Q7dABJb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vb3BlcmF0b3IvcHJlZGljYXRlL0JpbmFyeVByZWRpY2F0ZU9wZXJhdG9yc3QAGmxhbWJkYSRub3RFcXVhbCQ5NTA0OGZjMSQxdAB9KExvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AMXZyADJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTAAAAAAAAAAAAAAAeHBxAH4ALHEAfgAtcQB+AC50ADJvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvbkRTTHQAJWxhbWJkYSRudWxsTWlzc2luZ0hhbmRsaW5nJGE1MDA1MjgxJDF0ALwoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb247TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAxcQB+ADN0AD5vcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbnEAfgAtdABKKExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL
09iamVjdDtxAH4ANHQAFmxhbWJkYSRpbXBsJGEwZmIzNGQ0JDF0APcoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb247TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7dAC4KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAfc3IAOW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uUHJvcGVydGllc888WWObqPmzAgADTAANY3VycmVudFpvbmVJZHQAEkxqYXZhL3RpbWUvWm9uZUlkO0wACm5vd0luc3RhbnR0ABNMamF2YS90aW1lL0luc3RhbnQ7TAAJcXVlcnlUeXBldAAnTG9yZy9vcGVuc2VhcmNoL3NxbC9leGVjdXRvci9RdWVyeVR5cGU7eHBzcgANamF2YS50aW1lLlNlcpVdhLobIkiyDAAAeHB3AggAeHNxAH4AQXcNAgAAAABpCZsWIFYPmHh+cgAlb3JnLm9wZW5zZWFyY2guc3FsLmV4ZWN1dG9yLlF1ZXJ5VHlwZQAAAAAAAAAAEgAAeHEAfgAUdAADUFBMfnEAfgATdAAHQk9PTEVBTg==\\\ \"}\",\"lang\":\"opensearch_compounded_script\"},\"boost\":1.0}},\"\ - _source\":{\"includes\":[\"SearchPhrase\"],\"excludes\":[]},\"sort\"\ + _source\":{\"includes\":[\"SearchPhrase\"]},\"sort\"\ :[{\"EventTime\":{\"order\":\"asc\",\"missing\":\"_first\"}}]},\ \ needClean=true, searchDone=false, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" diff --git a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q26.yaml b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q26.yaml index b7437d23381..0fc35cfc271 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q26.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q26.yaml @@ -23,7 +23,7 @@ root: :{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\ 
\":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IAMW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLlJlZmVyZW5jZUV4cHJlc3Npb26rRO9cEgeF1gIABEwABGF0dHJ0ABJMamF2YS9sYW5nL1N0cmluZztMAAVwYXRoc3EAfgABTAAHcmF3UGF0aHEAfgALTAAEdHlwZXEAfgAFeHB0AAxTZWFyY2hQaHJhc2VzcgAaamF2YS51dGlsLkFycmF5cyRBcnJheUxpc3TZpDy+zYgG0gIAAVsAAWF0ABNbTGphdmEvbGFuZy9PYmplY3Q7eHB1cgATW0xqYXZhLmxhbmcuU3RyaW5nO63SVufpHXtHAgAAeHAAAAABcQB+AA1xAH4ADX5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABlNUUklOR3NyAC9vcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5MaXRlcmFsRXhwcmVzc2lvbkVCLfCMx4IkAgABTAAJZXhwclZhbHVldAApTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTt4cHNyAC1vcmcub3BlbnNlYXJjaC5zcWwuZGF0YS5tb2RlbC5FeHByU3RyaW5nVmFsdWUAQTIlc4kOEwIAAUwABXZhbHVlcQB+AAt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2BhREigIAAHhwdAAAeHNyADNvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbk5hbWULqDhNzvZnlwIAAUwADGZ1bmN0aW9uTmFtZXEAfgALeHB0AAIhPXEAfgAJc3IAIWphdmEubGFuZy5pbnZva2UuU2VyaWFsaXplZExhbWJkYW9h0JQsKTaFAgAKSQAOaW1wbE1ldGhvZEtpbmRbAAxjYXB0dXJlZEFyZ3NxAH4AD0wADmNhcHR1cmluZ0NsYXNzdAARTGphdmEvbGFuZy9DbGFzcztMABhmdW5jdGlvbmFsSW50ZXJmYWNlQ2xhc3NxAH4AC0wAHWZ1bmN0aW9uYWxJbnRlcmZhY2VNZXRob2ROYW1lcQB+AAtMACJmdW5jdGlvbmFsSW50Z
XJmYWNlTWV0aG9kU2lnbmF0dXJlcQB+AAtMAAlpbXBsQ2xhc3NxAH4AC0wADmltcGxNZXRob2ROYW1lcQB+AAtMABNpbXBsTWV0aG9kU2lnbmF0dXJlcQB+AAtMABZpbnN0YW50aWF0ZWRNZXRob2RUeXBlcQB+AAt4cAAAAAZ1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAABc3EAfgAhAAAABnVxAH4AJAAAAAFzcQB+ACEAAAAGdXEAfgAkAAAAAHZyAElvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5vcGVyYXRvci5wcmVkaWNhdGUuQmluYXJ5UHJlZGljYXRlT3BlcmF0b3JzAAAAAAAAAAAAAAB4cHQAPW9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb250AAVhcHBseXQAOChMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDspTGphdmEvbGFuZy9PYmplY3Q7dABJb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vb3BlcmF0b3IvcHJlZGljYXRlL0JpbmFyeVByZWRpY2F0ZU9wZXJhdG9yc3QAGmxhbWJkYSRub3RFcXVhbCQ5NTA0OGZjMSQxdAB9KExvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AMXZyADJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTAAAAAAAAAAAAAAAeHBxAH4ALHEAfgAtcQB+AC50ADJvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvbkRTTHQAJWxhbWJkYSRudWxsTWlzc2luZ0hhbmRsaW5nJGE1MDA1MjgxJDF0ALwoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb247TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAxcQB+ADN0AD5vcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbnEAfgAtdABKKExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL09iamVjdDtxAH4ANHQAFmxhbWJkYSRpbXBsJGEwZmIzNGQ0JDF0APcoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb247TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7dAC4KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc
2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAfc3IAOW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uUHJvcGVydGllc888WWObqPmzAgADTAANY3VycmVudFpvbmVJZHQAEkxqYXZhL3RpbWUvWm9uZUlkO0wACm5vd0luc3RhbnR0ABNMamF2YS90aW1lL0luc3RhbnQ7TAAJcXVlcnlUeXBldAAnTG9yZy9vcGVuc2VhcmNoL3NxbC9leGVjdXRvci9RdWVyeVR5cGU7eHBzcgANamF2YS50aW1lLlNlcpVdhLobIkiyDAAAeHB3AggAeHNxAH4AQXcNAgAAAABpCZsWIgNaCHh+cgAlb3JnLm9wZW5zZWFyY2guc3FsLmV4ZWN1dG9yLlF1ZXJ5VHlwZQAAAAAAAAAAEgAAeHEAfgAUdAADUFBMfnEAfgATdAAHQk9PTEVBTg==\\\ \"}\",\"lang\":\"opensearch_compounded_script\"},\"boost\":1.0}},\"\ - _source\":{\"includes\":[\"SearchPhrase\"],\"excludes\":[]}}, needClean=true,\ + _source\":{\"includes\":[\"SearchPhrase\"]}}, needClean=true,\ \ searchDone=false, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q27.yaml b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q27.yaml index b009efd7ed4..4663fb734be 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q27.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/clickbench/q27.yaml @@ -19,7 +19,7 @@ root: :{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\ 
\":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IAMW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLlJlZmVyZW5jZUV4cHJlc3Npb26rRO9cEgeF1gIABEwABGF0dHJ0ABJMamF2YS9sYW5nL1N0cmluZztMAAVwYXRoc3EAfgABTAAHcmF3UGF0aHEAfgALTAAEdHlwZXEAfgAFeHB0AAxTZWFyY2hQaHJhc2VzcgAaamF2YS51dGlsLkFycmF5cyRBcnJheUxpc3TZpDy+zYgG0gIAAVsAAWF0ABNbTGphdmEvbGFuZy9PYmplY3Q7eHB1cgATW0xqYXZhLmxhbmcuU3RyaW5nO63SVufpHXtHAgAAeHAAAAABcQB+AA1xAH4ADX5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABlNUUklOR3NyAC9vcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5MaXRlcmFsRXhwcmVzc2lvbkVCLfCMx4IkAgABTAAJZXhwclZhbHVldAApTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTt4cHNyAC1vcmcub3BlbnNlYXJjaC5zcWwuZGF0YS5tb2RlbC5FeHByU3RyaW5nVmFsdWUAQTIlc4kOEwIAAUwABXZhbHVlcQB+AAt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2BhREigIAAHhwdAAAeHNyADNvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbk5hbWULqDhNzvZnlwIAAUwADGZ1bmN0aW9uTmFtZXEAfgALeHB0AAIhPXEAfgAJc3IAIWphdmEubGFuZy5pbnZva2UuU2VyaWFsaXplZExhbWJkYW9h0JQsKTaFAgAKSQAOaW1wbE1ldGhvZEtpbmRbAAxjYXB0dXJlZEFyZ3NxAH4AD0wADmNhcHR1cmluZ0NsYXNzdAARTGphdmEvbGFuZy9DbGFzcztMABhmdW5jdGlvbmFsSW50ZXJmYWNlQ2xhc3NxAH4AC0wAHWZ1bmN0aW9uYWxJbnRlcmZhY2VNZXRob2ROYW1lcQB+AAtMACJmdW5jdGlvbmFsSW50Z
XJmYWNlTWV0aG9kU2lnbmF0dXJlcQB+AAtMAAlpbXBsQ2xhc3NxAH4AC0wADmltcGxNZXRob2ROYW1lcQB+AAtMABNpbXBsTWV0aG9kU2lnbmF0dXJlcQB+AAtMABZpbnN0YW50aWF0ZWRNZXRob2RUeXBlcQB+AAt4cAAAAAZ1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAABc3EAfgAhAAAABnVxAH4AJAAAAAFzcQB+ACEAAAAGdXEAfgAkAAAAAHZyAElvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5vcGVyYXRvci5wcmVkaWNhdGUuQmluYXJ5UHJlZGljYXRlT3BlcmF0b3JzAAAAAAAAAAAAAAB4cHQAPW9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb250AAVhcHBseXQAOChMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDspTGphdmEvbGFuZy9PYmplY3Q7dABJb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vb3BlcmF0b3IvcHJlZGljYXRlL0JpbmFyeVByZWRpY2F0ZU9wZXJhdG9yc3QAGmxhbWJkYSRub3RFcXVhbCQ5NTA0OGZjMSQxdAB9KExvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AMXZyADJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTAAAAAAAAAAAAAAAeHBxAH4ALHEAfgAtcQB+AC50ADJvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvbkRTTHQAJWxhbWJkYSRudWxsTWlzc2luZ0hhbmRsaW5nJGE1MDA1MjgxJDF0ALwoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb247TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAxcQB+ADN0AD5vcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbnEAfgAtdABKKExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL09iamVjdDtxAH4ANHQAFmxhbWJkYSRpbXBsJGEwZmIzNGQ0JDF0APcoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUJpRnVuY3Rpb247TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7dAC4KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc
2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAfc3IAOW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uUHJvcGVydGllc888WWObqPmzAgADTAANY3VycmVudFpvbmVJZHQAEkxqYXZhL3RpbWUvWm9uZUlkO0wACm5vd0luc3RhbnR0ABNMamF2YS90aW1lL0luc3RhbnQ7TAAJcXVlcnlUeXBldAAnTG9yZy9vcGVuc2VhcmNoL3NxbC9leGVjdXRvci9RdWVyeVR5cGU7eHBzcgANamF2YS50aW1lLlNlcpVdhLobIkiyDAAAeHB3AggAeHNxAH4AQXcNAgAAAABpCZsWJAGi2Hh+cgAlb3JnLm9wZW5zZWFyY2guc3FsLmV4ZWN1dG9yLlF1ZXJ5VHlwZQAAAAAAAAAAEgAAeHEAfgAUdAADUFBMfnEAfgATdAAHQk9PTEVBTg==\\\ \"}\",\"lang\":\"opensearch_compounded_script\"},\"boost\":1.0}},\"\ - _source\":{\"includes\":[\"SearchPhrase\"],\"excludes\":[]},\"sort\"\ + _source\":{\"includes\":[\"SearchPhrase\"]},\"sort\"\ :[{\"EventTime\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"\ SearchPhrase\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean=true,\ \ searchDone=false, pitId=*,\ diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml index 7ff290348e9..bf0f03a4cbc 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml @@ -18,7 +18,7 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ - _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ - excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"]}},\ + \ pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + children: [] diff --git 
a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml index 3a711878435..72661145e99 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml @@ -18,7 +18,7 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ - _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ - excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"]}},\ + \ pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml index 7ff290348e9..bf0f03a4cbc 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml @@ -18,7 +18,7 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ - _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ - excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"]}},\ + \ pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_compare_ip.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_compare_ip.json 
index 7afd6497b9a..e6a8c31265e 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_compare_ip.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_compare_ip.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_weblogs, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"host\":{\"from\":\"1.1.1.1\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"host\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_weblogs, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"host\":{\"from\":\"1.1.1.1\",\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"host\"]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_compare_ipv6_swapped.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_compare_ipv6_swapped.json index ff004cfeb4c..0b97c68619d 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_compare_ipv6_swapped.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_compare_ipv6_swapped.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_weblogs, 
sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IANG9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uRFNMJDI9znTUIQE9bAIABUwADXZhbCRhcmd1bWVudHNxAH4AAUwADHZhbCRmdW5jdGlvbnQAP0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0wAEHZhbCRmdW5jdGlvbk5hbWVxAH4AA0wAFnZhbCRmdW5jdGlvblByb3BlcnRpZXNxAH4ABEwADnZhbCRyZXR1cm5UeXBlcQB+AAV4cQB+AAZzcQB+AAgAAAABdwQAAAABc3IAL29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLkxpdGVyYWxFeHByZXNzaW9uRUIt8IzHgiQCAAFMAAlleHByVmFsdWV0AClMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3hwc3IALW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLm1vZGVsLkV4cHJTdHJpbmdWYWx1ZQBBMiVziQ4TAgABTAAFdmFsdWV0ABJMamF2YS9sYW5nL1N0cmluZzt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2BhREigIAAHhwdAALOjpmZmZmOjEyMzR4c3IAM29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uTmFtZQuoOE3O9meXAgABTAAMZnVuY3Rpb25OYW1lcQB+ABJ4cHQACmNhc3RfdG9faXBxAH4ADXNyACFqYXZhLmxhbmcuaW52b2tlLlNlcmlhbGl6ZWRMYW1iZGFvYdCULCk2hQIACkkADmltcGxNZXRob2RLaW5kWwAMY2FwdHVyZWRBcmdzdAATW0xqYXZhL2xhbmcvT2JqZWN0O0wADmNhcHR1cmluZ0NsYXNzdAARTGphdmEvbGFuZy9DbGFzcztMABhmdW5jdGlvbmFsSW50ZXJm
YWNlQ2xhc3NxAH4AEkwAHWZ1bmN0aW9uYWxJbnRlcmZhY2VNZXRob2ROYW1lcQB+ABJMACJmdW5jdGlvbmFsSW50ZXJmYWNlTWV0aG9kU2lnbmF0dXJlcQB+ABJMAAlpbXBsQ2xhc3NxAH4AEkwADmltcGxNZXRob2ROYW1lcQB+ABJMABNpbXBsTWV0aG9kU2lnbmF0dXJlcQB+ABJMABZpbnN0YW50aWF0ZWRNZXRob2RUeXBlcQB+ABJ4cAAAAAZ1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAABc3EAfgAZAAAABnVxAH4AHQAAAAFzcQB+ABkAAAAGdXEAfgAdAAAAAHZyAEBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5vcGVyYXRvci5jb252ZXJ0LlR5cGVDYXN0T3BlcmF0b3JzAAAAAAAAAAAAAAB4cHQAO29yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUZ1bmN0aW9udAAFYXBwbHl0ACYoTGphdmEvbGFuZy9PYmplY3Q7KUxqYXZhL2xhbmcvT2JqZWN0O3QAQG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL29wZXJhdG9yL2NvbnZlcnQvVHlwZUNhc3RPcGVyYXRvcnN0ABpsYW1iZGEkY2FzdFRvSXAkMTJjN2RjNDgkMXQAVChMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAqdnIAMm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uRFNMAAAAAAAAAAAAAAB4cHEAfgAlcQB+ACZxAH4AJ3QAMm9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uRFNMdAAlbGFtYmRhJG51bGxNaXNzaW5nSGFuZGxpbmckODc4MDY5YzgkMXQAkShMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlRnVuY3Rpb247TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AKnEAfgAsdAA9b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbnEAfgAmdAA4KExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL09iamVjdDtxAH4ALXQAFmxhbWJkYSRpbXBsJDhkNTg2Y2RjJDF0AMwoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTt0AI8oTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EA
fgAXc3IAOW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uUHJvcGVydGllc888WWObqPmzAgADTAANY3VycmVudFpvbmVJZHQAEkxqYXZhL3RpbWUvWm9uZUlkO0wACm5vd0luc3RhbnR0ABNMamF2YS90aW1lL0luc3RhbnQ7TAAJcXVlcnlUeXBldAAnTG9yZy9vcGVuc2VhcmNoL3NxbC9leGVjdXRvci9RdWVyeVR5cGU7eHBzcgANamF2YS50aW1lLlNlcpVdhLobIkiyDAAAeHB3AggAeHNxAH4AOncNAgAAAABoifJmKvyT4Hh+cgAlb3JnLm9wZW5zZWFyY2guc3FsLmV4ZWN1dG9yLlF1ZXJ5VHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAA1BQTH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHEAfgA+dAACSVBzcgAxb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uUmVmZXJlbmNlRXhwcmVzc2lvbqtE71wSB4XWAgAETAAEYXR0cnEAfgASTAAFcGF0aHNxAH4AAUwAB3Jhd1BhdGhxAH4AEkwABHR5cGVxAH4ABXhwdAAEaG9zdHNyABpqYXZhLnV0aWwuQXJyYXlzJEFycmF5TGlzdNmkPL7NiAbSAgABWwABYXEAfgAaeHB1cgATW0xqYXZhLmxhbmcuU3RyaW5nO63SVufpHXtHAgAAeHAAAAABcQB+AEZxAH4ARnEAfgBCeHNxAH4AFnQAAjw9cQB+AAlzcQB+ABkAAAAGdXEAfgAdAAAAAXNxAH4AGQAAAAZ1cQB+AB0AAAABc3EAfgAZAAAABnVxAH4AHQAAAAB2cgBJb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24ub3BlcmF0b3IucHJlZGljYXRlLkJpbmFyeVByZWRpY2F0ZU9wZXJhdG9ycwAAAAAAAAAAAAAAeHBxAH4AMHEAfgAmcQB+ADF0AElvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9vcGVyYXRvci9wcmVkaWNhdGUvQmluYXJ5UHJlZGljYXRlT3BlcmF0b3JzdAAVbGFtYmRhJGx0ZSQ5NTA0OGZjMSQxdAB9KExvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AV3EAfgAscQB+ADBxAH4AJnEAfgAxcQB+AC10ACVsYW1iZGEkbnVsbE1pc3NpbmdIYW5kbGluZyRhNTAwNTI4MSQxdAC8KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AV3EAfgAsdAA+b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlVHJpRnVuY3Rpb25xAH4AJnQASihMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDspTGphdmEvbGFuZy9PYmplY3Q7cQB+AC10ABZsYW1iZGEkaW1wbCRhMGZi
MzRkNCQxdAD3KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3QAuChMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vRnVuY3Rpb25Qcm9wZXJ0aWVzO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AS3EAfgA5fnEAfgBBdAAHQk9PTEVBTg==\\\"}\",\"lang\":\"opensearch_compounded_script\"},\"boost\":1.0}},\"_source\":{\"includes\":[\"host\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_weblogs, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IANG9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uRFNMJDI9znTUIQE9bAIABUwADXZhbCRhcmd1bWVudHNxAH4AAUwADHZhbCRmdW5jdGlvbnQAP0xvcmcvb3BlbnNl
YXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0wAEHZhbCRmdW5jdGlvbk5hbWVxAH4AA0wAFnZhbCRmdW5jdGlvblByb3BlcnRpZXNxAH4ABEwADnZhbCRyZXR1cm5UeXBlcQB+AAV4cQB+AAZzcQB+AAgAAAABdwQAAAABc3IAL29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLkxpdGVyYWxFeHByZXNzaW9uRUIt8IzHgiQCAAFMAAlleHByVmFsdWV0AClMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3hwc3IALW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLm1vZGVsLkV4cHJTdHJpbmdWYWx1ZQBBMiVziQ4TAgABTAAFdmFsdWV0ABJMamF2YS9sYW5nL1N0cmluZzt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2BhREigIAAHhwdAALOjpmZmZmOjEyMzR4c3IAM29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uTmFtZQuoOE3O9meXAgABTAAMZnVuY3Rpb25OYW1lcQB+ABJ4cHQACmNhc3RfdG9faXBxAH4ADXNyACFqYXZhLmxhbmcuaW52b2tlLlNlcmlhbGl6ZWRMYW1iZGFvYdCULCk2hQIACkkADmltcGxNZXRob2RLaW5kWwAMY2FwdHVyZWRBcmdzdAATW0xqYXZhL2xhbmcvT2JqZWN0O0wADmNhcHR1cmluZ0NsYXNzdAARTGphdmEvbGFuZy9DbGFzcztMABhmdW5jdGlvbmFsSW50ZXJmYWNlQ2xhc3NxAH4AEkwAHWZ1bmN0aW9uYWxJbnRlcmZhY2VNZXRob2ROYW1lcQB+ABJMACJmdW5jdGlvbmFsSW50ZXJmYWNlTWV0aG9kU2lnbmF0dXJlcQB+ABJMAAlpbXBsQ2xhc3NxAH4AEkwADmltcGxNZXRob2ROYW1lcQB+ABJMABNpbXBsTWV0aG9kU2lnbmF0dXJlcQB+ABJMABZpbnN0YW50aWF0ZWRNZXRob2RUeXBlcQB+ABJ4cAAAAAZ1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAABc3EAfgAZAAAABnVxAH4AHQAAAAFzcQB+ABkAAAAGdXEAfgAdAAAAAHZyAEBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5vcGVyYXRvci5jb252ZXJ0LlR5cGVDYXN0T3BlcmF0b3JzAAAAAAAAAAAAAAB4cHQAO29yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUZ1bmN0aW9udAAFYXBwbHl0ACYoTGphdmEvbGFuZy9PYmplY3Q7KUxqYXZhL2xhbmcvT2JqZWN0O3QAQG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL29wZXJhdG9yL2NvbnZlcnQvVHlwZUNhc3RPcGVyYXRvcnN0ABpsYW1iZGEkY2FzdFRvSXAkMTJjN2RjNDgkMXQAVChMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAqdnIAMm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uRFNMAAAAAAAAAAAAAAB4cHEAfgAlcQB+ACZxAH4AJ3QAMm9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uRFNMdAAlbGFtYmRh
JG51bGxNaXNzaW5nSGFuZGxpbmckODc4MDY5YzgkMXQAkShMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlRnVuY3Rpb247TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AKnEAfgAsdAA9b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbnEAfgAmdAA4KExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL09iamVjdDtxAH4ALXQAFmxhbWJkYSRpbXBsJDhkNTg2Y2RjJDF0AMwoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTt0AI8oTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgAXc3IAOW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uUHJvcGVydGllc888WWObqPmzAgADTAANY3VycmVudFpvbmVJZHQAEkxqYXZhL3RpbWUvWm9uZUlkO0wACm5vd0luc3RhbnR0ABNMamF2YS90aW1lL0luc3RhbnQ7TAAJcXVlcnlUeXBldAAnTG9yZy9vcGVuc2VhcmNoL3NxbC9leGVjdXRvci9RdWVyeVR5cGU7eHBzcgANamF2YS50aW1lLlNlcpVdhLobIkiyDAAAeHB3AggAeHNxAH4AOncNAgAAAABoifJmKvyT4Hh+cgAlb3JnLm9wZW5zZWFyY2guc3FsLmV4ZWN1dG9yLlF1ZXJ5VHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAA1BQTH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHEAfgA+dAACSVBzcgAxb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uUmVmZXJlbmNlRXhwcmVzc2lvbqtE71wSB4XWAgAETAAEYXR0cnEAfgASTAAFcGF0aHNxAH4AAUwAB3Jhd1BhdGhxAH4AEkwABHR5cGVxAH4ABXhwdAAEaG9zdHNyABpqYXZhLnV0aWwuQXJyYXlzJEFycmF5TGlzdNmkPL7NiAbSAgABWwABYXEAfgAaeHB1cgATW0xqYXZhLmxhbmcuU3RyaW5nO63SVufpHXtHAgAAeHAAAAABcQB+AEZxAH4ARnEAfgBCeHNxAH4AFnQAAjw9cQB+AAlzcQB+ABkAAAAGdXEAfgAdAAAAAXNxAH4AGQAAAAZ1cQB+AB0AAAABc3EAfgAZAAAABnVxAH4AHQAAAAB2cgBJb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24ub3BlcmF0b3IucHJlZGljYXRlLkJpbmFyeVByZWRpY2F0ZU9wZXJh
dG9ycwAAAAAAAAAAAAAAeHBxAH4AMHEAfgAmcQB+ADF0AElvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9vcGVyYXRvci9wcmVkaWNhdGUvQmluYXJ5UHJlZGljYXRlT3BlcmF0b3JzdAAVbGFtYmRhJGx0ZSQ5NTA0OGZjMSQxdAB9KExvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AV3EAfgAscQB+ADBxAH4AJnEAfgAxcQB+AC10ACVsYW1iZGEkbnVsbE1pc3NpbmdIYW5kbGluZyRhNTAwNTI4MSQxdAC8KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AV3EAfgAsdAA+b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlVHJpRnVuY3Rpb25xAH4AJnQASihMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDspTGphdmEvbGFuZy9PYmplY3Q7cQB+AC10ABZsYW1iZGEkaW1wbCRhMGZiMzRkNCQxdAD3KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3QAuChMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vRnVuY3Rpb25Qcm9wZXJ0aWVzO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AS3EAfgA5fnEAfgBBdAAHQk9PTEVBTg==\\\"}\",\"lang\":\"opensearch_compounded_script\"},\"boost\":1.0}},\"_source\":{\"includes\":[\"host\"]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push.yaml index f116e2ae352..242cc1efcf0 100644 --- 
a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push.yaml @@ -13,7 +13,6 @@ root: :true,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\"\ :true,\"boost\":1.0}},{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\"\ :false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\"\ - :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}},\ - \ needClean=true, searchDone=false, pitId=*,\ + :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_date_string.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_date_string.yaml index 5bf06c10265..80be17c49a3 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_date_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_date_string.yaml @@ -16,6 +16,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_date_formats,\ \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ - _source\":{\"includes\":[\"yyyy-MM-dd\"],\"excludes\":[]}}, pitId=*,\ + _source\":{\"includes\":[\"yyyy-MM-dd\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_time_string.yaml index 0a06e142431..64597cc9db0 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_time_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_time_string.yaml @@ -16,6 +16,6 @@ root: description: 
request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_date_formats,\ \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ - _source\":{\"includes\":[\"custom_time\"],\"excludes\":[]}}, pitId=*,\ + _source\":{\"includes\":[\"custom_time\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_timestamp_string.yaml index 778fdeaa1c7..5f1ae1a53a5 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_timestamp_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_timestamp_string.yaml @@ -14,6 +14,6 @@ root: include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\"\ :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\"\ ,\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\"\ - ,\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, pitId=*,\ + ,\"employer\",\"state\",\"age\",\"email\",\"male\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_then_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_then_limit_push.yaml index 66c7729f993..aab562ac0c8 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_then_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_then_limit_push.yaml @@ -8,6 +8,6 @@ root: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"query\":{\"\ range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\"\ - 
:true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\"\ - :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"]}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.yaml index 766e6eb5f22..50858397a9a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.yaml @@ -11,6 +11,6 @@ root: :{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"boost\"\ :1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\"\ ,\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\"\ - ,\"lastname\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + ,\"lastname\"]}}, needClean=true, searchDone=false, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml index ff157e036d0..65a666f319a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml @@ -11,6 +11,6 @@ root: :{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\"\ :true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"\ firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"\ - state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, 
pitId=*,\ + state\",\"age\",\"email\",\"lastname\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_5_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_5_push.yaml index 7ce50a2b37c..b530d714eba 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_5_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_5_push.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + includes\":[\"age\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10from1_10from2_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10from1_10from2_push.yaml index 53ed94585f0..3ce79b70e15 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10from1_10from2_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10from1_10from2_push.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":3,\"size\":8,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + includes\":[\"age\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_5_10_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_5_10_push.yaml index 
7ce50a2b37c..b530d714eba 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_5_10_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_5_10_push.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + includes\":[\"age\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_offsets_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_offsets_push.yaml index 1d4cd474476..27c5ca8df30 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_offsets_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_offsets_push.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":3,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + includes\":[\"age\"]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_sort_push.yaml index f2daa15d506..e210bccf386 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_sort_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_sort_push.yaml @@ -7,7 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ 
sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\"\ - ,\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + includes\":[\"age\"]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_fields_relevance_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_fields_relevance_push.json index 98dc18c5a3e..b7730266fdf 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_fields_relevance_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_fields_relevance_push.json @@ -7,7 +7,7 @@ "children": [{ "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"simple_query_string\":{\"query\":\"gmail\",\"fields\":[\"name^4.0\",\"email^1.0\"],\"analyzer\":\"english\",\"flags\":-1,\"default_operator\":\"or\",\"analyze_wildcard\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, 
sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"simple_query_string\":{\"query\":\"gmail\",\"fields\":[\"name^4.0\",\"email^1.0\"],\"analyzer\":\"english\",\"flags\":-1,\"default_operator\":\"or\",\"analyze_wildcard\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] }] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_sort_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_sort_push.json index 147184ed5e4..03e784dd2ef 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_sort_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_multi_sort_push.json @@ -7,7 +7,7 @@ "children": [{ "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"address\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"balance\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"balance\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"gender.keyword\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"account_number\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": 
"OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\"]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"address\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"balance\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"balance\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"gender.keyword\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"account_number\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] }] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern.yaml index 517435c9a92..a2cf5e8ac38 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern.yaml @@ -10,6 +10,6 @@ root: \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\"\ :{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\"\ ,\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"\ - ,\"patterns_field\"],\"excludes\":[]}}, pitId=*,\ + ,\"patterns_field\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml index a27ebccbfad..3f55d3dfc1a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml @@ -14,6 +14,6 @@ root: missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"pattern_count\"\ :{\"value_count\":{\"field\":\"_index\"}},\"sample_logs\":{\"top_hits\"\ :{\"from\":0,\"size\":10,\"version\":false,\"seq_no_primary_term\":false,\"\ - explain\":false,\"_source\":{\"includes\":[\"email\"],\"excludes\":[]}}}}}}},\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + explain\":false,\"_source\":{\"includes\":[\"email\"]}}}}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_basic_text.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_basic_text.json index 5ecf576cd1d..11b204e62fe 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_basic_text.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_basic_text.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, 
searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"ERROR\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_numeric_comparison.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_numeric_comparison.json index d6e10550c31..722d1d6e9d9 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_numeric_comparison.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_numeric_comparison.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, 
sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityNumber:>15\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_wildcard_star.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_wildcard_star.json index 8ba7e887cdf..cdc578f8382 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_search_wildcard_star.json +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/explain_search_wildcard_star.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_otel_logs, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"query_string\":{\"query\":\"severityText:ERR*\",\"fields\":[],\"type\":\"best_fields\",\"default_operator\":\"or\",\"max_determinized_states\":10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\":true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"spanId\",\"traceId\",\"@timestamp\",\"instrumentationScope\",\"severityText\",\"resource\",\"flags\",\"attributes\",\"droppedAttributesCount\",\"severityNumber\",\"time\",\"body\"]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git 
a/integ-test/src/test/resources/expectedOutput/ppl/explain_single_field_relevance_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_single_field_relevance_push.json index de132c622f8..de9f95a681f 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_single_field_relevance_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_single_field_relevance_push.json @@ -7,7 +7,7 @@ "children": [{ "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"email\":{\"query\":\"*@gmail.com\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"email\":{\"query\":\"*@gmail.com\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] }] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_count_push.yaml 
b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_count_push.yaml index f2daa15d506..e210bccf386 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_count_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_count_push.yaml @@ -7,7 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\"\ - ,\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + includes\":[\"age\"]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_desc_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_desc_push.json index 408f860b2d2..05210ca3d61 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_desc_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_desc_push.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\",\"firstname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, 
sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\",\"firstname\"]},\"sort\":[{\"age\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_push.json index c95519e037d..7b461a23b65 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_push.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=s9y3QQEhb3BlbnNlYXJjaC1zcWxfdGVzdF9pbmRleF9hY2NvdW50FkxkUmtkc296Ul9hY0ZyQWdnSklXTlEAFnJ6SVBMUjhrU3lTMHNMQXA1ckRnVWcAAAAAAAAAAAMWNEd5alM2OFhUZXVPYW1mSm1Gc1ZVUQEWTGRSa2Rzb3pSX2FjRnJBZ2dKSVdOUQAA, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, 
pitId=s9y3QQEhb3BlbnNlYXJjaC1zcWxfdGVzdF9pbmRleF9hY2NvdW50FkxkUmtkc296Ul9hY0ZyQWdnSklXTlEAFnJ6SVBMUjhrU3lTMHNMQXA1ckRnVWcAAAAAAAAAAAMWNEd5alM2OFhUZXVPYW1mSm1Gc1ZVUQEWTGRSa2Rzb3pSX2FjRnJBZ2dKSVdOUQAA, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_limit_push.yaml index f2daa15d506..e210bccf386 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_limit_push.yaml @@ -7,7 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\"\ - ,\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + includes\":[\"age\"]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\"\ + :\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_take.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_take.yaml index 1ea13830bf4..5a935ed8d1a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_take.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_take.yaml @@ -8,6 +8,6 @@ root: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ :{\"take\":{\"top_hits\":{\"from\":0,\"size\":2,\"version\":false,\"seq_no_primary_term\"\ - :false,\"explain\":false,\"_source\":{\"includes\":[\"firstname\"],\"excludes\"\ - :[]}}}}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - 
children: [] \ No newline at end of file + :false,\"explain\":false,\"_source\":{\"includes\":[\"firstname\"]}}}}},\ + \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/search_with_absolute_time_range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/search_with_absolute_time_range.yaml index fd1dcfce589..873ce247850 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/search_with_absolute_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/search_with_absolute_time_range.yaml @@ -13,7 +13,6 @@ root: :10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\"\ :0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\"\ :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ - :[\"@timestamp\",\"category\",\"value\",\"timestamp\"],\"excludes\":[]}},\ - \ needClean=true, searchDone=false, pitId=*,\ + :[\"@timestamp\",\"category\",\"value\",\"timestamp\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/search_with_chained_time_modifier.yaml b/integ-test/src/test/resources/expectedOutput/ppl/search_with_chained_time_modifier.yaml index 0c30419f131..0d03f1a6cbf 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/search_with_chained_time_modifier.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/search_with_chained_time_modifier.yaml @@ -13,7 +13,6 @@ root: :10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\"\ :0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\"\ :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ - :[\"@timestamp\",\"category\",\"value\",\"timestamp\"],\"excludes\":[]}},\ - \ 
needClean=true, searchDone=false, pitId=*,\ + :[\"@timestamp\",\"category\",\"value\",\"timestamp\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/search_with_numeric_time_range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/search_with_numeric_time_range.yaml index ebc09261253..847607dce2a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/search_with_numeric_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/search_with_numeric_time_range.yaml @@ -12,7 +12,6 @@ root: :10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\"\ :0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\"\ :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ - :[\"@timestamp\",\"category\",\"value\",\"timestamp\"],\"excludes\":[]}},\ - \ needClean=true, searchDone=false, pitId=*,\ + :[\"@timestamp\",\"category\",\"value\",\"timestamp\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/search_with_relative_time_range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/search_with_relative_time_range.yaml index ea4bcd1e8a9..2e595149f25 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/search_with_relative_time_range.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/search_with_relative_time_range.yaml @@ -13,7 +13,6 @@ root: fuzziness\":\"AUTO\",\"fuzzy_prefix_length\":0,\"fuzzy_max_expansions\"\ :50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\"\ :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ - :[\"@timestamp\",\"category\",\"value\",\"timestamp\"],\"excludes\":[]}},\ - \ needClean=true, searchDone=false, pitId=*,\ + 
:[\"@timestamp\",\"category\",\"value\",\"timestamp\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/search_with_relative_time_snap.yaml b/integ-test/src/test/resources/expectedOutput/ppl/search_with_relative_time_snap.yaml index a1d1d23866e..12928e87cba 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/search_with_relative_time_snap.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/search_with_relative_time_snap.yaml @@ -12,7 +12,6 @@ root: :10000,\"enable_position_increments\":true,\"fuzziness\":\"AUTO\",\"fuzzy_prefix_length\"\ :0,\"fuzzy_max_expansions\":50,\"phrase_slop\":0,\"escape\":false,\"auto_generate_synonyms_phrase_query\"\ :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ - :[\"@timestamp\",\"category\",\"value\",\"timestamp\"],\"excludes\":[]}},\ - \ needClean=true, searchDone=false, pitId=*,\ + :[\"@timestamp\",\"category\",\"value\",\"timestamp\"]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/script_value.json b/integ-test/src/test/resources/expectedOutput/script_value.json index 3c03baccff8..66b8c9df3b7 100644 --- a/integ-test/src/test/resources/expectedOutput/script_value.json +++ b/integ-test/src/test/resources/expectedOutput/script_value.json @@ -4,8 +4,7 @@ "_source" : { "includes" : [ "account_number" - ], - "excludes" : [ ] + ] }, "script_fields" : { "test" : { diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3655.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3655.yml index 01d1e3fed6c..2f853e4022a 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3655.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3655.yml @@ -102,7 +102,7 @@ teardown: # ppl.explain: # body: # query: "source=test | where 
match_phrase(body, 'field 2') | fields body" -# - match: { root.children.0.description.request: "OpenSearchQueryRequest(indexName=test, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match_phrase\":{\"body\":{\"query\":\"field 2\",\"slop\":0,\"zero_terms_query\":\"NONE\",\"boost\":1.0}}},\"_source\":{\"includes\":[\"body\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=.*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)"} +# - match: { root.children.0.description.request: "OpenSearchQueryRequest(indexName=test, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match_phrase\":{\"body\":{\"query\":\"field 2\",\"slop\":0,\"zero_terms_query\":\"NONE\",\"boost\":1.0}}},\"_source\":{\"includes\":[\"body\"]}}, needClean=true, searchDone=false, pitId=.*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)"} # # # - do: @@ -111,4 +111,4 @@ teardown: # ppl.explain: # body: # query: "source=test | where like(body, '%field 2%') | fields body" -# - match: { root.children.0.description.request: "OpenSearchQueryRequest(indexName=test, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"body\":{\"wildcard\":\"*field 2*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"body\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=.*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)"} +# - match: { root.children.0.description.request: "OpenSearchQueryRequest(indexName=test, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"wildcard\":{\"body\":{\"wildcard\":\"*field 2*\",\"case_insensitive\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"body\"]}}, needClean=true, searchDone=false, pitId=.*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)"} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3922.yml 
b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3922.yml new file mode 100644 index 00000000000..dfa26f3dc50 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3922.yml @@ -0,0 +1,71 @@ +setup: + - do: + indices.create: + index: test_issue_3922 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + name: + type: keyword + category: + type: keyword + value: + type: integer + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Sort order preserved through dedup (#3922)": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_issue_3922 + refresh: true + body: + - '{"index": {}}' + - '{"name": "A", "category": "X", "value": 1}' + - '{"index": {}}' + - '{"name": "B", "category": "X", "value": 2}' + - '{"index": {}}' + - '{"name": "A", "category": "Y", "value": 3}' + - '{"index": {}}' + - '{"name": "C", "category": "Z", "value": 4}' + - '{"index": {}}' + - '{"name": "B", "category": "Z", "value": 5}' + - '{"index": {}}' + - '{"name": "D", "category": "A", "value": 10}' + - '{"index": {}}' + - '{"name": "E", "category": "B", "value": 11}' + - '{"index": {}}' + - '{"name": "F", "category": "C", "value": 12}' + - '{"index": {}}' + - '{"name": "G", "category": "D", "value": 13}' + - '{"index": {}}' + - '{"name": "D", "category": "E", "value": 14}' + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=test_issue_3922 | sort category | dedup 1 name | fields category, name' + - match: {"total": 7} + - match: {"datarows": [["A", "D"], ["B", "E"], ["C", "F"], ["D", "G"], ["X", "A"], ["X", "B"], ["Z", "C"]]} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4659.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4659.yml new file mode 100644 index 00000000000..f111f0175f9 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4659.yml @@ -0,0 +1,90 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + - do: + indices.create: + index: log_text_4659 + body: + mappings: + properties: + msg: + type: text + idx: + type: integer + - do: + indices.create: + index: log_keyword_4659 + body: + mappings: + properties: + msg: + type: keyword + idx: + type: integer + - do: + bulk: + index: log_text_4659 + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"msg": "status=200", "idx": 1}' + - do: + bulk: + index: log_keyword_4659 + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"msg": "status=200", "idx": 2}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + - do: + indices.delete: + index: log_text_4659 + ignore: 404 + - do: + indices.delete: + index: log_keyword_4659 + ignore: 404 + +--- +"PPL wildcard query returns all documents across indices with mixed text/keyword field types": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=log_*_4659 | fields msg, idx | sort idx' + - match: {"total": 2} + - match: {"datarows": [["status=200", 1], ["status=200", 2]]} + +--- +"PPL script filter works across indices with mixed text/keyword field types": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=log_*_4659 | where upper(msg) = 'STATUS=200' | fields msg, idx | sort idx" + - match: {"total": 2} + - match: {"datarows": [["status=200", 1], ["status=200", 2]]} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4800.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4800.yml new file mode 100644 index 00000000000..ef4c4769191 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4800.yml @@ -0,0 +1,57 @@ +setup: + - skip: + features: + - headers + - allowed_warnings + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: stream_test_null + body: + mappings: + properties: + name: { type: keyword } + age: { type: integer } + state: { type: keyword } + country: { type: keyword } + year: { type: integer } + month: { type: integer } + - do: + bulk: + index: stream_test_null + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"name": "Jake", "age": 70, "state": "California", "country": "USA", "year": 2023, "month": 4}' + - 
'{"index": {"_id": "2"}}' + - '{"name": "Hello", "age": 30, "state": "New York", "country": "USA", "year": 2023, "month": 4}' + - '{"index": {"_id": "3"}}' + - '{"name": "John", "age": 25, "state": "Ontario", "country": "Canada", "year": 2023, "month": 4}' + - '{"index": {"_id": "4"}}' + - '{"name": "Jane", "age": 20, "state": "Quebec", "country": "Canada", "year": 2023, "month": 4}' + - '{"index": {"_id": "5"}}' + - '{"name": null, "age": 10, "state": null, "country": "Canada", "year": 2023, "month": 4}' + - '{"index": {"_id": "6"}}' + - '{"name": "Kevin", "year": 2023, "month": 4}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Chained streamstats with window should not cause NPE (#4800)": + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=stream_test_null | streamstats window=2 avg(age) as avg_age by state, country | streamstats window=2 avg(avg_age) as avg_state_age by country' + - match: {"total": 6} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5099.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5099.yml new file mode 100644 index 00000000000..a30381ecf50 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5099.yml @@ -0,0 +1,59 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5099 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + name: + type: keyword + age: + type: integer + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5099", "_id": "1"}}' + - '{"name": "Alice", "age": 30}' + - '{"index": {"_index": "issue5099", "_id": "2"}}' + - '{"name": "Bob", "age": 25}' + +--- +teardown: + - do: + indices.delete: + index: issue5099 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- 
+"Issue 5099: rename with wildcard should not apply on hidden fields": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5099 | rename * as old_* + + - match: { total: 2 } + - length: { schema: 2 } + - match: { schema: [ { name: "old_name", type: "string" }, { name: "old_age", type: "int" } ] } + - length: { datarows: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml new file mode 100644 index 00000000000..dd0335f73d5 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5125.yml @@ -0,0 +1,66 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5125 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + gender: + type: keyword + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5125", "_id": "1"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "2"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "3"}}' + - '{"gender": "F"}' + - '{"index": {"_index": "issue5125", "_id": "4"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "5"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "6"}}' + - '{"gender": "M"}' + - '{"index": {"_index": "issue5125", "_id": "7"}}' + - '{"gender": "M"}' + +--- +teardown: + - do: + indices.delete: + index: issue5125 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + 
plugins.calcite.enabled: false + +--- +"Issue 5125: consecutive sorts after agg should honor latest sort direction": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5125 | stats count() as c by gender | sort gender | sort - gender + + - match: { total: 2 } + - match: { schema: [ { name: c, type: bigint }, { name: gender, type: string } ] } + - match: { datarows: [ [ 4, "M" ], [ 3, "F" ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml new file mode 100644 index 00000000000..34cb9f24370 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml @@ -0,0 +1,62 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5165 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + int_field: + type: integer + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5165", "_id": "1"}}' + - '{"int_field": 42}' + - '{"index": {"_index": "issue5165", "_id": "2"}}' + - '{"int_field": -1}' + - '{"index": {"_index": "issue5165", "_id": "3"}}' + - '{"int_field": 0}' + - '{"index": {"_index": "issue5165", "_id": "4"}}' + - '{"int_field": 2147483647}' + - '{"index": {"_index": "issue5165", "_id": "5"}}' + - '{"int_field": null}' + +--- +teardown: + - do: + indices.delete: + index: issue5165 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5165: NOT IN 
should exclude null/missing rows": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5165 | where int_field NOT IN (42, -1, 0) | fields int_field + + - match: { total: 1 } + - length: { datarows: 1 } + - match: { datarows: [ [ 2147483647 ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5167.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5167.yml new file mode 100644 index 00000000000..cf18c4c425c --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5167.yml @@ -0,0 +1,84 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5167 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + int_field: + type: integer + json_data: + type: keyword + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5167", "_id": "1"}}' + - '{"int_field": 42, "json_data": "{\"name\":\"alice\",\"scores\":[90,85,92]}"}' + +--- +teardown: + - do: + indices.delete: + index: issue5167 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5167: json_set with $.key path should update the value": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5167 | where int_field = 42 | eval modified = json_set(json_data, '$.name', 'modified_alice') | fields modified" + + - match: { total: 1 } + - match: { datarows: [ [ 
"{\"name\":\"modified_alice\",\"scores\":[90,85,92]}" ] ] } + +--- +"Issue 5167: json_delete with $.key path should remove the key": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5167 | where int_field = 42 | eval deleted = json_delete(json_data, '$.name') | fields deleted" + + - match: { total: 1 } + - match: { datarows: [ [ "{\"scores\":[90,85,92]}" ] ] } + +--- +"Issue 5167: json_set with unprefixed path still works": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5167 | where int_field = 42 | eval modified = json_set(json_data, 'name', 'bob') | fields modified" + + - match: { total: 1 } + - match: { datarows: [ [ "{\"name\":\"bob\",\"scores\":[90,85,92]}" ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5169.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5169.yml new file mode 100644 index 00000000000..478fda45d46 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5169.yml @@ -0,0 +1,49 @@ +setup: + - do: + indices.create: + index: issue5169_keyword + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + keyword_field: + type: keyword + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5169_keyword", "_id": "1"}}' + - '{"keyword_field": "hello"}' + - '{"index": {"_index": "issue5169_keyword", "_id": "2"}}' + - '{"keyword_field": "world"}' + - '{"index": {"_index": "issue5169_keyword", "_id": "3"}}' + - '{"keyword_field": ""}' + - '{"index": {"_index": "issue5169_keyword", "_id": "4"}}' + - '{"keyword_field": "special chars..."}' + - '{"index": {"_index": "issue5169_keyword", "_id": "5"}}' + - '{"keyword_field": null}' + +--- +teardown: + - do: + indices.delete: + index: issue5169_keyword + ignore_unavailable: true + +--- +"Issue 5169: NOT LIKE should exclude 
null/missing field rows": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5169_keyword | where NOT keyword_field LIKE '%ello%' | fields keyword_field + + - match: { total: 3 } + - length: { datarows: 3 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5172.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5172.yml new file mode 100644 index 00000000000..239d06ec968 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5172.yml @@ -0,0 +1,78 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5172 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + count: + type: integer + category: + type: keyword + subcategory: + type: keyword + value: + type: double + ts: + type: date + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5172", "_id": "1"}}' + - '{"count": 1, "category": "A", "subcategory": "X", "value": 10.5, "ts": "2024-01-01"}' + - '{"index": {"_index": "issue5172", "_id": "2"}}' + - '{"count": 2, "category": "A", "subcategory": "Y", "value": 20.3, "ts": "2024-01-02"}' + +--- +teardown: + - do: + indices.delete: + index: issue5172 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5172: transpose with value field name collision": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5172 | where category = "A" | fields category, value | transpose 2 + + - match: { total: 2 } + - match: { schema: [ { name: column, type: string }, { name: "row 1", type: string }, { name: "row 2", type: string } ] } + - length: { datarows: 2 } + +--- +"Issue 5172: transpose with stats alias named value": + - skip: + features: + - headers + - do: + 
headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5172 | stats count() as value, avg(value) as avg_val | transpose + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5173.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5173.yml new file mode 100644 index 00000000000..3db25e24f56 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5173.yml @@ -0,0 +1,99 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5173 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + gender: + type: keyword + age: + type: integer + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5173", "_id": "1"}}' + - '{"gender": "F", "age": 10}' + - '{"index": {"_index": "issue5173", "_id": "2"}}' + - '{"gender": "F", "age": 20}' + - '{"index": {"_index": "issue5173", "_id": "3"}}' + - '{"gender": "M", "age": 30}' + - '{"index": {"_index": "issue5173", "_id": "4"}}' + - '{"gender": "M", "age": 40}' + +--- +teardown: + - do: + indices.delete: + index: issue5173 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5173: double appendpipe with different aggregations should succeed": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5173 | stats sum(age) as sum_age by gender | appendpipe [ stats avg(sum_age) as avg_sum_age ] | appendpipe [ stats max(sum_age) as max_sum_age ]" + + - match: { total: 4 } + - match: + schema: + - { name: sum_age, type: bigint } + - { name: gender, type: string } + - { name: avg_sum_age, type: double } + - { name: max_sum_age, type: bigint } + - match: + datarows: + - [ 30, "F", null, null ] + - [ 70, "M", null, null ] + - [ null, 
null, 50.0, null ] + - [ null, null, null, 70 ] + +--- +"Issue 5173: triple appendpipe with different aggregations should succeed": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5173 | stats sum(age) as sum_age by gender | appendpipe [ stats avg(sum_age) as avg_sum_age ] | appendpipe [ stats max(sum_age) as max_sum_age ] | appendpipe [ stats min(sum_age) as min_sum_age ]" + + - match: { total: 5 } + - match: + schema: + - { name: sum_age, type: bigint } + - { name: gender, type: string } + - { name: avg_sum_age, type: double } + - { name: max_sum_age, type: bigint } + - { name: min_sum_age, type: bigint } + - match: + datarows: + - [ 30, "F", null, null, null ] + - [ 70, "M", null, null, null ] + - [ null, null, 50.0, null, null ] + - [ null, null, null, 70, null ] + - [ null, null, null, null, 30 ] diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml new file mode 100644 index 00000000000..c2f861b8194 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml @@ -0,0 +1,83 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5174 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + count: + type: integer + category: + type: keyword + subcategory: + type: keyword + value: + type: double + ts: + type: date + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5174", "_id": "1"}}' + - '{"count": 1, "category": "A", "subcategory": "X", "value": 10.5, "ts": "2024-01-01"}' + - '{"index": {"_index": "issue5174", "_id": "2"}}' + - '{"count": 2, "category": "A", "subcategory": "Y", "value": 20.3, "ts": "2024-01-02"}' + - '{"index": {"_index": "issue5174", "_id": "3"}}' + - '{"count": 10, "category": "B", "subcategory": 
"X", "value": 100.0, "ts": "2024-01-03"}' + - '{"index": {"_index": "issue5174", "_id": "4"}}' + - '{"count": null, "category": "B", "subcategory": "Y", "value": null, "ts": "2024-01-04"}' + +--- +teardown: + - do: + indices.delete: + index: issue5174 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5174: bin then chart with null values should not cause NPE": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin by category + + - match: { total: 2 } + - match: { schema: [ { name: val_bin, type: string }, { name: category, type: string }, { name: "count()", type: bigint } ] } + - match: { datarows: [ [ "0-50", "A", 2 ], [ "100-150", "B", 1 ] ] } + +--- +"Issue 5174: bin then chart with single group and null values": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin + + - match: { total: 2 } + - match: { schema: [ { name: val_bin, type: string }, { name: "count()", type: bigint } ] } + - match: { datarows: [ [ "0-50", 2 ], [ "100-150", 1 ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5175.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5175.yml new file mode 100644 index 00000000000..34139cbbf4d --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5175.yml @@ -0,0 +1,85 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5175 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + dummy: + type: keyword + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5175", "_id": "1"}}' + - '{"dummy": "row"}' + 
+--- +teardown: + - do: + indices.delete: + index: issue5175 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5175: COALESCE(null, 42) returns integer 42": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5175 | eval x = COALESCE(null, 42) | fields x | head 1 + + - match: { total: 1 } + - match: { schema: [ { name: x, type: int } ] } + - match: { datarows: [ [ 42 ] ] } + +--- +"Issue 5175: COALESCE(42, null) returns integer 42": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5175 | eval x = COALESCE(42, null) | fields x | head 1 + + - match: { total: 1 } + - match: { schema: [ { name: x, type: int } ] } + - match: { datarows: [ [ 42 ] ] } + +--- +"Issue 5175: COALESCE(null, 3.14) returns double": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5175 | eval x = COALESCE(null, 3.14) | fields x | head 1 + + - match: { total: 1 } + - match: { schema: [ { name: x, type: double } ] } + - match: { datarows: [ [ 3.14 ] ] } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5185.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5185.yml new file mode 100644 index 00000000000..0f939a03585 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5185.yml @@ -0,0 +1,69 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5185 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + doc: + type: text + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5185", "_id": "1"}}' + - '{"doc": "{\"user\":{\"name\":\"John\",\"age\":30}}"}' + - '{"index": {"_index": "issue5185", "_id": 
"2"}}' + - '{"doc": "{\"user\":{\"name\":\"Alice\",\"age\":25}}"}' + +--- +teardown: + - do: + indices.delete: + index: issue5185 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5185: eval with multiple dotted-path assignments from MAP column": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5185 | spath input=doc | eval doc.user.name=doc.user.name, doc.user.age=doc.user.age | fields doc.user.name, doc.user.age" + + - match: { total: 2 } + - length: { datarows: 2 } + +--- +"Issue 5185: separate eval commands with dotted-path from MAP column": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: "source=issue5185 | spath input=doc | eval doc.user.name=doc.user.name | eval doc.user.age=doc.user.age | fields doc.user.name, doc.user.age" + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml new file mode 100644 index 00000000000..8c49825e6e2 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5269.yml @@ -0,0 +1,63 @@ +setup: + - do: + indices.create: + index: issue5269_bool + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + flag: + type: boolean + startTime: + type: date_nanos + + - do: + indices.create: + index: issue5269_text + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + flag: + type: text + startTime: + type: date_nanos + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5269_bool", "_id": "1"}}' + - '{"startTime": "2026-03-25T20:25:00.000Z", "flag": false}' + - '{"index": {"_index": "issue5269_text", "_id": "1"}}' + 
- '{"startTime": "2026-03-24T20:25:00.000Z", "flag": 0}' + +--- +teardown: + - do: + indices.delete: + index: issue5269_bool + ignore_unavailable: true + - do: + indices.delete: + index: issue5269_text + ignore_unavailable: true + +--- +"Issue 5269: PPL wildcard query across indices with boolean/text mapping conflict should not error": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5269_* | fields flag + + - match: { total: 2 } + - length: { datarows: 2 } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl/error_handling.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl/error_handling.yml new file mode 100644 index 00000000000..74219956c3c --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/ppl/error_handling.yml @@ -0,0 +1,116 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"Test field not found returns 400 bad_request": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_error_handling + refresh: true + body: + - '{"index": {}}' + - '{"age": 25, "name": "John"}' + - '{"index": {}}' + - '{"age": 30, "name": "Jane"}' + + - do: + catch: bad_request + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_error_handling | fields nonexistent_field + - match: {"$body": "/[Ff]ield|[Cc]olumn/"} + +--- +"Test index not found returns 404 missing": + - skip: + features: + - headers + - do: + catch: missing + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=nonexistent_index_12345 | fields age + +--- +"Test syntax error returns 400 bad_request": + - skip: + features: + - headers + - do: + bulk: + index: test_error_syntax + refresh: true + body: + - '{"index": {}}' + - '{"age": 25}' + + - do: + catch: 
bad_request + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_error_syntax | invalid_command_xyz + +--- +"Test semantic error returns 400 bad_request": + - skip: + features: + - headers + - do: + bulk: + index: test_error_semantic + refresh: true + body: + - '{"index": {}}' + - '{"age": 25, "name": "John"}' + + - do: + catch: bad_request + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_error_semantic | where age IN ('a', 'b', 'c') | fields age + +--- +"Test aggregation validation error returns 400 bad_request": + - skip: + features: + - headers + - do: + bulk: + index: test_error_agg + refresh: true + body: + - '{"index": {}}' + - '{"age": 25, "name": "John"}' + + - do: + catch: bad_request + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_error_agg | stats count(eval(age)) as cnt + - match: {"$body": "/[Cc]ondition.*boolean|[Bb]oolean.*expected/"} diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java b/legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java index 166266ca79a..859e2bae31c 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java @@ -60,6 +60,7 @@ public class DefaultCursor implements Cursor { private static final String PIT_ID = "p"; private static final String SEARCH_REQUEST = "r"; private static final String SORT_FIELDS = "h"; + private static final String INDICES = "x"; private static final ObjectMapper objectMapper = new ObjectMapper(); /** @@ -69,6 +70,13 @@ public class DefaultCursor implements Cursor { */ @NonNull private String indexPattern; + /** + * Concrete index names from the original query's FROM clause. Used to scope continuation + * SearchRequests so Security FGAC authorizes against the same indices as page 1 instead of a + * wildcard. 
+ */ + private String[] indices; + /** * List of Schema.Column for maintaining field order and generating null values of missing fields */ @@ -137,6 +145,7 @@ public String generateCursorId() { throw new RuntimeException("Failed to serialize sort fields to JSON string.", e); } json.put(SORT_FIELDS, sortFieldValue); + json.put(INDICES, new JSONArray(indices == null ? new String[0] : indices)); setSearchRequestString(json, searchSourceBuilder); return String.format("%s:%s", type.getId(), encodeCursor(json)); @@ -173,10 +182,23 @@ public static DefaultCursor from(String cursorId) { populateCursorForPit(json, cursor); cursor.setColumns(getColumnsFromSchema(json.getJSONArray(SCHEMA_COLUMNS))); cursor.setFieldAliasMap(fieldAliasMap(json.getJSONObject(FIELD_ALIAS_MAP))); + cursor.setIndices(getIndicesFromJson(json)); return cursor; } + private static String[] getIndicesFromJson(JSONObject json) { + JSONArray arr = json.optJSONArray(INDICES); + if (arr == null) { + return new String[0]; + } + String[] result = new String[arr.length()]; + for (int i = 0; i < arr.length(); i++) { + result[i] = arr.getString(i); + } + return result; + } + private static void populateCursorForPit(JSONObject json, DefaultCursor cursor) { cursor.setPitId(json.getString(PIT_ID)); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/hints/HintFactory.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/hints/HintFactory.java index 81b676e3d58..1754e7fc5a6 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/hints/HintFactory.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/hints/HintFactory.java @@ -5,15 +5,14 @@ package org.opensearch.sql.legacy.domain.hints; -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import com.fasterxml.jackson.dataformat.yaml.YAMLParser; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import 
org.opensearch.common.xcontent.yaml.YamlXContentParser; +import org.opensearch.common.xcontent.yaml.YamlXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; import org.opensearch.sql.legacy.exception.SqlParseException; /** Created by Eliran on 5/9/2015. */ @@ -104,13 +103,9 @@ public static Hint getHintFromString(String hintAsString) throws SqlParseExcepti builder.append(highlights[i]); } String heighlightParam = builder.toString(); - YAMLFactory yamlFactory = new YAMLFactory(); - YAMLParser yamlParser = null; - try { - yamlParser = yamlFactory.createParser(heighlightParam.toCharArray()); - YamlXContentParser yamlXContentParser = - new YamlXContentParser( - NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, yamlParser); + try (XContentParser yamlXContentParser = + YamlXContent.yamlXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, heighlightParam)) { Map map = yamlXContentParser.map(); hintParams.add(map); } catch (IOException e) { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorResultExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorResultExecutor.java index 8adffea526e..462237f1f09 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorResultExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorResultExecutor.java @@ -115,7 +115,10 @@ private String handleDefaultCursorRequest(Client client, DefaultCursor cursor) { SearchSourceBuilder source = cursor.getSearchSourceBuilder(); source.searchAfter(cursor.getSortFields()); source.pointInTimeBuilder(new PointInTimeBuilder(pitId)); - SearchRequest searchRequest = new SearchRequest(); + // Scope continuation to the original query's indices; an empty-indices SearchRequest + // resolves to a wildcard under Security FGAC and gets denied on page 2. 
+ String[] indices = cursor.getIndices(); + SearchRequest searchRequest = new SearchRequest(indices == null ? new String[0] : indices); searchRequest.source(source); scrollResponse = client.search(searchRequest).actionGet(); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java index 8ef4b1396a0..c74d20a239a 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java @@ -152,6 +152,7 @@ private DefaultCursor createCursorWithPit( cursor.setLimit(queryAction.getSelect().getRowCount()); cursor.setFetchSize(fetchSize); cursor.setPitId(pit.getPitId()); + cursor.setIndices(queryAction.getSelect().getIndexArr()); cursor.setSearchSourceBuilder(queryAction.getRequestBuilder().request().source()); if (response.getHits().getHits().length > 0) { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java index 9be2367dcaa..2950f6c9b85 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java @@ -5,8 +5,6 @@ package org.opensearch.sql.legacy.plugin; -import static org.opensearch.core.rest.RestStatus.BAD_REQUEST; -import static org.opensearch.core.rest.RestStatus.INTERNAL_SERVER_ERROR; import static org.opensearch.core.rest.RestStatus.OK; import com.alibaba.druid.sql.parser.ParserException; @@ -19,6 +17,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.BiFunction; import java.util.function.Predicate; import java.util.regex.Pattern; import org.apache.logging.log4j.LogManager; @@ -33,6 +32,7 @@ import org.opensearch.rest.RestChannel; 
import org.opensearch.rest.RestRequest; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.common.utils.QueryContext; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; @@ -83,10 +83,21 @@ public class RestSqlAction extends BaseRestHandler { /** New SQL query request handler. */ private final RestSQLQueryAction newSqlQueryHandler; - public RestSqlAction(Settings settings, Injector injector) { + /** + * Analytics router. Called before the normal SQL engine. Accepts the request and channel, returns + * {@code true} if it handled the request (analytics index), {@code false} to fall through to + * normal SQL engine. + */ + private final BiFunction<SQLQueryRequest, RestChannel, Boolean> analyticsRouter; + + public RestSqlAction( + Settings settings, + Injector injector, + BiFunction<SQLQueryRequest, RestChannel, Boolean> analyticsRouter) { super(); this.allowExplicitIndex = MULTI_ALLOW_EXPLICIT_INDEX.get(settings); this.newSqlQueryHandler = new RestSQLQueryAction(injector); + this.analyticsRouter = analyticsRouter; } @Override @@ -134,7 +145,6 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli Format format = SqlRequestParam.getFormat(request.params()); - // Route request to new query engine if it's supported already SQLQueryRequest newSqlRequest = new SQLQueryRequest( sqlRequest.getJsonContent(), @@ -142,40 +152,81 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli request.path(), request.params(), sqlRequest.cursor()); - return newSqlQueryHandler.prepareRequest( - newSqlRequest, - (restChannel, exception) -> { - try { - if (newSqlRequest.isExplainRequest()) { - LOG.info( - "Request is falling back to old SQL engine due to: " + exception.getMessage()); - } - LOG.info( - "[{}] Request {} is not supported and falling back to old SQL engine", - QueryContext.getRequestId(), - newSqlRequest); - LOG.info("Request Query: {}", 
QueryDataAnonymizer.anonymizeData(sqlRequest.getSql())); - QueryAction queryAction = explainRequest(client, sqlRequest, format); - executeSqlRequest(request, queryAction, client, restChannel); - } catch (Exception e) { - handleException(restChannel, e); - } - }, - this::handleException); + + // Route to analytics engine for non-Lucene (e.g., Parquet-backed) indices. + // The router returns true and sends the response directly if it handled the request. + final SQLQueryRequest finalRequest = newSqlRequest; + return channel -> { + if (!analyticsRouter.apply(finalRequest, channel)) { + delegateToV2Engine(request, client, sqlRequest, finalRequest, format, channel); + } + }; } catch (Exception e) { return channel -> handleException(channel, e); } } + /** Delegate a SQL query to the V2 engine with legacy fallback. */ + private void delegateToV2Engine( + RestRequest request, + NodeClient client, + SqlRequest sqlRequest, + SQLQueryRequest sqlQueryRequest, + Format format, + RestChannel channel) { + try { + newSqlQueryHandler + .prepareRequest( + sqlQueryRequest, + (restChannel, exception) -> { + try { + if (sqlQueryRequest.isExplainRequest()) { + LOG.info( + "Request is falling back to old SQL engine due to: " + + exception.getMessage()); + } + LOG.info( + "[{}] Request {} is not supported and falling back to old SQL engine", + QueryContext.getRequestId(), + sqlQueryRequest); + LOG.info( + "Request Query: {}", QueryDataAnonymizer.anonymizeData(sqlRequest.getSql())); + QueryAction queryAction = explainRequest(client, sqlRequest, format); + executeSqlRequest(request, queryAction, client, restChannel); + } catch (Exception e) { + handleException(restChannel, e); + } + }, + this::handleException) + .accept(channel); + } catch (Exception e) { + handleException(channel, e); + } + } + private void handleException(RestChannel restChannel, Exception exception) { - logAndPublishMetrics(exception); - if (exception instanceof OpenSearchException) { - OpenSearchException 
openSearchException = (OpenSearchException) exception; - reportError(restChannel, openSearchException, openSearchException.status()); - } else { - reportError( - restChannel, exception, isClientError(exception) ? BAD_REQUEST : INTERNAL_SERVER_ERROR); + RestStatus status = getRestStatus(exception); + logAndPublishMetrics(status, exception); + reportError(restChannel, exception, status); + } + + private static RestStatus getRestStatus(Exception ex) { + int code = getRawErrorCode(ex); + return RestStatus.fromCode(code); + } + + private static int getRawErrorCode(Exception ex) { + // Recursively unwrap ErrorReport to get to the underlying cause + if (ex instanceof ErrorReport) { + return getRawErrorCode(((ErrorReport) ex).getCause()); } + if (ex instanceof OpenSearchException) { + return ((OpenSearchException) ex).status().getStatus(); + } + if (isClientError(ex)) { + return 400; + } + return 500; } /** @@ -208,13 +259,15 @@ private void handleCursorRequest( cursorRestExecutor.execute(client, request.params(), channel); } - private static void logAndPublishMetrics(final Exception e) { - if (isClientError(e)) { + private static void logAndPublishMetrics(final RestStatus status, final Exception e) { + if (400 <= status.getStatus() && status.getStatus() < 500) { LOG.error(QueryContext.getRequestId() + " Client side error during query execution", e); Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_CUS).increment(); - } else { + } else if (500 <= status.getStatus() && status.getStatus() < 600) { LOG.error(QueryContext.getRequestId() + " Server side error during query execution", e); Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + } else { + LOG.warn("Got an exception returning non-error status {}", status, e); } } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/QueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/QueryAction.java index 122ff641b2d..8873e8aeabc 100644 --- 
a/legacy/src/main/java/org/opensearch/sql/legacy/query/QueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/QueryAction.java @@ -5,7 +5,6 @@ package org.opensearch.sql.legacy.query; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -14,8 +13,9 @@ import org.opensearch.action.search.SearchRequestBuilder; import org.opensearch.action.support.IndicesOptions; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.collapse.CollapseBuilder; import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; @@ -88,16 +88,15 @@ public Optional> getFieldNames() { protected void updateRequestWithCollapse(Select select, SearchRequestBuilder request) throws SqlParseException { - JsonFactory jsonFactory = new JsonFactory(); for (Hint hint : select.getHints()) { if (hint.getType() == HintType.COLLAPSE && hint.getParams() != null && 0 < hint.getParams().length) { - try (JsonXContentParser parser = - new JsonXContentParser( + try (XContentParser parser = + JsonXContent.jsonXContent.createParser( NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, - jsonFactory.createParser(hint.getParams()[0].toString()))) { + hint.getParams()[0].toString())) { request.setCollapse(CollapseBuilder.fromXContent(parser)); } catch (IOException e) { throw new SqlParseException("could not parse collapse hint: " + e.getMessage()); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/maker/AggMaker.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/maker/AggMaker.java index 75753ce24fb..b167c67021e 100644 --- 
a/legacy/src/main/java/org/opensearch/sql/legacy/query/maker/AggMaker.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/maker/AggMaker.java @@ -6,7 +6,6 @@ package org.opensearch.sql.legacy.query.maker; import com.alibaba.druid.sql.ast.expr.SQLAggregateOption; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import java.math.BigDecimal; import java.time.ZoneOffset; @@ -19,7 +18,6 @@ import org.apache.commons.lang3.StringUtils; import org.opensearch.common.xcontent.LoggingDeprecationHandler; import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.common.xcontent.json.JsonXContentParser; import org.opensearch.core.common.ParsingException; import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -340,11 +338,9 @@ private AggregationBuilder termsAgg(MethodField field) throws SqlParseException terms.order(BucketOrder.key(false)); } else { List orderElements = new ArrayList<>(); - try (JsonXContentParser parser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(value))) { + try (XContentParser parser = + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, value)) { XContentParser.Token currentToken = parser.nextToken(); if (currentToken == XContentParser.Token.START_OBJECT) { orderElements.add(InternalOrder.Parser.parseOrderParam(parser)); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/request/SqlRequest.java b/legacy/src/main/java/org/opensearch/sql/legacy/request/SqlRequest.java index bffdd36688e..6a6ef66196f 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/request/SqlRequest.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/request/SqlRequest.java @@ -5,14 +5,13 @@ package org.opensearch.sql.legacy.request; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import 
java.util.Collections; import org.json.JSONException; import org.json.JSONObject; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.query.BoolQueryBuilder; @@ -90,10 +89,10 @@ private void addFilterFromJson(BoolQueryBuilder boolQuery) throws SqlParseExcept String filter = getFilterObjectAsString(jsonContent); SearchModule searchModule = new SearchModule(Settings.EMPTY, Collections.emptyList()); XContentParser parser = - new JsonXContentParser( + JsonXContent.jsonXContent.createParser( new NamedXContentRegistry(searchModule.getNamedXContents()), LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(filter)); + filter); // nextToken is called before passing the parser to fromXContent since the fieldName will be // null if the diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/utils/JsonPrettyFormatter.java b/legacy/src/main/java/org/opensearch/sql/legacy/utils/JsonPrettyFormatter.java index 26f17feeb69..0f11374cfd0 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/utils/JsonPrettyFormatter.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/utils/JsonPrettyFormatter.java @@ -5,11 +5,10 @@ package org.opensearch.sql.legacy.utils; -import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; import org.opensearch.common.xcontent.LoggingDeprecationHandler; import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; @@ 
-26,10 +25,8 @@ public static String format(String jsonString) throws IOException { // turn _explain response into pretty formatted Json XContentBuilder contentBuilder = XContentFactory.jsonBuilder().prettyPrint(); try (XContentParser contentParser = - new JsonXContentParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(jsonString))) { + JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, jsonString)) { contentBuilder.copyCurrentStructure(contentParser); } return contentBuilder.toString(); diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/cursor/DefaultCursorTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/cursor/DefaultCursorTest.java index 5a5840b4469..216c057a1e7 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/cursor/DefaultCursorTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/cursor/DefaultCursorTest.java @@ -8,6 +8,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.emptyOrNullString; import static org.hamcrest.Matchers.startsWith; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doReturn; @@ -15,7 +16,9 @@ import java.io.ByteArrayOutputStream; import java.util.ArrayList; +import java.util.Base64; import java.util.Collections; +import org.json.JSONObject; import org.junit.Before; import org.junit.Test; import org.mockito.Mock; @@ -88,4 +91,93 @@ public void nullCursorWhenScrollIDIsNullOrEmpty() { cursor.setScrollId(""); assertThat(cursor.generateCursorId(), emptyOrNullString()); } + + @Test + public void indicesAreSerializedIntoCursorId() { + DefaultCursor cursor = new DefaultCursor(); + cursor.setRowsLeft(50); + cursor.setPitId("pit-id"); + cursor.setIndexPattern("idx1|idx2"); + 
cursor.setFetchSize(500); + cursor.setFieldAliasMap(Collections.emptyMap()); + cursor.setColumns(new ArrayList<>()); + cursor.setIndices(new String[] {"idx1", "idx2"}); + cursor.setSearchSourceBuilder(sourceBuilder); + + String cursorId = cursor.generateCursorId(); + JSONObject decoded = decodePayload(cursorId); + + assertEquals(2, decoded.getJSONArray("x").length()); + assertEquals("idx1", decoded.getJSONArray("x").getString(0)); + assertEquals("idx2", decoded.getJSONArray("x").getString(1)); + } + + @Test + public void nullIndicesAreSerializedAsEmptyArray() { + DefaultCursor cursor = new DefaultCursor(); + cursor.setRowsLeft(50); + cursor.setPitId("pit-id"); + cursor.setIndexPattern("idx1"); + cursor.setFetchSize(500); + cursor.setFieldAliasMap(Collections.emptyMap()); + cursor.setColumns(new ArrayList<>()); + cursor.setSearchSourceBuilder(sourceBuilder); + + String cursorId = cursor.generateCursorId(); + JSONObject decoded = decodePayload(cursorId); + + assertEquals(0, decoded.getJSONArray("x").length()); + } + + @Test + public void deserializeRoundTripsIndices() { + SearchSourceBuilder realSource = new SearchSourceBuilder(); + DefaultCursor cursor = new DefaultCursor(); + cursor.setRowsLeft(50); + cursor.setPitId("pit-id"); + cursor.setIndexPattern("idx1|idx2"); + cursor.setFetchSize(500); + cursor.setFieldAliasMap(Collections.emptyMap()); + cursor.setColumns(new ArrayList<>()); + cursor.setIndices(new String[] {"idx1", "idx2"}); + cursor.setSearchSourceBuilder(realSource); + + String cursorId = cursor.generateCursorId(); + String payload = cursorId.substring(cursorId.indexOf(':') + 1); + DefaultCursor restored = DefaultCursor.from(payload); + + assertArrayEquals(new String[] {"idx1", "idx2"}, restored.getIndices()); + } + + @Test + public void deserializeLegacyCursorWithoutIndicesDefaultsToEmptyArray() { + // Legacy cursor payloads written before this fix do not contain the "x" field. 
+ // They must continue to deserialize cleanly with indices == [] so in-flight + // cursors from pre-fix nodes are not rejected after upgrade. + SearchSourceBuilder realSource = new SearchSourceBuilder(); + DefaultCursor cursor = new DefaultCursor(); + cursor.setRowsLeft(50); + cursor.setPitId("pit-id"); + cursor.setIndexPattern("idx1"); + cursor.setFetchSize(500); + cursor.setFieldAliasMap(Collections.emptyMap()); + cursor.setColumns(new ArrayList<>()); + cursor.setIndices(new String[] {"idx1"}); + cursor.setSearchSourceBuilder(realSource); + + String cursorId = cursor.generateCursorId(); + String payload = cursorId.substring(cursorId.indexOf(':') + 1); + JSONObject json = new JSONObject(new String(Base64.getDecoder().decode(payload))); + json.remove("x"); + String legacyPayload = Base64.getEncoder().encodeToString(json.toString().getBytes()); + + DefaultCursor restored = DefaultCursor.from(legacyPayload); + + assertArrayEquals(new String[0], restored.getIndices()); + } + + private JSONObject decodePayload(String cursorId) { + String payload = cursorId.substring(cursorId.indexOf(':') + 1); + return new JSONObject(new String(Base64.getDecoder().decode(payload))); + } } diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/util/AggregationUtils.java b/legacy/src/test/java/org/opensearch/sql/legacy/util/AggregationUtils.java index 85da1d990f0..e3d1a485856 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/util/AggregationUtils.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/util/AggregationUtils.java @@ -5,13 +5,12 @@ package org.opensearch.sql.legacy.util; -import com.fasterxml.jackson.core.JsonFactory; import com.google.common.collect.ImmutableMap; import java.io.IOException; import java.util.List; import java.util.stream.Collectors; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import 
org.opensearch.core.ParseField; import org.opensearch.core.xcontent.ContextParser; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -78,10 +77,8 @@ Aggregation.class, new ParseField(entry.getKey()), entry.getValue())) public static Aggregations fromJson(String json) { try { XContentParser xContentParser = - new JsonXContentParser( - namedXContentRegistry, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(json)); + JsonXContent.jsonXContent.createParser( + namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, json); xContentParser.nextToken(); return Aggregations.fromXContent(xContentParser); } catch (IOException e) { diff --git a/legacy/src/test/resources/expectedOutput/aggregation_query_explain.json b/legacy/src/test/resources/expectedOutput/aggregation_query_explain.json index 9675b2b5beb..b0d0ac12251 100644 --- a/legacy/src/test/resources/expectedOutput/aggregation_query_explain.json +++ b/legacy/src/test/resources/expectedOutput/aggregation_query_explain.json @@ -6,8 +6,7 @@ "address", "script", "COUNT" - ], - "excludes" : [ ] + ] }, "stored_fields" : [ "address", diff --git a/legacy/src/test/resources/expectedOutput/explain_format_pretty.json b/legacy/src/test/resources/expectedOutput/explain_format_pretty.json index d03679ce6d0..622df896535 100644 --- a/legacy/src/test/resources/expectedOutput/explain_format_pretty.json +++ b/legacy/src/test/resources/expectedOutput/explain_format_pretty.json @@ -4,7 +4,6 @@ "_source" : { "includes" : [ "firstname" - ], - "excludes" : [ ] + ] } } \ No newline at end of file diff --git a/legacy/src/test/resources/explain_format_oneline.json b/legacy/src/test/resources/explain_format_oneline.json index a63860847e2..2aad4677ea3 100644 --- a/legacy/src/test/resources/explain_format_oneline.json +++ b/legacy/src/test/resources/explain_format_oneline.json @@ -1 +1 @@ -{"from" : 0, "size" : 200, "_source" : {"includes" : ["firstname"], "excludes" : [ ]}} \ No newline at end of file +{"from" : 
0, "size" : 200, "_source" : {"includes" : ["firstname"]}} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java index dab4b1e8ff1..b491f38ef80 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java @@ -30,6 +30,8 @@ import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexSettings; +import org.opensearch.sql.common.error.ErrorCode; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.opensearch.mapping.IndexMapping; import org.opensearch.sql.opensearch.request.OpenSearchRequest; import org.opensearch.sql.opensearch.request.OpenSearchScrollRequest; @@ -97,12 +99,27 @@ public Map getIndexMappings(String... 
indexExpression) { .collect( Collectors.toUnmodifiableMap( Map.Entry::getKey, cursor -> new IndexMapping(cursor.getValue()))); - } catch (IndexNotFoundException | OpenSearchSecurityException e) { + } catch (IndexNotFoundException e) { // Re-throw directly to be treated as client error finally - throw e; + throw ErrorReport.wrap(e) + .code(ErrorCode.INDEX_NOT_FOUND) + .location("while fetching index mappings") + .context("index_name", indexExpression[0]) + .build(); + } catch (OpenSearchSecurityException e) { + // Re-throw with permission denied code + throw ErrorReport.wrap(e) + .code(ErrorCode.PERMISSION_DENIED) + .location("while fetching index mappings") + .context("index_name", indexExpression[0]) + .build(); } catch (Exception e) { throw new IllegalStateException( - "Failed to read mapping for index pattern [" + indexExpression + "]", e); + "Failed to read mapping for index pattern [" + + String.join(",", indexExpression) + + "]: " + + e.getMessage(), + e); } } @@ -132,7 +149,11 @@ public Map getIndexMaxResultWindows(String... indexExpression) throw e; } catch (Exception e) { throw new IllegalStateException( - "Failed to read setting for index pattern [" + indexExpression + "]", e); + "Failed to read setting for index pattern [" + + String.join(",", indexExpression) + + "]: " + + e.getMessage(), + e); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java index 427eb7d6b03..f369c0003b8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java @@ -79,7 +79,12 @@ public Map getIndexMappings(String... 
indexExpression) { return response.mappings().entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, e -> new IndexMapping(e.getValue()))); } catch (IOException e) { - throw new IllegalStateException("Failed to get index mappings for " + indexExpression, e); + throw new IllegalStateException( + "Failed to get index mappings for " + + String.join(",", indexExpression) + + ": " + + e.getMessage(), + e); } } @@ -111,7 +116,12 @@ public Map getIndexMaxResultWindows(String... indexExpression) return result; } catch (IOException e) { - throw new IllegalStateException("Failed to get max result window for " + indexExpression, e); + throw new IllegalStateException( + "Failed to get max result window for " + + String.join(",", indexExpression) + + ": " + + e.getMessage(), + e); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java index 837a2a062ef..79d49a143de 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java @@ -43,7 +43,8 @@ public enum MappingType { ScaledFloat("scaled_float", ExprCoreType.DOUBLE), Double("double", ExprCoreType.DOUBLE), Boolean("boolean", ExprCoreType.BOOLEAN), - Alias("alias", ExprCoreType.UNKNOWN); + Alias("alias", ExprCoreType.UNKNOWN), + KnnVector("knn_vector", ExprCoreType.ARRAY); // TODO: ranges, geo shape, point, shape private final String name; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java index 2944aae77f1..93c2c6b1584 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java @@ -19,7 +19,7 @@ import org.opensearch.common.Numbers; import org.opensearch.common.geo.GeoPoint; import org.opensearch.common.geo.GeoUtils; -import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; @@ -149,10 +149,10 @@ public Object objectValue() { public Pair geoValue() { final JsonNode value = value(); try (XContentParser parser = - new JsonXContentParser( + JsonXContent.jsonXContent.createParser( NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, - value.traverse())) { + value.toString())) { parser.nextToken(); GeoPoint point = new GeoPoint(); GeoUtils.parseGeoPoint(parser, point, true); @@ -212,6 +212,8 @@ private boolean parseBooleanValue(JsonNode node) { return node.booleanValue(); } else if (node.isTextual()) { return Boolean.parseBoolean(node.textValue()); + } else if (node.isNumber()) { + return node.intValue() != 0; } else { if (LOG.isDebugEnabled()) { LOG.debug("node '{}' must be a boolean", node); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java index dacb7f97eab..7aaaaa6655e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java @@ -16,6 +16,7 @@ import org.opensearch.sql.executor.QueryId; import org.opensearch.sql.executor.QueryManager; import org.opensearch.sql.executor.execution.AbstractPlan; +import org.opensearch.tasks.CancellableTask; import org.opensearch.threadpool.Scheduler; import 
org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.client.node.NodeClient; @@ -33,15 +34,32 @@ public class OpenSearchQueryManager implements QueryManager { public static final String SQL_WORKER_THREAD_POOL_NAME = "sql-worker"; public static final String SQL_BACKGROUND_THREAD_POOL_NAME = "sql_background_io"; + private static final ThreadLocal cancellableTask = new ThreadLocal<>(); + + public static void setCancellableTask(CancellableTask task) { + cancellableTask.set(task); + } + + public static CancellableTask getCancellableTask() { + return cancellableTask.get(); + } + + public static void clearCancellableTask() { + cancellableTask.remove(); + } + @Override public QueryId submit(AbstractPlan queryPlan) { TimeValue timeout = settings.getSettingValue(Settings.Key.PPL_QUERY_TIMEOUT); - schedule(nodeClient, queryPlan::execute, timeout); + CancellableTask cancelTask = cancellableTask.get(); + cancellableTask.remove(); + schedule(nodeClient, queryPlan::execute, timeout, cancelTask); return queryPlan.getQueryId(); } - private void schedule(NodeClient client, Runnable task, TimeValue timeout) { + private void schedule( + NodeClient client, Runnable task, TimeValue timeout, CancellableTask cancelTask) { ThreadPool threadPool = client.threadPool(); Runnable wrappedTask = @@ -60,6 +78,8 @@ private void schedule(NodeClient client, Runnable task, TimeValue timeout) { timeout, ThreadPool.Names.GENERIC); + setCancellableTask(cancelTask); + try { task.run(); timeoutTask.cancel(); @@ -76,6 +96,8 @@ private void schedule(NodeClient client, Runnable task, TimeValue timeout) { } throw e; + } finally { + clearCancellableTask(); } }); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableNestedAggregate.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableNestedAggregate.java index ef569fc2989..58dc62a8469 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableNestedAggregate.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/CalciteEnumerableNestedAggregate.java @@ -79,7 +79,8 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { // TODO implement an enumerable nested aggregate throw new UnsupportedOperationException( String.format( - "Cannot execute nested aggregation on %s since pushdown cannot be applied.", aggCalls)); + "Cannot execute nested aggregation on %s since plugins.calcite.pushdown is disabled.", + aggCalls)); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index f8899ead2eb..d37957d4a9f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -11,7 +11,11 @@ import java.util.Set; import java.util.function.Predicate; import java.util.stream.IntStream; +import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.rules.SubstitutionRule; @@ -112,6 +116,21 @@ protected void apply( // add bucket_nullable = false hint PPLHintUtils.addIgnoreNullBucketHintToAggregate(relBuilder); + // add dedup sort hint if input collation is available. + // + // The collation's field indices refer to the dedup's input row type (i.e. `project`'s output). 
+ // Before handing the hint off to AggregateAnalyzer — which resolves indices against the SCAN's + // row type (`project.getInput()`) — permute each collation index through `project`'s source + // mapping. If any sort key is a computed column (not a bare `RexInputRef`) we can't push it as + // an OS `top_hits` sort, so drop the hint entirely and let Calcite restore order post-dedup. + if (dedup.getInputCollation() != null + && !dedup.getInputCollation().getFieldCollations().isEmpty()) { + RelCollation scanCollation = resolveCollationToScanSchema(dedup, project); + if (scanCollation != null) { + PPLHintUtils.addDedupSortHintToAggregate( + relBuilder, scanCollation, project.getInput().getRowType().getFieldNames()); + } + } // peek the aggregate after hint being added LogicalAggregate aggregate = (LogicalAggregate) relBuilder.build(); assert aggregate.getGroupSet().asList().equals(newGroupByList) @@ -126,6 +145,64 @@ protected void apply( } } + /** + * Rewrite {@code dedup.inputCollation} into scan-schema indices. The collation was captured in + * {@link org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule} against a specific row type; + * by the time we reach this rule Calcite may have swapped in a different input, so the + * collation's indices may be stale. Strategy: + * + *
      + *
    1. If the collation's indices are all valid in {@code project}'s output, permute them + * through {@code project.getProjects()} into scan indices (mirrors {@code + * Project.getMapping} + {@code RelCollations.permute}). + *
    2. Otherwise, resolve each collation position by name: look up {@code + * dedup.inputCollationFieldNames[idx]} in the scan's row type. + *
    + * + * A computed-column sort key (non-{@code RexInputRef}) is not pushable as an OS field sort, so + * returns {@code null} in that case. Returns {@code null} also if any sort key cannot be resolved + * by either path. + */ + private static @Nullable RelCollation resolveCollationToScanSchema( + LogicalDedup dedup, LogicalProject project) { + RelCollation collation = dedup.getInputCollation(); + int projectOutputSize = project.getRowType().getFieldCount(); + int maxIdx = -1; + for (RelFieldCollation fc : collation.getFieldCollations()) { + maxIdx = Math.max(maxIdx, fc.getFieldIndex()); + } + if (maxIdx < projectOutputSize) { + List projections = project.getProjects(); + List remapped = new ArrayList<>(); + for (RelFieldCollation fc : collation.getFieldCollations()) { + RexNode expr = projections.get(fc.getFieldIndex()); + if (!(expr instanceof RexInputRef ref)) { + return null; + } + remapped.add(fc.withFieldIndex(ref.getIndex())); + } + return RelCollations.of(remapped); + } + List originalNames = dedup.getInputCollationFieldNames(); + if (originalNames == null) { + return null; + } + List scanNames = project.getInput().getRowType().getFieldNames(); + List remapped = new ArrayList<>(); + for (RelFieldCollation fc : collation.getFieldCollations()) { + int oldIdx = fc.getFieldIndex(); + if (oldIdx < 0 || oldIdx >= originalNames.size()) { + return null; + } + int scanIdx = scanNames.indexOf(originalNames.get(oldIdx)); + if (scanIdx < 0) { + return null; + } + remapped.add(fc.withFieldIndex(scanIdx)); + } + return RelCollations.of(remapped); + } + @Value.Immutable public interface Config extends OpenSearchRuleConfig { // +- LogicalDedup diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java index e210095b480..b79b58e96c6 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java @@ -93,6 +93,7 @@ public RelNode convert(RelNode rel) { convertedSource, convertedLookup, graphLookup.getStartField(), + graphLookup.getStartValues(), graphLookup.getFromField(), graphLookup.getToField(), graphLookup.getOutputField(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 7d8cb8826cd..f919fdc0e30 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -83,6 +83,7 @@ import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -210,9 +211,12 @@ public static Pair, OpenSearchAggregationResponseParser try { final List groupList = aggregate.getGroupSet().asList(); List aggFieldNames = outputFields.subList(groupList.size(), outputFields.size()); + // Extract dedup sort hint if present (may be a multi-field sort) + List dedupSortKeys = PPLHintUtils.getDedupSortKeys(aggregate); // Process all aggregate calls Pair> builderAndParser = - processAggregateCalls(aggFieldNames, aggregate.getAggCallList(), project, helper); + processAggregateCalls( + aggFieldNames, aggregate.getAggCallList(), project, helper, dedupSortKeys); Builder metricBuilder = builderAndParser.getLeft(); List metricParsers = builderAndParser.getRight(); @@ -370,7 +374,8 @@ private static Pair> processAggregateCalls( 
List aggNames, List aggCalls, Project project, - AggregateAnalyzer.AggregateBuilderHelper helper) + AggregateAnalyzer.AggregateBuilderHelper helper, + List dedupSortKeys) throws PredicateAnalyzer.ExpressionNotAnalyzableException { Builder metricBuilder = new AggregatorFactories.Builder(); List metricParserList = new ArrayList<>(); @@ -382,7 +387,7 @@ private static Pair> processAggregateCalls( String aggName = aggNames.get(i); Pair builderAndParser = - createAggregationBuilderAndParser(aggCall, args, aggName, helper); + createAggregationBuilderAndParser(aggCall, args, aggName, helper, dedupSortKeys); builderAndParser = aggFilterAnalyzer.analyze(builderAndParser, aggCall, aggName); // Nested aggregation (https://docs.opensearch.org/docs/latest/aggregations/bucket/nested/) String nestedPath = @@ -436,11 +441,12 @@ private static Pair createAggregationBuilderAn AggregateCall aggCall, List> args, String aggName, - AggregateAnalyzer.AggregateBuilderHelper helper) { + AggregateAnalyzer.AggregateBuilderHelper helper, + List dedupSortKeys) { if (aggCall.isDistinct()) { return createDistinctAggregation(aggCall, args, aggName, helper); } else { - return createRegularAggregation(aggCall, args, aggName, helper); + return createRegularAggregation(aggCall, args, aggName, helper, dedupSortKeys); } } @@ -467,7 +473,8 @@ private static Pair createRegularAggregation( AggregateCall aggCall, List> args, String aggName, - AggregateBuilderHelper helper) { + AggregateBuilderHelper helper, + List dedupSortKeys) { return switch (aggCall.getAggregation().kind) { case AVG -> @@ -601,6 +608,16 @@ yield switch (functionName) { TopHitsAggregationBuilder topHitsAggregationBuilder = createTopHitsBuilder( aggCall, args, aggName, helper, dedupNumber, false, false, null, null); + // Emit a top_hits sort array that mirrors the original PPL sort collation + // (all fields, in order). 
Align NULL ordering with PPL/Calcite defaults + // (ASC -> NULLS FIRST, DESC -> NULLS LAST) so dedup picks the same row whether + // pushdown is on or off. + for (PPLHintUtils.DedupSortKey key : dedupSortKeys) { + SortOrder order = "DESC".equals(key.order()) ? SortOrder.DESC : SortOrder.ASC; + String missing = order == SortOrder.ASC ? "_first" : "_last"; + topHitsAggregationBuilder.sort( + SortBuilders.fieldSort(key.field()).order(order).missing(missing)); + } yield Pair.of(topHitsAggregationBuilder, new TopHitsParser(aggName, false, false)); } default -> diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 435cef22ef4..a694e0fea06 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -78,6 +78,10 @@ public static class PushDownUnSupportedException extends RuntimeException { public PushDownUnSupportedException(String message) { super(message); } + + public PushDownUnSupportedException(String message, Throwable cause) { + super(message, cause); + } } /** Constructor. 
*/ diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 8e6dbede58e..cc13c3a4a7a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -577,7 +577,8 @@ private QueryExpression prefix(RexCall call) { throw new PredicateAnalyzerException(message); } - Expression operandExpr = call.getOperands().get(0).accept(this); + RexNode innerOperand = call.getOperands().get(0); + Expression operandExpr = innerOperand.accept(this); // Handle NOT(boolean_field) - Calcite simplifies "field = false" to NOT($field). // In PPL semantics, "field = false" should only match documents where the field is // explicitly false (not null or missing). This is achieved via term query {value: false}. @@ -586,9 +587,36 @@ private QueryExpression prefix(RexCall call) { return QueryExpression.create(namedField).isFalse(); } QueryExpression expr = (QueryExpression) operandExpr; + // For null-intolerant predicates (LIKE, comparisons, equality, etc.), + // negation must also exclude documents where the field is NULL/missing. + // Truth-test operators (IS_TRUE, IS_NULL, etc.) already encode null + // semantics and must NOT get an additional exists filter. + if (isNullIntolerantPredicate(innerOperand) && expr instanceof SimpleQueryExpression sqe) { + return sqe.notWithExistsFilter(); + } return expr.not(); } + /** Returns true if the given RexNode is a null-intolerant predicate (value comparison). 
*/ + private static boolean isNullIntolerantPredicate(RexNode node) { + if (!(node instanceof RexCall innerCall)) { + return false; + } + return switch (innerCall.getKind()) { + case LIKE, + EQUALS, + NOT_EQUALS, + GREATER_THAN, + GREATER_THAN_OR_EQUAL, + LESS_THAN, + LESS_THAN_OR_EQUAL, + BETWEEN, + SEARCH -> + true; + default -> false; + }; + } + private QueryExpression postfix(RexCall call) { checkArgument( call.getKind() == SqlKind.IS_TRUE @@ -725,7 +753,14 @@ private QueryExpression binary(RexCall call) { CompoundQueryExpression.or( expression, QueryExpression.create(pair.getKey()).notExists()); // e.g. where a = 1 or a = 2 - case UNKNOWN -> expression; + // For NOT IN (complemented points), SQL three-valued logic dictates + // NULL NOT IN (...) evaluates to UNKNOWN (not TRUE), so null rows + // must be excluded via an exists filter. + case UNKNOWN -> + isSearchWithComplementedPoints(call) + ? CompoundQueryExpression.and( + false, expression, QueryExpression.create(pair.getKey()).exists()) + : expression; }; finalExpression.updateAnalyzedNodes(call); return finalExpression; @@ -1302,6 +1337,12 @@ public QueryExpression not() { return this; } + /** Negate with an exists filter to exclude null/missing documents. 
*/ + QueryExpression notWithExistsFilter() { + builder = boolQuery().must(existsQuery(getFieldReference())).mustNot(builder()); + return this; + } + @Override public QueryExpression exists() { builder = existsQuery(getFieldReference()); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java index fbe6d3cd723..b09f9346627 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java @@ -8,6 +8,7 @@ import lombok.Getter; import org.json.JSONObject; import org.opensearch.core.rest.RestStatus; +import org.opensearch.sql.common.error.ErrorReport; /** Error Message. */ public class ErrorMessage { @@ -62,12 +63,26 @@ public String toString() { } private JSONObject getErrorAsJson() { - JSONObject errorJson = new JSONObject(); + if (exception instanceof ErrorReport errorReport) { + JSONObject errorJson = new JSONObject(errorReport.toJsonMap()); + // Add 'reason' field for backward compatibility with existing clients + // Use the underlying exception message as 'reason' (broad error description) + // while 'details' contains the more precise handwritten message + if (!errorJson.has("reason")) { + Exception cause = errorReport.getCause(); + String reasonMessage = + cause.getLocalizedMessage() != null ? 
cause.getLocalizedMessage() : cause.getMessage(); + if (reasonMessage != null) { + errorJson.put("reason", reasonMessage); + } + } + return errorJson; + } + JSONObject errorJson = new JSONObject(); errorJson.put("type", type); errorJson.put("reason", reason); errorJson.put("details", details); - return errorJson; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java index 8617f264f06..b569276e3ee 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactory.java @@ -7,19 +7,25 @@ import lombok.experimental.UtilityClass; import org.opensearch.OpenSearchException; +import org.opensearch.sql.common.error.ErrorReport; @UtilityClass public class ErrorMessageFactory { /** * Create error message based on the exception type. Exceptions of OpenSearch exception type and * exceptions with wrapped OpenSearch exception causes should create {@link - * OpenSearchErrorMessage} + * OpenSearchErrorMessage}. ErrorReport exceptions preserve their context information. 
* * @param e exception to create error message * @param status exception status code * @return error message */ public static ErrorMessage createErrorMessage(Throwable e, int status) { + // Check for ErrorReport BEFORE unwrapping - we want to preserve the context + if (e instanceof ErrorReport) { + return new ErrorMessage(e, status); + } + Throwable cause = unwrapCause(e); if (cause instanceof OpenSearchException) { OpenSearchException exception = (OpenSearchException) cause; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/FilterType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/FilterType.java new file mode 100644 index 00000000000..cc42bb35f58 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/FilterType.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; +import org.opensearch.sql.exception.ExpressionEvaluationException; + +/** Filter placement strategy for vectorSearch() WHERE clauses. */ +public enum FilterType { + /** WHERE placed in bool.filter outside the knn clause (post-filtering). */ + POST("post"), + + /** WHERE placed inside knn.filter for efficient pre-filtering. 
*/ + EFFICIENT("efficient"); + + private final String value; + + FilterType(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + private static final Set VALID_VALUES = + Arrays.stream(values()).map(FilterType::getValue).collect(Collectors.toSet()); + + public static FilterType fromString(String str) { + for (FilterType ft : values()) { + if (ft.value.equals(str)) { + return ft; + } + } + throw new ExpressionEvaluationException( + String.format("filter_type must be one of %s, got '%s'", VALID_VALUES, str)); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngine.java index ce6740cd784..1b7de315fb6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngine.java @@ -7,10 +7,13 @@ import static org.opensearch.sql.utils.SystemIndexUtils.isSystemIndex; +import java.util.Collection; +import java.util.List; import lombok.Getter; import lombok.RequiredArgsConstructor; import org.opensearch.sql.DataSourceSchemaName; import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.expression.function.FunctionResolver; import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.storage.system.OpenSearchSystemIndex; import org.opensearch.sql.storage.StorageEngine; @@ -25,6 +28,11 @@ public class OpenSearchStorageEngine implements StorageEngine { @Getter private final Settings settings; + @Override + public Collection getFunctions() { + return List.of(new VectorSearchTableFunctionResolver(client, settings)); + } + @Override public Table getTable(DataSourceSchemaName dataSourceSchemaName, String name) { if (isSystemIndex(name)) { diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java new file mode 100644 index 00000000000..06727a5462b --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java @@ -0,0 +1,191 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import java.util.Map; +import java.util.function.Function; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.WrapperQueryBuilder; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; +import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScan; +import org.opensearch.sql.opensearch.storage.scan.VectorSearchIndexScan; +import org.opensearch.sql.opensearch.storage.scan.VectorSearchIndexScanBuilder; +import org.opensearch.sql.opensearch.storage.scan.VectorSearchQueryBuilder; +import org.opensearch.sql.storage.read.TableScanBuilder; + +/** + * Vector-search-aware OpenSearch index. Seeds the scan with a knn query and enables score tracking. + */ +public class VectorSearchIndex extends OpenSearchIndex { + + private final String field; + private final float[] vector; + private final Map options; + private final FilterType filterType; // null means default (EFFICIENT) + // Nullable for back-compat with existing tests and the non-vector-search constructor. When + // present, the scan defers a lazy k-NN plugin probe to open() so execution fails fast with a + // clear SQL error if the plugin is missing. 
+ private final KnnPluginCapability knnCapability; + + public VectorSearchIndex( + OpenSearchClient client, + Settings settings, + String indexName, + String field, + float[] vector, + Map options, + FilterType filterType, + KnnPluginCapability knnCapability) { + super(client, settings, indexName); + this.field = field; + this.vector = vector; + this.options = options; + this.filterType = filterType; + this.knnCapability = knnCapability; + } + + public VectorSearchIndex( + OpenSearchClient client, + Settings settings, + String indexName, + String field, + float[] vector, + Map options, + FilterType filterType) { + this(client, settings, indexName, field, vector, options, filterType, null); + } + + /** + * Default constructor — preserves existing call sites; uses no explicit filter type, so the scan + * falls back to the default placement ({@link FilterType#EFFICIENT}). + */ + public VectorSearchIndex( + OpenSearchClient client, + Settings settings, + String indexName, + String field, + float[] vector, + Map options) { + this(client, settings, indexName, field, vector, options, null, null); + } + + @Override + public TableScanBuilder createScanBuilder() { + // _score is not blocked at mapping time, so a user field named _score would collide with the + // synthetic v._score column on the response tuple and fail with an opaque duplicate-key error. + // Reject here so the user sees a clear SQL error (and _explain surfaces the problem without a + // k-NN request). + if (getFieldTypes().containsKey(METADATA_FIELD_SCORE)) { + throw new IllegalArgumentException( + String.format( + "Index '%s' defines a user field named '_score' that collides with the synthetic" + + " _score column exposed by vectorSearch(). 
Rename the field or query the index" + + " without vectorSearch().", + getIndexName())); + } + final TimeValue cursorKeepAlive = + getSettings().getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE); + var requestBuilder = createRequestBuilder(); + + // Callback for efficient filtering: serialize WHERE QueryBuilder to JSON, + // rebuild knn query with filter embedded. JSON handling stays in this class. + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder(buildKnnQueryJson(whereQuery.toString())); + + boolean filterTypeExplicit = filterType != null; + FilterType effectiveFilterType = filterType != null ? filterType : FilterType.EFFICIENT; + + var queryBuilder = + new VectorSearchQueryBuilder( + requestBuilder, + buildKnnQuery(), + options, + effectiveFilterType, + filterTypeExplicit, + rebuildWithFilter); + requestBuilder.pushDownTrackedScore(true); + + // Default size policy: LIMIT pushdown will further reduce if present. + if (options.containsKey("k")) { + // Top-k mode: default size to k so queries without LIMIT return k results. + requestBuilder.pushDownLimitToRequestTotal(Integer.parseInt(options.get("k")), 0); + } else { + // Radial mode (max_distance/min_score): cap at maxResultWindow. + // Without an explicit cap, radial queries could return unbounded results. 
+ requestBuilder.pushDownLimitToRequestTotal(getMaxResultWindow(), 0); + } + + Function createScanOperator = + rb -> { + var request = + rb.build(getIndexName(), cursorKeepAlive, getClient(), getFieldTypes().isEmpty()); + if (knnCapability != null) { + return new VectorSearchIndexScan( + getClient(), rb.getMaxResponseSize(), request, knnCapability); + } + return new OpenSearchIndexScan(getClient(), rb.getMaxResponseSize(), request); + }; + return new VectorSearchIndexScanBuilder(queryBuilder, createScanOperator); + } + + private QueryBuilder buildKnnQuery() { + return new WrapperQueryBuilder(buildKnnQueryJson()); + } + + // Package-private for testing + String buildKnnQueryJson() { + return buildKnnQueryJson(null); + } + + /** + * Builds knn query JSON, optionally embedding a filter clause for efficient filtering. + * + * @param filterJson serialized filter JSON to embed in knn.field.filter, or null for no filter + */ + String buildKnnQueryJson(String filterJson) { + StringBuilder vectorJson = new StringBuilder("["); + for (int i = 0; i < vector.length; i++) { + if (i > 0) vectorJson.append(","); + vectorJson.append(vector[i]); + } + vectorJson.append("]"); + + StringBuilder optionsJson = new StringBuilder(); + for (Map.Entry entry : options.entrySet()) { + optionsJson.append(","); + String value = entry.getValue(); + // All P0 option values are canonicalized to numeric strings by validateOptions(). + // The quoted fallback is retained for forward compatibility with future non-numeric options. 
+ if (isNumeric(value)) { + optionsJson.append(String.format("\"%s\":%s", entry.getKey(), value)); + } else { + optionsJson.append(String.format("\"%s\":\"%s\"", entry.getKey(), value)); + } + } + + String filterClause = ""; + if (filterJson != null) { + filterClause = String.format(",\"filter\":%s", filterJson); + } + + return String.format( + "{\"knn\":{\"%s\":{\"vector\":%s%s%s}}}", + field, vectorJson.toString(), optionsJson.toString(), filterClause); + } + + private static boolean isNumeric(String str) { + try { + Double.parseDouble(str); + return true; + } catch (NumberFormatException e) { + return false; + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementation.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementation.java new file mode 100644 index 00000000000..c1b5354f4b1 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementation.java @@ -0,0 +1,370 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.opensearch.sql.opensearch.storage.VectorSearchTableFunctionResolver.FIELD; +import static org.opensearch.sql.opensearch.storage.VectorSearchTableFunctionResolver.OPTION; +import static org.opensearch.sql.opensearch.storage.VectorSearchTableFunctionResolver.TABLE; +import static org.opensearch.sql.opensearch.storage.VectorSearchTableFunctionResolver.VECTOR; + +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import 
org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.env.Environment; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.TableFunctionImplementation; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; +import org.opensearch.sql.storage.Table; + +public class VectorSearchTableFunctionImplementation extends FunctionExpression + implements TableFunctionImplementation { + + /** + * P0 allowed option keys. Rejects unknown/future keys to prevent unvalidated DSL injection. A + * {@link List} (rather than a {@link Set}) so the unknown-key error message renders the supported + * keys in a stable, user-friendly order. + */ + static final List ALLOWED_OPTION_KEYS = + List.of("k", "max_distance", "min_score", "filter_type"); + + /** + * Field names must be safe for JSON interpolation: alphanumeric, dots (nested), underscores, + * hyphens. Rejects characters that could corrupt the WrapperQueryBuilder JSON. The same regex is + * reused for table names so user-supplied identifiers cannot break out of the JSON context. 
+ */ + private static final Pattern SAFE_FIELD_NAME = Pattern.compile("^[a-zA-Z0-9._\\-]+$"); + + private final FunctionName functionName; + private final List arguments; + private final OpenSearchClient client; + private final Settings settings; + private final KnnPluginCapability knnCapability; + + public VectorSearchTableFunctionImplementation( + FunctionName functionName, + List arguments, + OpenSearchClient client, + Settings settings, + KnnPluginCapability knnCapability) { + super(functionName, arguments); + this.functionName = functionName; + this.arguments = arguments; + this.client = client; + this.settings = settings; + this.knnCapability = knnCapability; + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + throw new UnsupportedOperationException( + String.format("vectorSearch function [%s] is only supported in FROM clause", functionName)); + } + + @Override + public ExprType type() { + return ExprCoreType.STRUCT; + } + + @Override + public String toString() { + List args = + arguments.stream() + .map( + arg -> { + if (arg instanceof NamedArgumentExpression) { + NamedArgumentExpression named = (NamedArgumentExpression) arg; + return String.format("%s=%s", named.getArgName(), named.getValue().toString()); + } + return arg.toString(); + }) + .collect(Collectors.toList()); + return String.format("%s(%s)", functionName, String.join(", ", args)); + } + + @Override + public Table applyArguments() { + // Local validation runs first so that malformed queries return stable SQL validation errors + // regardless of cluster state. The k-NN plugin presence is checked later, lazily at scan + // open() time, so analysis-time paths (_explain, local validation) stay functional on + // clusters without k-NN. 
+ validateNamedArgs(); + String tableName = getArgumentValue(TABLE); + validateTableName(tableName); + String fieldName = getArgumentValue(FIELD); + validateFieldName(fieldName); + String vectorLiteral = getArgumentValue(VECTOR); + String optionStr = getArgumentValue(OPTION); + + float[] vector = parseVector(vectorLiteral); + Map options = parseOptions(optionStr); + validateOptions(options); + + // Strip filter_type — it's a SQL-layer directive, not a knn parameter + FilterType filterType = null; + if (options.containsKey("filter_type")) { + filterType = FilterType.fromString(options.remove("filter_type")); + } + + return new VectorSearchIndex( + client, settings, tableName, fieldName, vector, options, filterType, knnCapability); + } + + private float[] parseVector(String vectorLiteral) { + String cleaned = vectorLiteral.replaceAll("[\\[\\]]", "").trim(); + if (cleaned.isEmpty()) { + throw new ExpressionEvaluationException("Vector literal must not be empty"); + } + // Reject common non-comma separators before Float.parseFloat fails with a generic + // "Invalid vector component" that doesn't hint the user at the separator. + if (cleaned.indexOf(';') >= 0 || cleaned.indexOf(':') >= 0 || cleaned.indexOf('|') >= 0) { + throw new ExpressionEvaluationException( + String.format( + "Invalid vector literal '%s': vector= requires comma-separated components," + + " e.g., vector='[1.0,2.0,3.0]'", + vectorLiteral)); + } + // Preserve trailing empties (split(",", -1)) so malformed literals like "[1.0,]" or + // "[1.0,,2.0]" surface an explicit error instead of silently shrinking the vector. 
+ String[] parts = cleaned.split(",", -1); + float[] vector = new float[parts.length]; + for (int i = 0; i < parts.length; i++) { + String component = parts[i].trim(); + if (component.isEmpty()) { + throw new ExpressionEvaluationException( + String.format( + "Invalid vector component at position %d: must be a number (check for" + + " trailing or consecutive commas in '%s')", + i, vectorLiteral)); + } + try { + vector[i] = Float.parseFloat(component); + } catch (NumberFormatException e) { + throw new ExpressionEvaluationException( + String.format("Invalid vector component '%s': must be a number", component)); + } + if (!Float.isFinite(vector[i])) { + throw new ExpressionEvaluationException( + String.format("Invalid vector component '%s': must be a finite number", component)); + } + } + return vector; + } + + static Map parseOptions(String optionStr) { + Map options = new LinkedHashMap<>(); + // A wholly empty option string is handled downstream with a clearer "missing required option" + // message than a generic malformed-segment error. + if (optionStr.trim().isEmpty()) { + return options; + } + // split(",", -1) preserves trailing empties so malformed inputs like "k=5," or "k=5,,k2=v" + // surface an explicit error instead of being silently dropped. 
+ String[] pairs = optionStr.split(",", -1); + for (String pair : pairs) { + String trimmed = pair.trim(); + if (trimmed.isEmpty()) { + throw new ExpressionEvaluationException( + "Malformed option segment '': expected key=value (check for trailing or" + + " consecutive commas)"); + } + String[] kv = trimmed.split("=", 2); + if (kv.length != 2 || kv[0].trim().isEmpty() || kv[1].trim().isEmpty()) { + throw new ExpressionEvaluationException( + String.format("Malformed option segment '%s': expected key=value", trimmed)); + } + String key = kv[0].trim(); + if (options.containsKey(key)) { + throw new ExpressionEvaluationException(String.format("Duplicate option key '%s'", key)); + } + options.put(key, kv[1].trim()); + } + return options; + } + + /** + * Reject non-named arguments, null arg names, and duplicate named arguments early. Runs before + * any list-index-based lookup so a malformed argument list can never cause an AIOOBE downstream. + */ + private void validateNamedArgs() { + HashSet seen = new HashSet<>(); + for (Expression arg : arguments) { + if (!(arg instanceof NamedArgumentExpression)) { + throw new ExpressionEvaluationException( + "vectorSearch() requires named arguments (e.g., table='index'), " + + "but received: " + + arg.getClass().getSimpleName()); + } + String name = ((NamedArgumentExpression) arg).getArgName(); + if (name == null || name.isEmpty()) { + throw new ExpressionEvaluationException( + "vectorSearch() requires named arguments (e.g., table='index'), " + + "but received an argument with no name"); + } + if (!seen.add(name.toLowerCase(java.util.Locale.ROOT))) { + throw new ExpressionEvaluationException( + "Duplicate argument name '" + + name + + "' in vectorSearch(); each named argument may appear at most once"); + } + } + } + + /** + * Reject table names with characters that could corrupt the WrapperQueryBuilder JSON or escape + * the target index name. 
Allows alphanumeric, dots, underscores, and hyphens (the characters + * OpenSearch index names already permit). Explicitly rejects wildcards ('*') and multi-target + * patterns (comma-separated) with a dedicated message, because vectorSearch() targets a single + * concrete index or alias and fan-out patterns would otherwise fall through to the generic regex + * message. Also rejects the `_all` routing target and the pathologic `.` / `..` names because + * those either fan out to every index or are not valid concrete index names. Other native-invalid + * names (leading dot, leading hyphen, bare underscore, uppercase, and so on) are intentionally + * passed through for the OpenSearch client to reject with its own error message. + */ + private void validateTableName(String tableName) { + // Dedicated error for fan-out patterns ('*' and ',') before the generic regex; see Javadoc + // for why vectorSearch() targets a single index. + if (tableName.indexOf('*') >= 0 || tableName.indexOf(',') >= 0) { + throw new ExpressionEvaluationException( + String.format( + "Invalid table name '%s': vectorSearch() requires a single concrete index or alias;" + + " wildcards ('*') and multi-target patterns (comma-separated) are not" + + " supported", + tableName)); + } + if (!SAFE_FIELD_NAME.matcher(tableName).matches()) { + throw new ExpressionEvaluationException( + String.format( + "Invalid table name '%s': must contain only alphanumeric characters," + + " dots, underscores, or hyphens", + tableName)); + } + String lower = tableName.toLowerCase(java.util.Locale.ROOT); + if (lower.equals("_all") || tableName.equals(".") || tableName.equals("..")) { + throw new ExpressionEvaluationException( + String.format( + "Invalid table name '%s': vectorSearch() requires a single concrete index or alias;" + + " '_all', '.', and '..' are not supported", + tableName)); + } + } + + /** + * Reject field names with characters that could corrupt the WrapperQueryBuilder JSON. 
Allows + * alphanumeric, dots (nested fields), underscores, and hyphens. + */ + private void validateFieldName(String fieldName) { + if (!SAFE_FIELD_NAME.matcher(fieldName).matches()) { + throw new ExpressionEvaluationException( + String.format( + "Invalid field name '%s': must contain only alphanumeric characters," + + " dots, underscores, or hyphens", + fieldName)); + } + } + + /** + * Validates and canonicalizes option values. All P0 option values must be numeric. Parsing them + * here prevents non-numeric strings from reaching the raw JSON construction in buildKnnQuery(). + */ + private void validateOptions(Map options) { + // Reject unknown option keys — only P0 keys are allowed + for (String key : options.keySet()) { + if (!ALLOWED_OPTION_KEYS.contains(key)) { + throw new ExpressionEvaluationException( + String.format("Unknown option key '%s'. Supported keys: %s", key, ALLOWED_OPTION_KEYS)); + } + } + if (options.containsKey("filter_type")) { + // Validate early — fromString throws if invalid + FilterType.fromString(options.get("filter_type")); + } + boolean hasK = options.containsKey("k"); + boolean hasMaxDistance = options.containsKey("max_distance"); + boolean hasMinScore = options.containsKey("min_score"); + if (!hasK && !hasMaxDistance && !hasMinScore) { + throw new ExpressionEvaluationException( + "Missing required option: one of k, max_distance, or min_score"); + } + // Mutual exclusivity: exactly one search mode allowed + int modeCount = (hasK ? 1 : 0) + (hasMaxDistance ? 1 : 0) + (hasMinScore ? 
1 : 0); + if (modeCount > 1) { + throw new ExpressionEvaluationException( + "Only one of k, max_distance, or min_score may be specified"); + } + // Parse and canonicalize numeric values — closes JSON injection via option values + if (hasK) { + int k = parseIntOption(options, "k"); + if (k < 1 || k > 10000) { + throw new ExpressionEvaluationException( + String.format("k must be between 1 and 10000, got %d", k)); + } + } + if (hasMaxDistance) { + double maxDistance = parseDoubleOption(options, "max_distance"); + if (maxDistance < 0) { + throw new ExpressionEvaluationException( + String.format( + "max_distance must be non-negative, got %s", options.get("max_distance"))); + } + } + if (hasMinScore) { + double minScore = parseDoubleOption(options, "min_score"); + if (minScore < 0) { + throw new ExpressionEvaluationException( + String.format("min_score must be non-negative, got %s", options.get("min_score"))); + } + } + } + + private int parseIntOption(Map options, String key) { + try { + int value = Integer.parseInt(options.get(key)); + options.put(key, Integer.toString(value)); + return value; + } catch (NumberFormatException e) { + throw new ExpressionEvaluationException( + String.format("Option '%s' must be an integer, got '%s'", key, options.get(key))); + } + } + + private double parseDoubleOption(Map options, String key) { + try { + double value = Double.parseDouble(options.get(key)); + if (!Double.isFinite(value)) { + throw new ExpressionEvaluationException( + String.format("Option '%s' must be a finite number, got '%s'", key, options.get(key))); + } + options.put(key, Double.toString(value)); + return value; + } catch (NumberFormatException e) { + throw new ExpressionEvaluationException( + String.format("Option '%s' must be a number, got '%s'", key, options.get(key))); + } + } + + private String getArgumentValue(String name) { + return arguments.stream() + .filter(arg -> ((NamedArgumentExpression) arg).getArgName().equalsIgnoreCase(name)) + .map(arg -> 
((NamedArgumentExpression) arg).getValue().valueOf().stringValue()) + .findFirst() + .orElseThrow( + () -> + new ExpressionEvaluationException( + String.format("Missing required argument: %s", name))); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolver.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolver.java new file mode 100644 index 00000000000..8db1f270afd --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolver.java @@ -0,0 +1,109 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.HashSet; +import java.util.List; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.function.FunctionBuilder; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.FunctionResolver; +import org.opensearch.sql.expression.function.FunctionSignature; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; + +public class VectorSearchTableFunctionResolver implements FunctionResolver { + + public static final String VECTOR_SEARCH = "vectorsearch"; + public static final String TABLE = "table"; + public static final String FIELD = "field"; + public static final String VECTOR = "vector"; + public static final String OPTION = "option"; + public static final List ARGUMENT_NAMES = List.of(TABLE, FIELD, VECTOR, OPTION); + + private final 
OpenSearchClient client; + private final Settings settings; + private final KnnPluginCapability knnCapability; + + public VectorSearchTableFunctionResolver(OpenSearchClient client, Settings settings) { + this(client, settings, new KnnPluginCapability(client)); + } + + VectorSearchTableFunctionResolver( + OpenSearchClient client, Settings settings, KnnPluginCapability knnCapability) { + this.client = client; + this.settings = settings; + this.knnCapability = knnCapability; + } + + @Override + public Pair resolve(FunctionSignature unresolvedSignature) { + FunctionName functionName = FunctionName.of(VECTOR_SEARCH); + FunctionSignature functionSignature = + new FunctionSignature(functionName, List.of(STRING, STRING, STRING, STRING)); + FunctionBuilder functionBuilder = + (functionProperties, arguments) -> { + validateArguments(arguments); + return new VectorSearchTableFunctionImplementation( + functionName, arguments, client, settings, knnCapability); + }; + return Pair.of(functionSignature, functionBuilder); + } + + @Override + public FunctionName getFunctionName() { + return FunctionName.of(VECTOR_SEARCH); + } + + private void validateArguments(List arguments) { + if (arguments.size() != ARGUMENT_NAMES.size()) { + throw new ExpressionEvaluationException( + String.format( + "vectorSearch requires %d arguments (%s), got %d", + ARGUMENT_NAMES.size(), String.join(", ", ARGUMENT_NAMES), arguments.size())); + } + // Shape check at the resolver so positional or unknown-named args produce a clean 400 before + // planning proceeds. The Implementation layer repeats the non-named and duplicate-name checks + // as defense-in-depth; the unknown-name allowlist is enforced only here because the + // Implementation looks up values by known keys and does not need to re-validate the allowlist. 
+ HashSet seen = new HashSet<>(); + for (Expression arg : arguments) { + if (!(arg instanceof NamedArgumentExpression)) { + throw new ExpressionEvaluationException( + "vectorSearch() requires named arguments (e.g., table='index'), " + + "but received: " + + arg.getClass().getSimpleName()); + } + String name = ((NamedArgumentExpression) arg).getArgName(); + if (name == null || name.isEmpty()) { + throw new ExpressionEvaluationException( + "vectorSearch() requires named arguments (e.g., table='index'), " + + "but received an argument with no name"); + } + String lower = name.toLowerCase(java.util.Locale.ROOT); + if (!ARGUMENT_NAMES.contains(lower)) { + throw new ExpressionEvaluationException( + String.format( + "Unknown argument name '%s' in vectorSearch(); allowed names are %s", + name, ARGUMENT_NAMES)); + } + if (!seen.add(lower)) { + throw new ExpressionEvaluationException( + "Duplicate argument name '" + + name + + "' in vectorSearch(); each named argument may appear at most once"); + } + } + // At this point `seen` holds exactly ARGUMENT_NAMES.size() entries (no duplicates, no unknowns, + // and arity matches), so every required name is present. No separate missing-name check needed. 
+ } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapability.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapability.java new file mode 100644 index 00000000000..9ba59915e1d --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapability.java @@ -0,0 +1,92 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.capability; + +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicReference; +import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest; +import org.opensearch.action.admin.cluster.node.info.NodesInfoResponse; +import org.opensearch.action.admin.cluster.node.info.PluginsAndModules; +import org.opensearch.plugins.PluginInfo; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.transport.client.node.NodeClient; + +/** + * Probes the cluster's Nodes Info API once and caches whether the k-NN plugin is installed, so + * vectorSearch() fails fast with a clear error when the plugin is absent instead of surfacing a + * native OpenSearch error deep in execution. + * + *

    The probe requires a {@link NodeClient}. In REST-client mode (standalone SQL service) the node + * client is absent and the check is skipped — execution-time errors remain the signal there. + * + *

    The check runs lazily at scan open() — i.e. only when a vectorSearch() query is actually + * executed — so analysis-time paths like _explain and local argument validation keep working on + * clusters without k-NN. + */ +public class KnnPluginCapability { + + /** + * Canonical k-NN plugin class. Using the class name (not artifact name) so the check is stable + * across packaging variants. + */ + private static final String KNN_PLUGIN_CLASSNAME = "org.opensearch.knn.plugin.KNNPlugin"; + + private final OpenSearchClient client; + private final AtomicReference cached = new AtomicReference<>(); + + public KnnPluginCapability(OpenSearchClient client) { + this.client = client; + } + + /** + * Throws {@link ExpressionEvaluationException} with a user-facing message if the k-NN plugin is + * not installed on any node in the cluster. The result is cached after the first successful + * probe; probe failures are not cached so the next call retries. + */ + public void requireInstalled() { + Boolean hit = cached.get(); + if (hit == null) { + Optional probed = probe(); + if (probed.isEmpty()) { + // Probe unavailable (REST-client mode, no NodeClient). Don't block — execution-time + // errors will surface if k-NN is genuinely missing. + return; + } + hit = probed.get(); + cached.set(hit); + } + if (!hit) { + throw new ExpressionEvaluationException( + "vectorSearch() requires the k-NN plugin, which is not installed on this cluster." 
+ + " Install opensearch-knn or use a cluster that has it."); + } + } + + private Optional probe() { + Optional maybeNode = client.getNodeClient(); + if (maybeNode.isEmpty()) { + return Optional.empty(); + } + NodeClient node = maybeNode.get(); + try { + NodesInfoRequest request = new NodesInfoRequest().clear().addMetric("plugins"); + NodesInfoResponse response = node.admin().cluster().nodesInfo(request).actionGet(); + boolean installed = + response.getNodes().stream() + .map(info -> info.getInfo(PluginsAndModules.class)) + .filter(Objects::nonNull) + .flatMap(p -> p.getPluginInfos().stream()) + .map(PluginInfo::getClassname) + .anyMatch(KNN_PLUGIN_CLASSNAME::equals); + return Optional.of(installed); + } catch (Exception e) { + // Probe failed (IO error, timeout). Don't cache — let the next call retry. + return Optional.empty(); + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index edbd1b06393..609a5aaa92f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -60,8 +60,6 @@ import org.opensearch.sql.opensearch.request.PredicateAnalyzer; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -177,7 +175,7 @@ public double estimateRowCount(RelMetadataQuery mq) { switch 
(operation.type()) { case AGGREGATION -> { dRows = mq.getRowCount((RelNode) operation.digest()); - dCpu += dRows * getAggMultiplier(operation); + dCpu += dRows * getAggMultiplier(operation, pushDownContext); } // Ignored Project and Highlight in cost accumulation, but they affect the external cost case PROJECT, HIGHLIGHT -> {} @@ -236,7 +234,8 @@ public double estimateRowCount(RelMetadataQuery mq) { } /** See source in {@link org.apache.calcite.rel.core.Aggregate::computeSelfCost} */ - private static float getAggMultiplier(PushDownOperation operation) { + private static float getAggMultiplier( + PushDownOperation operation, PushDownContext pushDownContext) { // START CALCITE List aggCalls = ((Aggregate) operation.digest()).getAggCallList(); float multiplier = 1f + (float) aggCalls.size() * 0.125f; @@ -251,7 +250,9 @@ private static float getAggMultiplier(PushDownOperation operation) { // For script aggregation, we need to multiply the multiplier by 1.1 to make up the cost. As we // prefer to have non-script agg push down after optimized by {@link PPLAggregateConvertRule} - multiplier *= (float) Math.pow(1.1f, ((AggPushDownAction) operation.action()).getScriptCount()); + long scriptCount = + pushDownContext.getAggSpec() == null ? 
0 : pushDownContext.getAggSpec().getScriptCount(); + multiplier *= (float) Math.pow(1.1f, scriptCount); return multiplier; } @@ -328,10 +329,11 @@ && isAnyCollationNameInAggregators(collationNames)) { Object digest; if (pushDownContext.isAggregatePushed()) { // Push down the sort into the aggregation bucket - action = - (AggregationBuilderAction) - aggAction -> - aggAction.pushDownSortIntoAggBucket(collations, getRowType().getFieldNames()); + pushDownContextWithoutSort.setAggSpec( + pushDownContextWithoutSort + .getAggSpec() + .withBucketSort(collations, getRowType().getFieldNames())); + action = (OSRequestBuilderAction) requestBuilder -> {}; digest = collations; pushDownContextWithoutSort.add(PushDownType.SORT, digest, action); return buildScan( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index dd9e3c3f6bc..adbd01e3a04 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -66,9 +66,9 @@ public class CalciteEnumerableGraphLookup extends GraphLookup implements Enumera * @param cluster Cluster * @param traitSet Trait set (must include EnumerableConvention) * @param source Source table RelNode - * @param lookup Lookup table RelNode // * @param lookupIndex OpenSearchIndex for the lookup table - * (extracted from lookup RelNode) - * @param startField Field name for start entities + * @param lookup Lookup table RelNode + * @param startField Field name for start entities (null in literal start mode) + * @param startValues Literal start values for top-level graphLookup (null in piped mode) * @param fromField Field name for outgoing edges * @param toField Field name for incoming edges * @param outputField Name of the output array 
field @@ -85,7 +85,8 @@ public CalciteEnumerableGraphLookup( RelTraitSet traitSet, RelNode source, RelNode lookup, - String startField, + @Nullable String startField, + @Nullable List startValues, String fromField, String toField, String outputField, @@ -102,6 +103,7 @@ public CalciteEnumerableGraphLookup( source, lookup, startField, + startValues, fromField, toField, outputField, @@ -122,6 +124,7 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { inputs.get(0), inputs.get(1), startField, + startValues, fromField, toField, outputField, @@ -180,12 +183,13 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec private final CalciteEnumerableIndexScan lookupScan; private final Enumerator<@Nullable Object> sourceEnumerator; private final List lookupFields; - private final int startFieldIndex; + private int startFieldIndex; private final int fromFieldIdx; private final int toFieldIdx; private Object[] current = null; private boolean batchModeCompleted = false; + private boolean literalStartCompleted = false; @SuppressWarnings("unchecked") GraphLookupEnumerator(CalciteEnumerableGraphLookup graphLookup) { @@ -203,8 +207,11 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec } // When usePIT is true, no limit is set, allowing PIT-based pagination for complete results - // Get the source enumerator - if (graphLookup.getSource() instanceof Scannable scannable) { + // Get the source enumerator (null for literal start mode) + if (graphLookup.getStartValues() != null) { + this.sourceEnumerator = null; + this.startFieldIndex = -1; + } else if (graphLookup.getSource() instanceof Scannable scannable) { Enumerable sourceEnum = scannable.scan(); this.sourceEnumerator = (Enumerator<@Nullable Object>) sourceEnum.enumerator(); } else { @@ -213,12 +220,15 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec } try { - List sourceFields = 
graphLookup.getSource().getRowType().getFieldNames(); this.lookupFields = graphLookup.getLookup().getRowType().getFieldNames(); - this.startFieldIndex = sourceFields.indexOf(graphLookup.getStartField()); this.fromFieldIdx = lookupFields.indexOf(graphLookup.fromField); this.toFieldIdx = lookupFields.indexOf(graphLookup.toField); + if (graphLookup.getStartValues() == null) { + List sourceFields = graphLookup.getSource().getRowType().getFieldNames(); + this.startFieldIndex = sourceFields.indexOf(graphLookup.getStartField()); + } + // Push down user-specified filter to the lookup scan if (graphLookup.filter != null) { List schema = graphLookup.getLookup().getRowType().getFieldNames(); @@ -236,26 +246,51 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec } } } catch (Exception e) { - sourceEnumerator.close(); + if (sourceEnumerator != null) { + sourceEnumerator.close(); + } throw e; } } @Override public Object current() { + // Literal start mode: single column output, Calcite expects scalar value + if (graphLookup.getStartValues() != null) { + return current[0]; + } // source fields + output array (normal mode) or [source array, lookup array] (batch mode) return current; } @Override public boolean moveNext() { - if (graphLookup.batchMode) { + if (graphLookup.getStartValues() != null) { + return moveNextLiteralStartMode(); + } else if (graphLookup.batchMode) { return moveNextBatchMode(); } else { return moveNextNormalMode(); } } + /** + * Literal start mode: perform single BFS seeded with all literal start values, return one row. 
+ */ + private boolean moveNextLiteralStartMode() { + if (literalStartCompleted) { + return false; + } + literalStartCompleted = true; + + // Perform single BFS seeded with all literal start values + List bfsResults = performBfs(graphLookup.getStartValues()); + + // Output single row: just the hierarchy array + current = new Object[] {bfsResults}; + return true; + } + /** * Batch mode: collect all source start values, perform unified BFS, return single aggregated * row. @@ -541,13 +576,18 @@ private void collectValues(Object value, List collector, Set vis @Override public void reset() { - sourceEnumerator.reset(); + if (sourceEnumerator != null) { + sourceEnumerator.reset(); + } current = null; + literalStartCompleted = false; } @Override public void close() { - sourceEnumerator.close(); + if (sourceEnumerator != null) { + sourceEnumerator.close(); + } } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 4d32562f2fd..740801ff418 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -41,7 +41,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; -import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; import org.opensearch.sql.ast.tree.HighlightConfig; import org.opensearch.sql.calcite.plan.HighlightPushDown; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; @@ -59,8 +58,7 @@ import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.context.AbstractAction; -import 
org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; +import org.opensearch.sql.opensearch.storage.scan.context.AggSpec; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -296,7 +294,7 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { AbstractAction action; if (pushDownContext.isAggregatePushed()) { // For aggregate, we do nothing on query builder but only change the schema of the scan. - action = (AggregationBuilderAction) aggAction -> {}; + action = (OSRequestBuilderAction) requestBuilder -> {}; } else { action = (OSRequestBuilderAction) @@ -332,13 +330,8 @@ private RelTraitSet reIndexCollations(List selectedColumns) { public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { try { - if (!pushDownContext.isAggregatePushed()) return null; - List aggregationBuilders = - pushDownContext.getAggPushDownAction().getBuilderAndParser().getLeft(); - if (aggregationBuilders.size() != 1) { - return null; - } - if (!(aggregationBuilders.getFirst() instanceof CompositeAggregationBuilder)) { + AggSpec aggSpec = pushDownContext.getAggSpec(); + if (aggSpec == null || !aggSpec.isCompositeAggregation()) { return null; } List collationNames = getCollationNames(sort.getCollation().getFieldCollations()); @@ -346,11 +339,9 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { return null; } CalciteLogicalIndexScan newScan = copyWithNewTraitSet(sort.getTraitSet()); - newScan - .pushDownContext - .getAggPushDownAction() - .rePushDownSortAggMeasure( - sort.getCollation().getFieldCollations(), rowType.getFieldNames()); + newScan.pushDownContext.setAggSpec( + aggSpec.withSortMeasure( + sort.getCollation().getFieldCollations(), rowType.getFieldNames())); 
AbstractAction action = (OSRequestBuilderAction) requestAction -> requestAction.resetRequestTotal(); Object digest = sort.getCollation().getFieldCollations(); @@ -367,7 +358,7 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { public CalciteLogicalIndexScan pushDownRareTop(Project project, RareTopDigest digest) { try { CalciteLogicalIndexScan newScan = copyWithNewSchema(project.getRowType()); - newScan.pushDownContext.getAggPushDownAction().rePushDownRareTop(digest); + newScan.pushDownContext.setAggSpec(pushDownContext.getAggSpec().withRareTop(digest)); AbstractAction action = (OSRequestBuilderAction) requestAction -> requestAction.resetRequestTotal(); newScan.pushDownContext.add(PushDownType.RARE_TOP, digest, action); @@ -424,9 +415,13 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, @Nullable Project OpenSearchDataType.of( OpenSearchTypeFactory.convertRelDataTypeToExprType( field.getType())))); - AggPushDownAction action = - new AggPushDownAction(builderAndParser, extendedTypeMapping, bucketNames); - newScan.pushDownContext.add(PushDownType.AGGREGATION, aggregate, action); + AggSpec aggSpec = AggSpec.create(extendedTypeMapping, bucketNames, builderAndParser); + // AggPushDownAction is lazily materialized by AggSpec.buildAction() and then this action + // will materialize agg request builder. + // The AGGREGATION pushdown operation in PushDownContext remains a no-op marker here. + newScan.pushDownContext.setAggSpec(aggSpec); + newScan.pushDownContext.add( + PushDownType.AGGREGATION, aggregate, (OSRequestBuilderAction) requestBuilder -> {}); return newScan; } catch (Exception e) { if (LOG.isDebugEnabled()) { @@ -440,9 +435,7 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of try { if (pushDownContext.isAggregatePushed()) { int totalSize = limit + offset; - // Since the AggPushDownAction is shared among different PushDownContext, its size() may be - // inaccurate(<= the actual size). 
- // So take the previous limit into account to decide whether it can update the context. + AggSpec aggSpec = pushDownContext.getAggSpec(); boolean canReduceEstimatedRowsCount = !pushDownContext.isLimitPushed() || pushDownContext.getQueue().reversed().stream() @@ -452,27 +445,20 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of .map(op -> (LimitDigest) op.digest()) .map(d -> totalSize < d.offset() + d.limit()) .orElse(true); - - // Push down the limit into the aggregation bucket in advance to detect whether the limit - // can update the aggregation builder boolean canUpdate = - canReduceEstimatedRowsCount - || pushDownContext.getAggPushDownAction().pushDownLimitIntoBucketSize(totalSize); + canReduceEstimatedRowsCount || aggSpec.canPushDownLimitIntoBucketSize(totalSize); if (!canUpdate && offset > 0) return null; CalciteLogicalIndexScan newScan = this.copyWithNewSchema(getRowType()); if (canUpdate) { - newScan - .pushDownContext - .getAggPushDownAction() - .pushDownLimitIntoBucketSize(limit + offset); + newScan.pushDownContext.setAggSpec(aggSpec.withLimit(limit + offset)); } AbstractAction action; - if (pushDownContext.getAggPushDownAction().isCompositeAggregation()) { + if (newScan.pushDownContext.getAggSpec().isCompositeAggregation()) { action = (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownLimitToRequestTotal(limit, offset); } else { - action = (AggregationBuilderAction) aggAction -> {}; + action = (OSRequestBuilderAction) requestBuilder -> {}; } newScan.pushDownContext.add(PushDownType.LIMIT, new LimitDigest(limit, offset), action); return offset > 0 ? 
sort.copy(sort.getTraitSet(), List.of(newScan)) : newScan; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java index 6af9ad1e8d8..b0315e68eab 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java @@ -11,13 +11,16 @@ import lombok.EqualsAndHashCode; import lombok.ToString; import org.apache.calcite.linq4j.Enumerator; +import org.opensearch.core.tasks.TaskCancelledException; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.exception.NonFallbackCalciteException; import org.opensearch.sql.expression.HighlightExpression; import org.opensearch.sql.monitor.ResourceMonitor; import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.executor.OpenSearchQueryManager; import org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.tasks.CancellableTask; /** * Supports a simple iteration over a collection for OpenSearch index @@ -55,6 +58,8 @@ public class OpenSearchIndexEnumerator implements Enumerator { private ExprValue current = null; + private CancellableTask cancellableTask; + public OpenSearchIndexEnumerator( OpenSearchClient client, List fields, @@ -80,6 +85,7 @@ public OpenSearchIndexEnumerator( this.client = client; this.bgScanner = new BackgroundSearchScanner(client, maxResultWindow, queryBucketSize); this.bgScanner.startScanning(request); + this.cancellableTask = OpenSearchQueryManager.getCancellableTask(); } private Iterator fetchNextBatch() { @@ -112,6 +118,10 @@ public boolean moveNext() { return false; } + if (cancellableTask != null && cancellableTask.isCancelled()) { + throw new 
TaskCancelledException("The task is cancelled."); + } + boolean shouldCheck = (queryCount % NUMBER_OF_NEXT_CALL_TO_CHECK == 0); if (shouldCheck) { org.opensearch.sql.monitor.ResourceStatus status = this.monitor.getStatus(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index 70e6f0f2157..af9d46cd745 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -45,8 +45,8 @@ public OpenSearchIndexScanBuilder( this.scanFactory = scanFactory; } - /** Constructor used for unit tests. */ - protected OpenSearchIndexScanBuilder( + /** Constructor that accepts a custom PushDownQueryBuilder delegate. */ + public OpenSearchIndexScanBuilder( PushDownQueryBuilder translator, Function scanFactory) { this.delegate = translator; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScan.java new file mode 100644 index 00000000000..86d1934f132 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScan.java @@ -0,0 +1,37 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; + +/** + * OpenSearch scan for vector-search relations. 
Delegates everything to {@link OpenSearchIndexScan} + * except for {@link #open()}, where it first verifies the k-NN plugin is installed so we fail fast + * with a clear SQL error before the native request would fail deep in execution. The check is + * deferred to open() (not applyArguments() or the scan builder) so that analysis-time paths like + * _explain continue to work on clusters without k-NN. + */ +public class VectorSearchIndexScan extends OpenSearchIndexScan { + + private final KnnPluginCapability knnCapability; + + public VectorSearchIndexScan( + OpenSearchClient client, + int maxResponseSize, + OpenSearchRequest request, + KnnPluginCapability knnCapability) { + super(client, maxResponseSize, request); + this.knnCapability = knnCapability; + } + + @Override + public void open() { + knnCapability.requireInstalled(); + super.open(); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilder.java new file mode 100644 index 00000000000..a898ac41299 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilder.java @@ -0,0 +1,160 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import java.util.function.Function; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.logical.LogicalProject; +import org.opensearch.sql.planner.logical.LogicalSort; + +/** + * Scan builder for vector search relations. 
+ * + *

+ * <p>Rejects planner shapes that the SQL surface cannot express safely:
+ *
+ * <ul>
+ *   <li>Aggregations — native OpenSearch k-NN supports aggregations alongside similarity
+ *       search, but the SQL layer does not plumb them through, so we fail fast rather than return
+ *       silently unaggregated results.
+ *   <li>Outer operators over a vectorSearch() subquery — when vectorSearch() is wrapped in a
+ *       subquery (e.g. {@code SELECT * FROM (SELECT v.id FROM vectorSearch(...) AS v) t WHERE
+ *       t.price < 150}), outer WHERE / ORDER BY / OFFSET / GROUP BY / aggregation / DISTINCT do
+ *       not participate in the vectorSearch pushdown contract (the inner {@link LogicalProject}
+ *       sits between the outer operator and this scan builder, so those nodes never match the
+ *       direct-adjacency push-down patterns). They would then be applied in memory after top-k
+ *       results have been selected by vector distance, which can silently yield zero rows or
+ *       mis-ordered results. We detect these shapes in {@link #validatePlan(LogicalPlan)} and
+ *       reject with a clear error.
+ * </ul>
    + */ +public class VectorSearchIndexScanBuilder extends OpenSearchIndexScanBuilder { + + public VectorSearchIndexScanBuilder( + PushDownQueryBuilder translator, + Function scanFactory) { + super(translator, scanFactory); + } + + @Override + public boolean pushDownAggregation(LogicalAggregation aggregation) { + throw new ExpressionEvaluationException( + "Aggregations are not supported on vectorSearch() relations."); + } + + /** + * Walk the fully-optimized plan and reject outer-operator-over-subquery shapes. We look for an + * outer {@link LogicalFilter}, {@link LogicalSort}, {@link LogicalLimit} with non-zero offset, or + * {@link LogicalAggregation} whose descendant chain reaches this scan builder through one or more + * {@link LogicalProject} nodes (the subquery-boundary marker). An operator directly above this + * scan builder is fine — those go through the push-down contract in the query builder. + */ + @Override + public void validatePlan(LogicalPlan root) { + checkForOuterOperator(root, null, false); + } + + /** + * Recursive walker that tracks the outermost "risky" operator seen on the current walk path and + * whether a {@link LogicalProject} has been crossed since then: + * + *
+ * <ul>
+ *   <li>{@code outerOp} — name of the outermost filter/sort/offset/aggregation ancestor, or
+ *       {@code null} if none. Projects only matter below such an operator — without one, a
+ *       project is just the outer SELECT and should not trigger rejection.
+ *   <li>{@code sawProjectSinceOuter} — true iff a {@link LogicalProject} has been seen between
+ *       the outermost risky ancestor and the current position. Once separation by a Project has
+ *       been established, it is permanent — a lower {@link LogicalFilter} below the Project does
+ *       not undo the outer boundary.
+ * </ul>
+ *
    This matters for shapes like {@code Filter(outer) -> Project(subquery) -> Filter(inner) -> + * Scan}, where the outer predicate is still blocked from reaching the push-down contract by the + * subquery Project regardless of the inner filter. Resetting on the inner filter would make the + * walker miss this shape. + */ + private void checkForOuterOperator( + LogicalPlan node, String outerOp, boolean sawProjectSinceOuter) { + if (node == this) { + if (outerOp != null && sawProjectSinceOuter) { + throw new ExpressionEvaluationException(rejectionMessage(outerOp)); + } + return; + } + String nextOuterOp = outerOp; + boolean nextSawProject = sawProjectSinceOuter; + if (outerOp == null) { + String operator = classifyOuterOperator(node); + if (operator != null) { + nextOuterOp = operator; + } + } else if (node instanceof LogicalProject) { + nextSawProject = true; + } + for (LogicalPlan child : node.getChild()) { + checkForOuterOperator(child, nextOuterOp, nextSawProject); + } + } + + /** + * Returns a user-facing label for operators that cannot safely sit above a vectorSearch() + * subquery, or {@code null} for operators that are fine (Project, scan, etc.). {@link + * LogicalLimit} with {@code offset == 0} is safe — plain LIMIT wrapping a subquery just caps the + * row count. Non-zero OFFSET skips top-k rows by distance and is rejected. + */ + private static String classifyOuterOperator(LogicalPlan node) { + if (node instanceof LogicalFilter) { + return "WHERE"; + } + if (node instanceof LogicalSort) { + return "ORDER BY"; + } + if (node instanceof LogicalAggregation) { + return "GROUP BY / aggregation / DISTINCT"; + } + if (node instanceof LogicalLimit) { + Integer offset = ((LogicalLimit) node).getOffset(); + if (offset != null && offset != 0) { + return "OFFSET"; + } + } + return null; + } + + // Operator-specific messages: the generic "move it inside the subquery" advice is only right + // for WHERE and for ORDER BY _score DESC. 
OFFSET, aggregation, GROUP BY, and DISTINCT are + // themselves unsupported on vectorSearch() directly, so the message must not claim a workaround + // that would only trip the user on a second validation error. + private static String rejectionMessage(String outerOp) { + switch (outerOp) { + case "WHERE": + return "Outer WHERE on a vectorSearch() subquery is not supported: the predicate does not" + + " participate in the vectorSearch pushdown contract and would be applied only" + + " after top-k results have been selected by vector distance, which can silently" + + " yield zero rows. Move the WHERE into the same SELECT block as vectorSearch() so" + + " it participates in the vectorSearch WHERE pushdown contract."; + case "ORDER BY": + return "Outer ORDER BY on a vectorSearch() subquery is not supported: sorting does not" + + " participate in the vectorSearch pushdown contract and would be applied only" + + " after top-k results have been selected by vector distance, which can yield" + + " mis-ordered results. Use ORDER BY ._score DESC in the same SELECT block" + + " as vectorSearch(), or omit ORDER BY."; + case "OFFSET": + return "Outer OFFSET on a vectorSearch() subquery is not supported. OFFSET is not" + + " supported on vectorSearch(); use LIMIT only."; + case "GROUP BY / aggregation / DISTINCT": + return "Outer GROUP BY / aggregation / DISTINCT on a vectorSearch() subquery is not" + + " supported. 
Aggregations and DISTINCT are not supported on vectorSearch()" + + " relations."; + default: + return "Outer " + outerOp + " on a vectorSearch() subquery is not supported."; + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java new file mode 100644 index 00000000000..33714a793ab --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java @@ -0,0 +1,285 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ConstantScoreQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; +import org.opensearch.index.query.MatchBoolPrefixQueryBuilder; +import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; +import org.opensearch.index.query.MatchPhraseQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.NestedQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.ScriptQueryBuilder; +import org.opensearch.index.query.SimpleQueryStringBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.WildcardQueryBuilder; +import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.ast.tree.Sort.SortOption; +import 
org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.ExpressionNodeVisitor; +import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.storage.FilterType; +import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder; +import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder.ScriptQueryUnSupportedException; +import org.opensearch.sql.opensearch.storage.serde.DefaultExpressionSerializer; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalSort; + +/** + * Query builder for vector search. The knn relevance score is preserved regardless of placement + * strategy — in {@code EFFICIENT} mode the knn query carries its own scores, and in {@code POST} + * mode the knn query sits in a scoring ({@code must}) context while the WHERE clause is applied as + * a non-scoring ({@code filter}) clause. + * + *

+ * <p>Supports two filter placement strategies via {@link FilterType}:
+ *
+ * <ul>
+ *   <li>{@code EFFICIENT} — WHERE inside {@code knn.filter} for pre-filtering during ANN search
+ *       (default).
+ *   <li>{@code POST} — WHERE in {@code bool.filter} outside knn (post-filtering fallback, used
+ *       when the WHERE shape is not compatible with pre-filtering).
+ * </ul>
    + */ +public class VectorSearchQueryBuilder extends OpenSearchIndexScanQueryBuilder { + + private final QueryBuilder knnQuery; + private final Map options; + private final FilterType filterType; + private final boolean filterTypeExplicit; + private final Function rebuildKnnWithFilter; + private boolean filterPushed = false; + private boolean limitPushed = false; + + /** Full constructor with filter type support. */ + public VectorSearchQueryBuilder( + OpenSearchRequestBuilder requestBuilder, + QueryBuilder knnQuery, + Map options, + FilterType filterType, + boolean filterTypeExplicit, + Function rebuildKnnWithFilter) { + super(requestBuilder); + requestBuilder.getSourceBuilder().query(knnQuery); + this.knnQuery = knnQuery; + this.options = options; + this.filterType = filterType != null ? filterType : FilterType.EFFICIENT; + this.filterTypeExplicit = filterTypeExplicit; + if (this.filterType == FilterType.EFFICIENT && rebuildKnnWithFilter == null) { + throw new IllegalArgumentException( + "EFFICIENT filter mode requires a non-null rebuildKnnWithFilter callback"); + } + this.rebuildKnnWithFilter = rebuildKnnWithFilter; + } + + /** + * Test-only constructor — pins {@link FilterType#POST} so callers that do not wire a {@code + * rebuildKnnWithFilter} callback (unit tests) can still exercise the push-down contract. + * Production callers always go through the full constructor, which defaults to {@link + * FilterType#EFFICIENT}. + */ + public VectorSearchQueryBuilder( + OpenSearchRequestBuilder requestBuilder, QueryBuilder knnQuery, Map options) { + this(requestBuilder, knnQuery, options, FilterType.POST, false, null); + } + + @Override + public boolean pushDownFilter(LogicalFilter filter) { + FilterQueryBuilder queryBuilder = new FilterQueryBuilder(new DefaultExpressionSerializer()); + Expression queryCondition = filter.getCondition(); + + // _score is synthetic, not a stored field; a range query on it silently returns 0 rows. 
+ // Users who want a score floor should use option='min_score=...'. + if (containsScoreReference(queryCondition)) { + throw new ExpressionEvaluationException( + "WHERE on _score is not supported on vectorSearch()." + + " Use option='min_score=...' for score-floor filtering."); + } + + QueryBuilder whereQuery; + try { + whereQuery = queryBuilder.build(queryCondition); + } catch (ScriptQueryUnSupportedException e) { + if (filterTypeExplicit) { + throw new ExpressionEvaluationException( + "filter_type only works when the WHERE clause can be translated to an" + + " OpenSearch filter. Rewrite the WHERE clause or omit filter_type."); + } + // Default mode: fall back to in-memory filtering (matches base class behavior) + return false; + } + filterPushed = true; + + if (filterType == FilterType.EFFICIENT) { + // Fail closed: knn.filter on AOSS rejects script queries and nested predicates expand the + // preview contract. Allow-list validator beats a blacklist walker. + validateEfficientFilterSafe(whereQuery); + QueryBuilder rebuiltKnn = rebuildKnnWithFilter.apply(whereQuery); + requestBuilder.getSourceBuilder().query(rebuiltKnn); + } else { + // POST mode: knn in must (scores), WHERE in filter (no scoring impact) + BoolQueryBuilder combined = QueryBuilders.boolQuery().must(knnQuery).filter(whereQuery); + requestBuilder.getSourceBuilder().query(combined); + } + return true; + } + + @Override + public boolean pushDownLimit(LogicalLimit limit) { + // OFFSET would shift the search window and silently drop top results; reject with a clear + // error rather than have the parent path push `from: ` into the request. + if (limit.getOffset() != null && limit.getOffset() != 0) { + throw new ExpressionEvaluationException( + "OFFSET is not supported on vectorSearch(). 
Remove OFFSET and use LIMIT only."); + } + validateLimitWithinK(limit.getLimit()); + limitPushed = true; + return super.pushDownLimit(limit); + } + + @Override + public boolean pushDownSort(LogicalSort sort) { + // Vector search returns results sorted by _score DESC by default. + // Only _score DESC is meaningful; reject all other sort expressions. + for (Pair sortItem : sort.getSortList()) { + Expression expr = sortItem.getRight(); + if (!(expr instanceof ReferenceExpression) + || !"_score".equals(((ReferenceExpression) expr).getAttr())) { + throw new ExpressionEvaluationException( + String.format( + "vectorSearch only supports ORDER BY _score DESC; " + + "unsupported sort expression: %s", + expr)); + } + if (sortItem.getLeft().getSortOrder() != Sort.SortOrder.DESC) { + throw new ExpressionEvaluationException( + "vectorSearch only supports ORDER BY _score DESC; _score ASC is not supported"); + } + } + // _score DESC is knn's natural order, so the sort itself is not pushed. Preserve the + // parent's sort.getCount() → limit contract; SQL sends 0, PPL may combine sort+limit. + if (sort.getCount() != 0) { + validateLimitWithinK(sort.getCount()); + limitPushed = true; + requestBuilder.pushDownLimit(sort.getCount(), 0); + } + return true; + } + + /** Validates that the requested limit does not exceed k in top-k mode. */ + private void validateLimitWithinK(int limit) { + if (options.containsKey("k")) { + int k = Integer.parseInt(options.get("k")); + if (limit > k) { + throw new ExpressionEvaluationException( + String.format("LIMIT %d exceeds k=%d in top-k vector search", limit, k)); + } + } + } + + // True if any ReferenceExpression in the tree names _score (case-insensitive, so quoted/ + // backticked variants cannot bypass the guard). 
+ private static boolean containsScoreReference(Expression expr) { + AtomicBoolean found = new AtomicBoolean(false); + expr.accept( + new ExpressionNodeVisitor() { + @Override + public Void visitReference(ReferenceExpression node, Void context) { + if (node.getAttr() != null && "_score".equalsIgnoreCase(node.getAttr())) { + found.set(true); + } + return null; + } + }, + null); + return found.get(); + } + + // Allow-list of leaf query types FilterQueryBuilder emits today. Any new wrapper or container + // appearing here must fail closed rather than silently embed under knn.filter. + private static final Set> SAFE_EFFICIENT_FILTER_LEAVES = + Set.of( + TermQueryBuilder.class, + RangeQueryBuilder.class, + WildcardQueryBuilder.class, + MatchQueryBuilder.class, + MatchPhraseQueryBuilder.class, + MatchPhrasePrefixQueryBuilder.class, + MultiMatchQueryBuilder.class, + QueryStringQueryBuilder.class, + SimpleQueryStringBuilder.class, + MatchBoolPrefixQueryBuilder.class, + ExistsQueryBuilder.class); + + // Package-private for direct branch coverage in unit tests. Fail-closed: recurse known + // containers, reject ScriptQueryBuilder/NestedQueryBuilder with targeted messages, allow + // listed leaves, reject everything else as unsupported shape. + static void validateEfficientFilterSafe(QueryBuilder qb) { + if (qb == null) { + return; + } + if (qb instanceof ScriptQueryBuilder) { + throw new ExpressionEvaluationException( + "vectorSearch WHERE pre-filtering does not support predicates that compile to" + + " script queries (arithmetic, function calls, CASE, date math). 
Rewrite the" + + " WHERE clause to use term/range/bool predicates, or set filter_type=post to" + + " apply the predicate after the k-NN search."); + } + if (qb instanceof BoolQueryBuilder) { + BoolQueryBuilder bool = (BoolQueryBuilder) qb; + bool.must().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe); + bool.filter().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe); + bool.should().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe); + bool.mustNot().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe); + return; + } + if (qb instanceof ConstantScoreQueryBuilder) { + validateEfficientFilterSafe(((ConstantScoreQueryBuilder) qb).innerQuery()); + return; + } + if (qb instanceof NestedQueryBuilder) { + throw new ExpressionEvaluationException( + "vectorSearch WHERE pre-filtering does not support nested predicates in this" + + " preview. Rewrite the WHERE clause using non-nested fields, or set" + + " filter_type=post to apply the predicate after the k-NN search."); + } + if (SAFE_EFFICIENT_FILTER_LEAVES.contains(qb.getClass())) { + return; + } + throw new ExpressionEvaluationException( + "vectorSearch WHERE pre-filtering encountered an unsupported filter query shape: " + + qb.getClass().getSimpleName() + + ". Rewrite the WHERE clause using simple term/range/bool predicates, or set" + + " filter_type=post to apply the predicate after the k-NN search."); + } + + @Override + public OpenSearchRequestBuilder build() { + if (filterTypeExplicit && !filterPushed) { + throw new ExpressionEvaluationException("filter_type requires a pushdownable WHERE clause"); + } + boolean isRadial = !options.containsKey("k"); + if (isRadial && !limitPushed) { + throw new ExpressionEvaluationException( + "LIMIT is required for radial vector search (max_distance or min_score)." 
+ + " Without LIMIT, the result set size is unbounded."); + } + return super.build(); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 7c15586d143..35a6c1f26cf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -12,6 +12,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -27,6 +28,7 @@ import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; @@ -49,7 +51,6 @@ public class AggPushDownAction implements OSRequestBuilderAction { private Pair, OpenSearchAggregationResponseParser> builderAndParser; private final Map extendedTypeMapping; - private final long scriptCount; // Record the output field names of all buckets as the sequence of buckets private List bucketNames; @@ -59,12 +60,10 @@ public AggPushDownAction( List bucketNames) { this.builderAndParser = builderAndParser; this.extendedTypeMapping = extendedTypeMapping; - this.scriptCount = - builderAndParser.getLeft().stream().mapToInt(AggPushDownAction::getScriptCount).sum(); this.bucketNames = 
bucketNames; } - private static int getScriptCount(AggregationBuilder aggBuilder) { + static int getScriptCount(AggregationBuilder aggBuilder) { if (aggBuilder instanceof NestedAggregationBuilder) { aggBuilder = aggBuilder.getSubAggregations().iterator().next(); } @@ -85,6 +84,25 @@ private static int getScriptCount(AggregationBuilder aggBuilder) { return 0; } + private static AggregatorFactories.Builder copySubAggregations(AggregationBuilder source) { + AggregatorFactories.Builder copiedFactories = new AggregatorFactories.Builder(); + source.getSubAggregations().forEach(copiedFactories::addAggregator); + source.getPipelineAggregations().forEach(copiedFactories::addPipelineAggregator); + return copiedFactories; + } + + private void replaceRootBuilder( + AggregationBuilder originalRoot, AggregationBuilder newInnerBuilder) { + AggregationBuilder finalBuilder = newInnerBuilder; + if (originalRoot instanceof NestedAggregationBuilder nested) { + finalBuilder = + AggregationBuilders.nested(nested.getName(), nested.path()) + .subAggregation(newInnerBuilder); + } + builderAndParser = + Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); + } + @Override public void apply(OpenSearchRequestBuilder requestBuilder) { requestBuilder.pushDownAggregation(builderAndParser); @@ -257,24 +275,74 @@ private TermsAggregationBuilder buildTermsAggregationBuilder( } /** Build a {@link DateHistogramAggregationBuilder} by {@link DateHistogramValuesSourceBuilder} */ - private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( - DateHistogramValuesSourceBuilder dateHisto, BucketOrder bucketOrder) { - DateHistogramAggregationBuilder dateHistoBuilder = - new DateHistogramAggregationBuilder(dateHisto.name()); - if (dateHisto.field() != null) { - dateHistoBuilder.field(dateHisto.field()); - } - if (dateHisto.script() != null) { - dateHistoBuilder.script(dateHisto.script()); + private static void copyDateHistogramInterval( + 
DateHistogramValuesSourceBuilder source, + Consumer fixedIntervalSetter, + Consumer calendarIntervalSetter) { + try { + fixedIntervalSetter.accept(source.getIntervalAsFixed()); + return; + } catch (IllegalArgumentException | IllegalStateException ignored) { + // Fallback to calendar interval. } try { - dateHistoBuilder.fixedInterval(dateHisto.getIntervalAsFixed()); - } catch (IllegalArgumentException e) { - dateHistoBuilder.calendarInterval(dateHisto.getIntervalAsCalendar()); + calendarIntervalSetter.accept(source.getIntervalAsCalendar()); + return; + } catch (IllegalArgumentException | IllegalStateException ignored) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot copy interval for date histogram bucket " + source.name()); + } + } + + private static void copyDateHistogramBucketOptions( + DateHistogramValuesSourceBuilder source, DateHistogramAggregationBuilder target) { + if (source.field() != null) { + target.field(source.field()); + } + if (source.script() != null) { + target.script(source.script()); + } + copyDateHistogramInterval(source, target::fixedInterval, target::calendarInterval); + if (source.userValuetypeHint() != null) { + target.userValueTypeHint(source.userValuetypeHint()); + } + if (source.timeZone() != null) { + target.timeZone(source.timeZone()); + } + if (source.offset() != 0) { + target.offset(source.offset()); + } + if (source.format() != null) { + target.format(source.format()); } - if (dateHisto.userValuetypeHint() != null) { - dateHistoBuilder.userValueTypeHint(dateHisto.userValuetypeHint()); + // Composite group-by only returns buckets with documents. Preserve that when rewriting. 
+ target.minDocCount(1); + } + + private static void copyHistogramBucketOptions( + HistogramValuesSourceBuilder source, HistogramAggregationBuilder target) { + if (source.field() != null) { + target.field(source.field()); + } + if (source.script() != null) { + target.script(source.script()); + } + target.interval(source.interval()); + if (source.userValuetypeHint() != null) { + target.userValueTypeHint(source.userValuetypeHint()); + } + if (source.format() != null) { + target.format(source.format()); } + // Composite group-by only returns buckets with documents. Preserve that when rewriting. + target.minDocCount(1); + } + + private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( + DateHistogramValuesSourceBuilder dateHisto, BucketOrder bucketOrder) { + DateHistogramAggregationBuilder dateHistoBuilder = + new DateHistogramAggregationBuilder(dateHisto.name()); + copyDateHistogramBucketOptions(dateHisto, dateHistoBuilder); dateHistoBuilder.order(bucketOrder); return dateHistoBuilder; } @@ -283,16 +351,7 @@ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( private HistogramAggregationBuilder buildHistogramAggregationBuilder( HistogramValuesSourceBuilder histo, BucketOrder bucketOrder) { HistogramAggregationBuilder histoBuilder = new HistogramAggregationBuilder(histo.name()); - if (histo.field() != null) { - histoBuilder.field(histo.field()); - } - if (histo.script() != null) { - histoBuilder.script(histo.script()); - } - histoBuilder.interval(histo.interval()); - if (histo.userValuetypeHint() != null) { - histoBuilder.userValueTypeHint(histo.userValuetypeHint()); - } + copyHistogramBucketOptions(histo, histoBuilder); histoBuilder.order(bucketOrder); return histoBuilder; } @@ -408,19 +467,11 @@ public void pushDownSortIntoAggBucket( newBuckets.add(buckets.get(bucketNames.indexOf(name))); newBucketNames.add(name); }); - AggregatorFactories.Builder newAggBuilder = new AggregatorFactories.Builder(); - 
compositeAggBuilder.getSubAggregations().forEach(newAggBuilder::addAggregator); AggregationBuilder finalBuilder = - AggregationBuilders.composite("composite_buckets", newBuckets) - .subAggregations(newAggBuilder) + AggregationBuilders.composite(compositeAggBuilder.getName(), newBuckets) + .subAggregations(copySubAggregations(compositeAggBuilder)) .size(compositeAggBuilder.size()); - if (original instanceof NestedAggregationBuilder nested) { - finalBuilder = - AggregationBuilders.nested(nested.getName(), nested.path()) - .subAggregation(finalBuilder); - } - builderAndParser = - Pair.of(Collections.singletonList(finalBuilder), builderAndParser.getRight()); + replaceRootBuilder(original, finalBuilder); bucketNames = newBucketNames; } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { @@ -429,16 +480,6 @@ public void pushDownSortIntoAggBucket( // TODO for MultiTermsAggregationBuilder } - public boolean isCompositeAggregation() { - return builderAndParser.getLeft().stream() - .anyMatch( - builder -> - builder instanceof CompositeAggregationBuilder - || (builder instanceof NestedAggregationBuilder - && builder.getSubAggregations().iterator().next() - instanceof CompositeAggregationBuilder)); - } - /** * Check if the limit can be pushed down into aggregation bucket when the limit size is less than * bucket number. 
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java new file mode 100644 index 00000000000..7c0b0f15f8b --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggSpec.java @@ -0,0 +1,386 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan.context; + +import static org.opensearch.search.aggregations.MultiBucketConsumerService.DEFAULT_MAX_BUCKETS; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; +import lombok.Getter; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.HistogramValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.metrics.TopHitsAggregationBuilder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import 
org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; + +/** Immutable aggregation pushdown state and ordered replay plan. */ +@Getter +public final class AggSpec { + private enum AggKind { + OTHER, + COMPOSITE, + TERMS, + MULTI_TERMS, + DATE_HISTOGRAM, + HISTOGRAM, + TOP_HITS, + RARE_TOP + } + + private enum LimitPushdownMode { + UNSUPPORTED, + ESTIMATE_ONLY, + LEAF_METRIC, + BUCKET_SIZE + } + + private interface BuildAction extends AbstractAction { + @Override + default void pushOperation(PushDownContext context, PushDownOperation operation) { + throw new UnsupportedOperationException("Internal aggregation build action cannot be queued"); + } + } + + private final Pair, OpenSearchAggregationResponseParser> + baseBuilderAndParser; + private final Map extendedTypeMapping; + private final List initialBucketNames; + // Cost model uses the script count of the base logical aggregation. Supported rewrites keep the + // same scripted sources/metrics semantically, while replay-time builders are request-scoped and + // may not preserve a structure that can be re-counted accurately after rewrite. 
+ private final long scriptCount; + private final AggKind kind; + private final LimitPushdownMode limitPushdownMode; + // The pushdown operation queue to rewrite base agg + private final List operationsForAgg; + @Nullable private final Integer bucketSize; + + private AggSpec( + Pair, OpenSearchAggregationResponseParser> baseBuilderAndParser, + Map extendedTypeMapping, + List initialBucketNames, + long scriptCount, + AggKind kind, + LimitPushdownMode limitPushdownMode, + List operationsForAgg, + @Nullable Integer bucketSize) { + this.baseBuilderAndParser = baseBuilderAndParser; + this.extendedTypeMapping = Map.copyOf(extendedTypeMapping); + this.initialBucketNames = List.copyOf(initialBucketNames); + this.scriptCount = scriptCount; + this.kind = kind; + this.limitPushdownMode = limitPushdownMode; + this.operationsForAgg = List.copyOf(operationsForAgg); + this.bucketSize = bucketSize; + } + + public static AggSpec create( + Map extendedTypeMapping, + List bucketNames, + Pair, OpenSearchAggregationResponseParser> builderAndParser) { + AggregationBuilder rootBuilder = + builderAndParser.getLeft().isEmpty() ? 
null : builderAndParser.getLeft().getFirst(); + AggKind kind = inferKind(rootBuilder); + return new AggSpec( + builderAndParser, + extendedTypeMapping, + bucketNames, + builderAndParser.getLeft().stream().mapToInt(AggPushDownAction::getScriptCount).sum(), + kind, + inferBaseLimitPushdownMode(rootBuilder, kind), + List.of(), + inferBucketSize(rootBuilder)); + } + + public boolean isCompositeAggregation() { + return kind == AggKind.COMPOSITE; + } + + public boolean canPushDownLimitIntoBucketSize(int size) { + return switch (limitPushdownMode) { + case BUCKET_SIZE -> bucketSize != null && size < bucketSize; + case LEAF_METRIC -> true; + case ESTIMATE_ONLY -> false; + case UNSUPPORTED -> + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown limit into aggregation bucket"); + }; + } + + public AggSpec withBucketSort(List collations, List fieldNames) { + if (kind != AggKind.COMPOSITE && kind != AggKind.TERMS) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort into aggregation bucket"); + } + if (kind == AggKind.COMPOSITE) { + for (RelFieldCollation collation : collations) { + String bucketName = fieldNames.get(collation.getFieldIndex()); + if (!initialBucketNames.contains(bucketName)) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort into aggregation bucket"); + } + } + } + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + kind, + limitPushdownMode, + replaceOperations( + PushDownType.SORT, + collations, + action -> action.pushDownSortIntoAggBucket(collations, fieldNames)), + bucketSize); + } + + public AggSpec withoutBucketSort() { + if (operationsForAgg.stream().noneMatch(operation -> operation.type() == PushDownType.SORT)) { + return this; + } + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + kind, + limitPushdownMode, + 
removeOperations(PushDownType.SORT), + bucketSize); + } + + public AggSpec withSortMeasure(List collations, List fieldNames) { + AggKind rewriteTarget = inferMeasureSortTarget(); + if (rewriteTarget == null) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort aggregate measure"); + } + Integer resizedBucketSize = + switch (rewriteTarget) { + case TERMS, MULTI_TERMS -> bucketSize; + default -> null; + }; + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + rewriteTarget, + inferLimitPushdownMode(rewriteTarget), + replaceOperations( + PushDownType.SORT_AGG_METRICS, + collations, + action -> action.rePushDownSortAggMeasure(collations, fieldNames)), + resizedBucketSize); + } + + public AggSpec withRareTop(RareTopDigest digest) { + if (!supportsCurrentRareTop()) { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException("Cannot pushdown " + digest); + } + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + AggKind.RARE_TOP, + inferLimitPushdownMode(AggKind.RARE_TOP), + replaceOperations( + PushDownType.RARE_TOP, digest, action -> action.rePushDownRareTop(digest)), + digest.byList().isEmpty() ? 
digest.number() : DEFAULT_MAX_BUCKETS); + } + + public AggSpec withLimit(int size) { + switch (limitPushdownMode) { + case ESTIMATE_ONLY, LEAF_METRIC: + return this; + case UNSUPPORTED: + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown limit into aggregation bucket"); + case BUCKET_SIZE: + if (!canPushDownLimitIntoBucketSize(size)) { + return this; + } + break; + } + return new AggSpec( + baseBuilderAndParser, + extendedTypeMapping, + initialBucketNames, + scriptCount, + kind, + limitPushdownMode, + replaceOperations( + PushDownType.LIMIT, + new LimitDigest(size, 0), + action -> action.pushDownLimitIntoBucketSize(size)), + size); + } + + public AggPushDownAction buildAction() { + AggPushDownAction action = + new AggPushDownAction( + baseBuilderAndParser, extendedTypeMapping, new ArrayList<>(initialBucketNames)); + operationsForAgg.forEach(operation -> ((BuildAction) operation.action()).apply(action)); + return action; + } + + private List replaceOperations( + PushDownType type, Object digest, BuildAction action) { + List newOperations = removeOperations(type); + newOperations.add(new PushDownOperation(type, digest, action)); + return newOperations; + } + + private List removeOperations(PushDownType type) { + return new ArrayList<>( + operationsForAgg.stream().filter(operation -> operation.type() != type).toList()); + } + + private static AggKind inferKind(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof CompositeAggregationBuilder) { + return AggKind.COMPOSITE; + } + if (builder instanceof TermsAggregationBuilder) { + return AggKind.TERMS; + } + if (builder instanceof MultiTermsAggregationBuilder) { + return AggKind.MULTI_TERMS; + } + if (builder instanceof DateHistogramAggregationBuilder) { + return AggKind.DATE_HISTOGRAM; + } + if (builder instanceof HistogramAggregationBuilder) { + return AggKind.HISTOGRAM; + } + if (builder instanceof 
TopHitsAggregationBuilder) { + return AggKind.TOP_HITS; + } + return AggKind.OTHER; + } + + private static LimitPushdownMode inferLimitPushdownMode(AggKind kind) { + return switch (kind) { + case COMPOSITE, TERMS, MULTI_TERMS, TOP_HITS, RARE_TOP -> LimitPushdownMode.BUCKET_SIZE; + case OTHER, DATE_HISTOGRAM, HISTOGRAM -> LimitPushdownMode.UNSUPPORTED; + }; + } + + private static LimitPushdownMode inferBaseLimitPushdownMode( + @Nullable AggregationBuilder rootBuilder, AggKind kind) { + if (rootBuilder == null) { + // count() optimization uses hits.total and leaves the builder list empty. Keeps + // LIMIT in PushDownContext for these cases even though no request-side limit is applied. + return LimitPushdownMode.ESTIMATE_ONLY; + } + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof ValuesSourceAggregationBuilder.LeafOnly) { + // Treats leaf metric aggregations as limit-pushable because they produce a single row. + return LimitPushdownMode.LEAF_METRIC; + } + return inferLimitPushdownMode(kind); + } + + private static boolean supportsBaseRareTop(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (!(builder instanceof CompositeAggregationBuilder composite)) { + return false; + } + if (composite.sources().size() == 1) { + return composite.sources().getFirst() instanceof TermsValuesSourceBuilder terms + && !terms.missingBucket(); + } + return composite.sources().stream() + .allMatch(src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()); + } + + @Nullable + private AggKind inferMeasureSortTarget() { + if (kind != AggKind.COMPOSITE) { + return null; + } + AggregationBuilder rootBuilder = + baseBuilderAndParser.getLeft().isEmpty() ? 
null : baseBuilderAndParser.getLeft().getFirst(); + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (!(builder instanceof CompositeAggregationBuilder composite)) { + return null; + } + if (composite.getSubAggregations().stream() + .anyMatch(metric -> !(metric instanceof ValuesSourceAggregationBuilder.LeafOnly))) { + return null; + } + if (composite.sources().size() == 1) { + CompositeValuesSourceBuilder source = composite.sources().getFirst(); + if (source instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) { + return AggKind.TERMS; + } + if (source instanceof DateHistogramValuesSourceBuilder) { + return AggKind.DATE_HISTOGRAM; + } + if (source instanceof HistogramValuesSourceBuilder histo && !histo.missingBucket()) { + return AggKind.HISTOGRAM; + } + return null; + } + return composite.sources().stream() + .allMatch( + src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) + ? AggKind.MULTI_TERMS + : null; + } + + private boolean supportsCurrentRareTop() { + return kind == AggKind.COMPOSITE + && supportsBaseRareTop( + baseBuilderAndParser.getLeft().isEmpty() + ? 
null + : baseBuilderAndParser.getLeft().getFirst()); + } + + @Nullable + private static Integer inferBucketSize(@Nullable AggregationBuilder rootBuilder) { + AggregationBuilder builder = unwrapNestedBuilder(rootBuilder); + if (builder instanceof CompositeAggregationBuilder composite) { + return composite.size(); + } + if (builder instanceof TermsAggregationBuilder terms) { + return terms.size(); + } + if (builder instanceof MultiTermsAggregationBuilder multiTerms) { + return multiTerms.size(); + } + if (builder instanceof TopHitsAggregationBuilder topHits) { + return topHits.size(); + } + return null; + } + + @Nullable + private static AggregationBuilder unwrapNestedBuilder(@Nullable AggregationBuilder rootBuilder) { + if (rootBuilder instanceof NestedAggregationBuilder nested + && !nested.getSubAggregations().isEmpty()) { + return nested.getSubAggregations().iterator().next(); + } + return rootBuilder; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java deleted file mode 100644 index f9f43c89a7b..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.storage.scan.context; - -/** A lambda action to apply on the {@link AggPushDownAction} */ -public interface AggregationBuilderAction extends AbstractAction { - default void pushOperation(PushDownContext context, PushDownOperation operation) { - // Apply transformation to aggregation builder in the optimization phase as some transformation - // may cause exception. We need to detect that exception in advance. 
- apply(context.getAggPushDownAction()); - context.addOperationForAgg(operation); - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 2d236207c10..a622f948efb 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -11,6 +11,7 @@ import java.util.List; import javax.annotation.Nullable; import lombok.Getter; +import lombok.Setter; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Project; import org.jetbrains.annotations.NotNull; @@ -27,8 +28,7 @@ public class PushDownContext extends AbstractCollection { private ArrayDeque operationsForRequestBuilder; private boolean isAggregatePushed = false; - private AggPushDownAction aggPushDownAction; - private ArrayDeque operationsForAgg; + @Setter private AggSpec aggSpec; // Records the start pos of the query, which is updated by new added limit operations. private int startFrom = 0; @@ -49,7 +49,10 @@ public PushDownContext(OpenSearchIndex osIndex) { @Override public PushDownContext clone() { PushDownContext newContext = new PushDownContext(osIndex); - newContext.addAll(this); + for (PushDownOperation operation : this) { + newContext.add(operation); + } + newContext.aggSpec = aggSpec; return newContext; } @@ -65,6 +68,7 @@ public PushDownContext cloneWithoutSort() { newContext.add(action); } } + newContext.aggSpec = aggSpec == null ? 
null : aggSpec.withoutBucketSort(); return newContext; } @@ -132,20 +136,11 @@ void addOperationForRequestBuilder(PushDownOperation operation) { queue.add(operation); } - void addOperationForAgg(PushDownOperation operation) { - if (operationsForAgg == null) { - this.operationsForAgg = new ArrayDeque<>(); - } - operationsForAgg.add(operation); - queue.add(operation); - } - @Override public boolean add(PushDownOperation operation) { operation.action().pushOperation(this, operation); if (operation.type() == PushDownType.AGGREGATION) { isAggregatePushed = true; - this.aggPushDownAction = (AggPushDownAction) operation.action(); } if (operation.type() == PushDownType.LIMIT) { startFrom += ((LimitDigest) operation.digest()).offset(); @@ -214,6 +209,9 @@ public OpenSearchRequestBuilder createRequestBuilder() { operationsForRequestBuilder.forEach( operation -> ((OSRequestBuilderAction) operation.action()).apply(newRequestBuilder)); } + if (aggSpec != null) { + aggSpec.buildAction().apply(newRequestBuilder); + } return newRequestBuilder; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java index 224d7019ec2..6e2240909b0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java @@ -77,6 +77,7 @@ import org.opensearch.script.ScriptEngine; import org.opensearch.script.StringSortScript; import org.opensearch.search.lookup.SourceLookup; +import org.opensearch.sql.calcite.utils.CalciteClassLoaderHelper; import org.opensearch.sql.data.model.ExprTimestampValue; import org.opensearch.sql.opensearch.storage.script.aggregation.CalciteAggregationScriptFactory; import org.opensearch.sql.opensearch.storage.script.field.CalciteFieldScriptFactory; @@ -138,7 +139,9 @@ public T compile( new 
RelRecordType(List.of())); Function1 function = - new RexExecutable(code, "generated Rex code").getFunction(); + CalciteClassLoaderHelper.withCalciteClassLoader( + () -> new RexExecutable(code, "generated Rex code").getFunction(), + CalciteScriptEngine.class); if (CONTEXTS.containsKey(context)) { return context.factoryClazz.cast(CONTEXTS.get(context).apply(function, rexNode.getType())); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index 6ca25b7e9b7..2ff0dfa4a50 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -29,6 +29,7 @@ import org.opensearch.sql.expression.function.FunctionName; import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.ScriptEngineType; import org.opensearch.sql.opensearch.storage.script.core.ExpressionScript; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.ExistsQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.LikeQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.LuceneQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.NestedQuery; @@ -86,6 +87,8 @@ public ScriptQueryUnSupportedException(String message) { .put(BuiltinFunctionName.WILDCARD_QUERY.getName(), new WildcardQuery()) .put(BuiltinFunctionName.WILDCARDQUERY.getName(), new WildcardQuery()) .put(BuiltinFunctionName.NESTED.getName(), new NestedQuery()) + .put(BuiltinFunctionName.IS_NULL.getName(), new ExistsQuery(true /* negated */)) + .put(BuiltinFunctionName.IS_NOT_NULL.getName(), new ExistsQuery(false)) .build(); /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/ExistsQuery.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/ExistsQuery.java new file mode 100644 index 00000000000..5822f2f416a --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/ExistsQuery.java @@ -0,0 +1,69 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.filter.lucene; + +import static org.opensearch.sql.analysis.NestedAnalyzer.isNestedFunction; + +import lombok.RequiredArgsConstructor; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.ReferenceExpression; + +/** + * Lucene query that builds a native {@code exists} DSL fragment for {@code IS NULL} / {@code IS NOT + * NULL} predicates. + * + *

    This replaces the previous behavior of serializing these unary predicates as compounded script + * queries. The native {@code exists} query is cheaper, AOSS / serverless compatible, and the + * expected DSL shape downstream consumers look for. + * + *

    Unlike most {@link LuceneQuery} subclasses this predicate family is unary (a single reference + * argument) rather than the standard {ref, literal} pair, so this class overrides both {@link + * #canSupport(FunctionExpression)} and {@link #build(FunctionExpression)}. + * + *

    Nested-field predicates are intentionally NOT supported here: OpenSearch DSL does not handle + * {@code IS_NULL} / {@code IS_NOT_NULL} on nested fields correctly (see the equivalent guard in + * {@code PredicateAnalyzer} for the Calcite path). When the reference is a nested function, {@link + * #canSupport} returns {@code false} and {@link + * org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder} falls back to the script + * query path, preserving correctness. + */ +@RequiredArgsConstructor +public class ExistsQuery extends LuceneQuery { + + /** When true, the predicate is {@code IS NULL} and the exists query is wrapped in must_not. */ + private final boolean negated; + + @Override + public boolean canSupport(FunctionExpression func) { + return func.getArguments().size() == 1 + && func.getArguments().get(0) instanceof ReferenceExpression + && !isNestedFunction(func.getArguments().get(0)); + } + + /** + * Unary IS NULL / IS NOT NULL has no {@code arg[1]}, so we must never route through {@link + * org.opensearch.sql.opensearch.storage.script.filter.lucene.NestedQuery#buildNested} — that path + * reads {@code func.getArguments().get(1)} and would throw. Returning {@code false} here forces + * {@code FilterQueryBuilder} to fall back to the script-query path for nested-field predicates. 
+ */ + @Override + public boolean isNestedPredicate(FunctionExpression func) { + return false; + } + + @Override + public QueryBuilder build(FunctionExpression func) { + ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0); + String fieldName = ref.getRawPath(); + QueryBuilder existsQuery = QueryBuilders.existsQuery(fieldName); + if (negated) { + return QueryBuilders.boolQuery().mustNot(existsQuery); + } + return existsQuery; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/MergeRuleHelper.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/MergeRuleHelper.java index b2b851adec7..6cc6f1803a7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/MergeRuleHelper.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/MergeRuleHelper.java @@ -12,7 +12,7 @@ public class MergeRuleHelper { private static final List RULES = List.of( - new DeepMergeRule(), new LatestRule() // must come last + new DeepMergeRule(), new TextKeywordConflictRule(), new LatestRule() // must come last ); public static MergeRule selectRule(OpenSearchDataType source, OpenSearchDataType target) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRule.java new file mode 100644 index 00000000000..04d98e1a60f --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRule.java @@ -0,0 +1,72 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.util.MergeRules; + +import java.util.Map; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; +import 
org.opensearch.sql.opensearch.data.type.OpenSearchTextType; + +/** + * Merge rule for text/keyword type conflicts across indices. When a field is text in one index and + * keyword in another, or text-with-keyword-subfield in one and text-without in another, we merge to + * text WITHOUT keyword subfields. This forces _source retrieval instead of doc_values, which works + * universally across all shards regardless of the actual field type. + * + *

    See GitHub issue #4659. + */ +public class TextKeywordConflictRule implements MergeRule { + + @Override + public boolean isMatch(OpenSearchDataType source, OpenSearchDataType target) { + if (source == null || target == null) { + return false; + } + MappingType sourceMapping = source.getMappingType(); + MappingType targetMapping = target.getMappingType(); + if (sourceMapping == null || targetMapping == null) { + return false; + } + // Match when one is text and the other is keyword + if (isTextLike(sourceMapping) && isKeyword(targetMapping)) { + return true; + } + if (isKeyword(sourceMapping) && isTextLike(targetMapping)) { + return true; + } + // Match when both are text but one has keyword subfields and the other does not + if (isTextLike(sourceMapping) && isTextLike(targetMapping)) { + boolean sourceHasKeywordSub = hasKeywordSubField(source); + boolean targetHasKeywordSub = hasKeywordSubField(target); + return sourceHasKeywordSub != targetHasKeywordSub; + } + return false; + } + + @Override + public void mergeInto( + String key, OpenSearchDataType source, Map target) { + // Always merge to text WITHOUT keyword subfields. + // This forces _source retrieval, which works for both text and keyword fields. 
+ target.put(key, OpenSearchTextType.of()); + } + + private static boolean isTextLike(MappingType mappingType) { + return mappingType == MappingType.Text || mappingType == MappingType.MatchOnlyText; + } + + private static boolean isKeyword(MappingType mappingType) { + return mappingType == MappingType.Keyword; + } + + private static boolean hasKeywordSubField(OpenSearchDataType type) { + if (type instanceof OpenSearchTextType textType) { + return textType.getFields().values().stream() + .anyMatch(f -> f.getMappingType() == MappingType.Keyword); + } + return false; + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java index 81261aa7a70..5885db1427b 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java @@ -66,6 +66,7 @@ import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.data.model.ExprIntegerValue; import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; @@ -244,7 +245,12 @@ void get_index_mappings_with_IOException() { @Test void get_index_mappings_with_index_patterns() { mockNodeClientIndicesMappings("", null); - assertThrows(IndexNotFoundException.class, () -> client.getIndexMappings("test*")); + ErrorReport report = assertThrows(ErrorReport.class, () -> client.getIndexMappings("test*")); + assertTrue( + report.getMessage().contains("test*") && report.getMessage().contains("no such index"), + "expected index-not-found error message \"" + + report.getMessage() + + "\" to resemble \"no such index [index]\""); } @Test @@ -252,7 +258,7 @@ void 
get_index_mappings_with_non_exist_index() { when(nodeClient.admin().indices().prepareGetMappings(any()).setLocal(anyBoolean()).get()) .thenThrow(IndexNotFoundException.class); - assertThrows(IndexNotFoundException.class, () -> client.getIndexMappings("non_exist_index")); + assertThrows(ErrorReport.class, () -> client.getIndexMappings("non_exist_index")); } @Test @@ -493,6 +499,66 @@ void ml() { assertNotNull(client.getNodeClient()); } + @Test + void get_index_mappings_error_message_includes_single_index() { + String underlyingError = "Connection timeout"; + when(nodeClient.admin().indices()).thenThrow(new RuntimeException(underlyingError)); + + IllegalStateException exception = + assertThrows(IllegalStateException.class, () -> client.getIndexMappings("test_index")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("test_index")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_mappings_error_message_includes_multiple_indices() { + String underlyingError = "Access denied"; + when(nodeClient.admin().indices()).thenThrow(new RuntimeException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> client.getIndexMappings("index1", "index2", "index3")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("index1")), + () -> assertTrue(exception.getMessage().contains("index2")), + () -> assertTrue(exception.getMessage().contains("index3")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_max_result_windows_error_message_includes_single_index() { + String underlyingError = "Network error"; + when(nodeClient.admin().indices()).thenThrow(new RuntimeException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, () -> client.getIndexMaxResultWindows("test_index")); + + assertAll( + () -> 
assertTrue(exception.getMessage().contains("test_index")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_max_result_windows_error_message_includes_multiple_indices() { + String underlyingError = "Permission denied"; + when(nodeClient.admin().indices()).thenThrow(new RuntimeException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> client.getIndexMaxResultWindows("logs-2024", "metrics-2024")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("logs-2024")), + () -> assertTrue(exception.getMessage().contains("metrics-2024")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + public void mockNodeClientIndicesMappings(String indexName, String mappings) { GetMappingsResponse mockResponse = mock(GetMappingsResponse.class); MappingMetadata emptyMapping = mock(MappingMetadata.class); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java index afd210da1ff..6b101a0107a 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java @@ -531,6 +531,70 @@ void ml_with_exception() { assertEquals(Optional.empty(), client.getNodeClient()); } + @Test + void get_index_mappings_error_message_includes_single_index() throws IOException { + String underlyingError = "Network timeout"; + when(restClient.indices().getMapping(any(GetMappingsRequest.class), any())) + .thenThrow(new IOException(underlyingError)); + + IllegalStateException exception = + assertThrows(IllegalStateException.class, () -> client.getIndexMappings("test_index")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("test_index")), + () -> 
assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_mappings_error_message_includes_multiple_indices() throws IOException { + String underlyingError = "Connection refused"; + when(restClient.indices().getMapping(any(GetMappingsRequest.class), any())) + .thenThrow(new IOException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> client.getIndexMappings("index1", "index2", "index3")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("index1")), + () -> assertTrue(exception.getMessage().contains("index2")), + () -> assertTrue(exception.getMessage().contains("index3")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_max_result_windows_error_message_includes_single_index() throws IOException { + String underlyingError = "Authentication failed"; + when(restClient.indices().getSettings(any(GetSettingsRequest.class), any())) + .thenThrow(new IOException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, () -> client.getIndexMaxResultWindows("test_index")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("test_index")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); + } + + @Test + void get_index_max_result_windows_error_message_includes_multiple_indices() throws IOException { + String underlyingError = "Timeout"; + when(restClient.indices().getSettings(any(GetSettingsRequest.class), any())) + .thenThrow(new IOException(underlyingError)); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> client.getIndexMaxResultWindows("logs-2024", "metrics-2024")); + + assertAll( + () -> assertTrue(exception.getMessage().contains("logs-2024")), + () -> assertTrue(exception.getMessage().contains("metrics-2024")), + () -> assertTrue(exception.getMessage().contains(underlyingError))); 
+ } + private Map mockFieldMappings(String indexName, String mappings) throws IOException { return ImmutableMap.of(indexName, IndexMetadata.fromXContent(createParser(mappings)).mapping()); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 0734613e522..031b9243f38 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -234,6 +234,9 @@ public void constructIp() { public void constructBoolean() { assertAll( () -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":true}").get("boolV")), + () -> assertEquals(booleanValue(false), tupleValue("{\"boolV\":false}").get("boolV")), + () -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":1}").get("boolV")), + () -> assertEquals(booleanValue(false), tupleValue("{\"boolV\":0}").get("boolV")), () -> assertEquals(booleanValue(true), constructFromObject("boolV", true)), () -> assertEquals(booleanValue(true), constructFromObject("boolV", "true")), () -> assertEquals(booleanValue(true), constructFromObject("boolV", 1)), diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 572f748fd03..733c2de5213 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -1245,4 +1245,233 @@ void search_complementedPointsWithNullAsFalse_generatesExistsAndNotInQuery() """, result.toString()); } + + @Test + void search_complementedPointsWithNullAsUnknown_generatesExistsAndNotInQuery() + throws 
ExpressionNotAnalyzableException { + // Simulates: a NOT IN (12, 13) + // Calcite represents this as SEARCH($0, Sarg[...; NULL AS UNKNOWN]) with complemented points + // SQL three-valued logic: NULL NOT IN (...) evaluates to UNKNOWN (not TRUE), + // so null rows must be excluded. + Sarg sarg = + Sarg.of( + RexUnknownAs.UNKNOWN, + ImmutableRangeSet.builder() + .add(Range.lessThan(BigDecimal.valueOf(12))) + .add(Range.open(BigDecimal.valueOf(12), BigDecimal.valueOf(13))) + .add(Range.greaterThan(BigDecimal.valueOf(13))) + .build()); + RexNode sargLiteral = + builder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + RexNode call = builder.makeCall(SqlStdOperatorTable.SEARCH, field1, sargLiteral); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "bool" : { + "must_not" : [ + { + "terms" : { + "a" : [ + 12.0, + 13.0 + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notLike_keywordField_generatesBoolWithExistsAndMustNot() + throws ExpressionNotAnalyzableException { + // NOT(LIKE(field, pattern)) should generate bool query with must(exists) + mustNot(wildcard) + List arguments = + Arrays.asList(field2, builder.makeLiteral("%Hi%"), builder.makeLiteral(true)); + RexNode likeCall = + PPLFuncImpTable.INSTANCE.resolve(builder, "like", arguments.toArray(new RexNode[0])); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, likeCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "exists" : { + "field" : "b", + "boost" : 1.0 + } + } + ], + 
"must_not" : [ + { + "wildcard" : { + "b.keyword" : { + "wildcard" : "*Hi*", + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notGreaterThan_generatesExistsAndMustNotRange() throws ExpressionNotAnalyzableException { + // NOT(a > 12) should generate bool query with must(exists) + mustNot(range) + RexNode gtCall = builder.makeCall(SqlStdOperatorTable.GREATER_THAN, field1, numericLiteral); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, gtCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + } + ], + "must_not" : [ + { + "range" : { + "a" : { + "from" : 12, + "to" : null, + "include_lower" : false, + "include_upper" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notIsNotNull_generatesOnlyMustNotExists() throws ExpressionNotAnalyzableException { + // NOT(IS_NOT_NULL(a)) = IS_NULL(a) should generate must_not(exists) WITHOUT an exists in must + RexNode isNotNullCall = builder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, field1); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, isNotNullCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must_not" : [ + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notIsTrue_generatesOnlyMustNotTerm() throws ExpressionNotAnalyzableException { + // NOT(IS_TRUE(e)) should generate must_not(term(e, true)) WITHOUT an exists filter + RexNode isTrueCall = 
builder.makeCall(SqlStdOperatorTable.IS_TRUE, field5); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, isTrueCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must_not" : [ + { + "term" : { + "e" : { + "value" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void notIsFalse_generatesOnlyMustNotTerm() throws ExpressionNotAnalyzableException { + // NOT(IS_FALSE(e)) should generate must_not(term(e, false)) WITHOUT an exists filter + RexNode isFalseCall = builder.makeCall(SqlStdOperatorTable.IS_FALSE, field5); + RexNode notCall = builder.makeCall(SqlStdOperatorTable.NOT, isFalseCall); + QueryBuilder result = PredicateAnalyzer.analyze(notCall, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must_not" : [ + { + "term" : { + "e" : { + "value" : false, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/AggregationResponseUtils.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/AggregationResponseUtils.java index f230bae5a8a..38935d27007 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/AggregationResponseUtils.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/AggregationResponseUtils.java @@ -5,13 +5,12 @@ package org.opensearch.sql.opensearch.response; -import com.fasterxml.jackson.core.JsonFactory; import com.google.common.collect.ImmutableMap; import java.io.IOException; import java.util.List; import java.util.stream.Collectors; import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import 
org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.ParseField; import org.opensearch.core.xcontent.ContextParser; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -111,10 +110,8 @@ Aggregation.class, new ParseField(entry.getKey()), entry.getValue())) public static Aggregations fromJson(String json) { try { XContentParser contentParser = - new JsonXContentParser( - namedXContentRegistry, - LoggingDeprecationHandler.INSTANCE, - new JsonFactory().createParser(json)); + JsonXContent.jsonXContent.createParser( + namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, json); contentParser.nextToken(); return Aggregations.fromXContent(contentParser); } catch (IOException e) { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngineTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngineTest.java index 38f2ae495e0..fa04395e065 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngineTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchStorageEngineTest.java @@ -11,12 +11,14 @@ import static org.opensearch.sql.analysis.DataSourceSchemaIdentifierNameResolver.DEFAULT_DATASOURCE_NAME; import static org.opensearch.sql.utils.SystemIndexUtils.TABLE_INFO; +import java.util.Collection; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.DataSourceSchemaName; import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.expression.function.FunctionResolver; import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.storage.system.OpenSearchSystemIndex; import org.opensearch.sql.storage.Table; @@ -36,6 +38,15 @@ public void 
getTable() { assertAll(() -> assertNotNull(table), () -> assertTrue(table instanceof OpenSearchIndex)); } + @Test + public void getFunctionsReturnsVectorSearchResolver() { + OpenSearchStorageEngine engine = new OpenSearchStorageEngine(client, settings); + Collection functions = engine.getFunctions(); + assertTrue( + functions.stream().anyMatch(f -> f instanceof VectorSearchTableFunctionResolver), + "getFunctions() should contain a VectorSearchTableFunctionResolver"); + } + @Test public void getSystemTable() { OpenSearchStorageEngine engine = new OpenSearchStorageEngine(client, settings); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchIndexTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchIndexTest.java new file mode 100644 index 00000000000..6a9a76a48f0 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchIndexTest.java @@ -0,0 +1,266 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.when; + +import com.google.common.collect.ImmutableMap; +import java.util.LinkedHashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import 
org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; +import org.opensearch.sql.opensearch.mapping.IndexMapping; + +@ExtendWith(MockitoExtension.class) +class VectorSearchIndexTest { + + @Mock private OpenSearchClient client; + + @Mock private Settings settings; + + @Mock private IndexMapping indexMapping; + + @Test + void buildKnnQueryJsonTopK() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f, 2.0f, 3.0f}, + Map.of("k", "5")); + + String json = index.buildKnnQueryJson(); + assertEquals("{\"knn\":{\"embedding\":{\"vector\":[1.0,2.0,3.0],\"k\":5}}}", json); + } + + @Test + void buildKnnQueryJsonRadialMaxDistance() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f, 2.0f}, + Map.of("max_distance", "10.5")); + + String json = index.buildKnnQueryJson(); + assertEquals("{\"knn\":{\"embedding\":{\"vector\":[1.0,2.0],\"max_distance\":10.5}}}", json); + } + + @Test + void buildKnnQueryJsonRadialMinScore() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {0.5f}, + Map.of("min_score", "0.8")); + + String json = index.buildKnnQueryJson(); + assertEquals("{\"knn\":{\"embedding\":{\"vector\":[0.5],\"min_score\":0.8}}}", json); + } + + @Test + void buildKnnQueryJsonNestedFieldName() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "doc.embedding", + new float[] {1.0f, 2.0f}, + Map.of("k", "10")); + + String json = index.buildKnnQueryJson(); + assertTrue(json.contains("\"doc.embedding\""), "Should contain nested field name with dot"); + } + + @Test + void buildKnnQueryJsonMultiElementVector() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f, -2.5f, 0.0f, 3.14f, 100.0f}, + Map.of("k", "3")); + + String json = 
index.buildKnnQueryJson(); + assertTrue( + json.contains("[1.0,-2.5,0.0,3.14,100.0]"), + "Should contain all vector components with correct comma separation"); + } + + @Test + void buildKnnQueryJsonSingleElementVector() { + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {42.0f}, Map.of("k", "1")); + + String json = index.buildKnnQueryJson(); + assertTrue(json.contains("[42.0]"), "Should contain single-element vector"); + } + + @Test + void buildKnnQueryJsonNumericOptionRenderedUnquoted() { + LinkedHashMap options = new LinkedHashMap<>(); + options.put("k", "5"); + + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {1.0f}, options); + + String json = index.buildKnnQueryJson(); + assertTrue(json.contains("\"k\":5"), "Numeric option should be unquoted"); + } + + @Test + void buildKnnQueryJsonNonNumericOptionRenderedQuoted() { + LinkedHashMap options = new LinkedHashMap<>(); + options.put("k", "5"); + options.put("method", "hnsw"); + + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {1.0f}, options); + + String json = index.buildKnnQueryJson(); + assertTrue(json.contains("\"method\":\"hnsw\""), "Non-numeric option should be quoted"); + assertTrue(json.contains("\"k\":5"), "Numeric option should be unquoted"); + } + + @Test + void buildKnnQueryJsonWithFilterEmbeds() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f, 2.0f}, + Map.of("k", "5"), + FilterType.EFFICIENT); + + String filterJson = "{\"term\":{\"city\":{\"value\":\"Miami\"}}}"; + String json = index.buildKnnQueryJson(filterJson); + + assertTrue(json.contains("\"filter\""), "Should contain filter field"); + assertTrue(json.contains("\"term\""), "Should contain the filter content"); + assertTrue(json.contains("\"k\":5"), "Should still contain k"); + 
assertTrue(json.contains("\"vector\":[1.0,2.0]"), "Should contain vector"); + } + + @Test + void buildKnnQueryJsonWithFilterRadial() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f}, + Map.of("max_distance", "10.5"), + FilterType.EFFICIENT); + + String filterJson = "{\"range\":{\"rating\":{\"gte\":4.0}}}"; + String json = index.buildKnnQueryJson(filterJson); + + assertTrue(json.contains("\"max_distance\":10.5"), "Should contain max_distance"); + assertTrue(json.contains("\"filter\""), "Should contain filter"); + } + + @Test + void buildKnnQueryJsonNullFilterProducesBaseJson() { + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f}, + Map.of("k", "5"), + null); + + String json = index.buildKnnQueryJson(null); + String baseJson = index.buildKnnQueryJson(); + + assertEquals(baseJson, json, "null filter should produce same JSON as no-arg version"); + assertFalse(json.contains("\"filter\""), "Should not contain filter field"); + } + + @Test + void buildKnnQueryJsonExcludesFilterType() { + LinkedHashMap options = new LinkedHashMap<>(); + options.put("k", "5"); + + VectorSearchIndex index = + new VectorSearchIndex( + client, + settings, + "test-index", + "embedding", + new float[] {1.0f}, + options, + FilterType.EFFICIENT); + + String json = index.buildKnnQueryJson(); + assertFalse(json.contains("filter_type"), "filter_type should not appear in knn JSON"); + assertTrue(json.contains("\"k\":5"), "k should still be present"); + } + + @Test + void isInstanceOfOpenSearchIndex() { + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {1.0f}, Map.of("k", "5")); + assertTrue(index instanceof OpenSearchIndex); + } + + @Test + void createScanBuilderRejectsIndexWithScoreField() { + // A mapping that declares a user field named _score cannot coexist with the synthetic + // 
v._score column exposed by vectorSearch(); the guard in createScanBuilder should reject + // it with a clear, user-facing error. + lenient() + .when(settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE)) + .thenReturn(TimeValue.timeValueMinutes(1)); + when(indexMapping.getFieldMappings()) + .thenReturn(Map.of("_score", OpenSearchDataType.of(MappingType.Float))); + when(client.getIndexMappings("test-index")) + .thenReturn(ImmutableMap.of("test-index", indexMapping)); + + VectorSearchIndex index = + new VectorSearchIndex( + client, settings, "test-index", "embedding", new float[] {1.0f}, Map.of("k", "5")); + + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, index::createScanBuilder); + assertTrue( + ex.getMessage().contains("_score"), + "error message should mention the colliding _score field"); + assertTrue( + ex.getMessage().contains("collides"), + "error message should describe the collision, got: " + ex.getMessage()); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementationTest.java new file mode 100644 index 00000000000..7bd64838876 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionImplementationTest.java @@ -0,0 +1,778 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import 
org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; +import org.opensearch.sql.storage.Table; + +@ExtendWith(MockitoExtension.class) +class VectorSearchTableFunctionImplementationTest { + + @Mock private OpenSearchClient client; + + @Mock private Settings settings; + + // No-op capability — tests in this class don't exercise the k-NN plugin probe. + // Dedicated tests for the probe live in KnnPluginCapabilityTest. + private final KnnPluginCapability knnCapability = + org.mockito.Mockito.mock(KnnPluginCapability.class); + + @Test + void testValueOfThrows() { + VectorSearchTableFunctionImplementation impl = createImpl(); + UnsupportedOperationException ex = + assertThrows(UnsupportedOperationException.class, () -> impl.valueOf()); + assertTrue(ex.getMessage().contains("only supported in FROM clause")); + } + + @Test + void testType() { + VectorSearchTableFunctionImplementation impl = createImpl(); + assertEquals(ExprCoreType.STRUCT, impl.type()); + } + + @Test + void testToString() { + VectorSearchTableFunctionImplementation impl = createImpl(); + String str = impl.toString(); + assertTrue(str.contains("vectorsearch")); + assertTrue(str.contains("table=")); + assertTrue(str.contains("my-index")); + } + + @Test + void testApplyArguments() { + VectorSearchTableFunctionImplementation impl = createImpl(); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testApplyArgumentsDoesNotProbeKnnCapability() { + // Contract: applyArguments() runs during analysis (including _explain) and must NOT invoke + // the k-NN plugin 
probe. The probe is deferred to scan open() so pluginless clusters can + // still explain and validate vectorSearch() queries locally. + KnnPluginCapability observingCapability = org.mockito.Mockito.mock(KnnPluginCapability.class); + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, observingCapability); + impl.applyArguments(); + org.mockito.Mockito.verify(observingCapability, org.mockito.Mockito.never()).requireInstalled(); + } + + @Test + void testApplyArgumentsWithBracketedVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0, 3.0]", "k=5"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testApplyArgumentsWithUnbracketedVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "1.0, 2.0, 3.0", "k=5"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testUnknownOptionKeyThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=10,method.ef_search=100"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Unknown option key")); + assertTrue(ex.getMessage().contains("method.ef_search")); + } + + @Test + void testApplyArgumentsWithMaxDistance() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=10.0"); + 
Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testApplyArgumentsWithMinScore() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=0.5"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testUnknownOptionKeyOnlyThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "not_a_key=100"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Unknown option key")); + } + + @Test + void testParseOptionsMultiple() { + Map opts = + VectorSearchTableFunctionImplementation.parseOptions("k=5,max_distance=10.0"); + assertEquals("5", opts.get("k")); + assertEquals("10.0", opts.get("max_distance")); + } + + @Test + void testMalformedOptionSegmentThrows() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=5,badoption")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void testDuplicateOptionKeyThrows() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=5,k=10")); + assertTrue(ex.getMessage().contains("Duplicate option key")); + } + + @Test + void testNoRequiredOptionThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", ""); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Missing required option")); + } + + @Test + void testEmptyVectorThrows() { + VectorSearchTableFunctionImplementation impl = + 
createImplWithArgs("my-index", "embedding", "[]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must not be empty")); + } + + @Test + void testMalformedVectorComponentThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, abc, 3.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid vector component")); + } + + @Test + void testNonFiniteVectorComponentThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, Infinity, 3.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testMissingArgumentThrows() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertEquals("Missing required argument: option", ex.getMessage()); + } + + @Test + void testInvalidFieldNameThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "field\"injection", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid field name")); + } + + 
@Test + void testNestedFieldNameAllowed() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "doc.embedding", "[1.0, 2.0]", "k=5"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testNonNumericKThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=abc"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be an integer")); + } + + @Test + void testNonNumericMaxDistanceThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=notanumber"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a number")); + } + + @Test + void testInfiniteMinScoreThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=Infinity"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testMutualExclusivityKAndMaxDistanceThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,max_distance=10.0"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Only one of")); + } + + @Test + void testMutualExclusivityKAndMinScoreThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,min_score=0.5"); + ExpressionEvaluationException ex = + 
assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Only one of")); + } + + @Test + void testMutualExclusivityAllThreeThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs( + "my-index", "embedding", "[1.0, 2.0]", "k=5,max_distance=10.0,min_score=0.5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Only one of")); + } + + @Test + void testKTooSmallThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=0"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("k must be between 1 and 10000")); + } + + @Test + void testKTooLargeThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=10001"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("k must be between 1 and 10000")); + } + + @Test + void testKBoundaryValuesAllowed() { + // k=1 should work + VectorSearchTableFunctionImplementation impl1 = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=1"); + assertTrue(impl1.applyArguments() instanceof VectorSearchIndex); + + // k=10000 should work + VectorSearchTableFunctionImplementation impl2 = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=10000"); + assertTrue(impl2.applyArguments() instanceof VectorSearchIndex); + } + + @Test + void testNonNamedArgThrows() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = List.of(DSL.literal("my-index")); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, 
settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("requires named arguments")); + } + + @Test + void testNullArgNameThrows() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument(null, DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("requires named arguments")); + } + + @Test + void testNaNVectorComponentThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, NaN, 3.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testEmptyOptionKeyThrows() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("=value")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void testEmptyOptionValueThrows() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("key=")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void testNegativeKThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=-1"); + 
ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("k must be between 1 and 10000")); + } + + @Test + void testNaNMaxDistanceThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=NaN"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testNaNMinScoreThrows() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=NaN"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("must be a finite number")); + } + + @Test + void testCaseInsensitiveArgLookup() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("TABLE", DSL.literal("my-index")), + DSL.namedArgument("FIELD", DSL.literal("embedding")), + DSL.namedArgument("VECTOR", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("OPTION", DSL.literal("k=5"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testInvalidFilterTypeRejects() { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("option", DSL.literal("k=5,filter_type=invalid"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( 
+ functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, impl::applyArguments); + assertTrue(ex.getMessage().contains("filter_type must be one of")); + } + + @Test + void testFilterTypePostAccepted() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,filter_type=post"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testFilterTypeEfficientAccepted() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,filter_type=efficient"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void testParseOptionsPreservesFilterTypeValue() { + Map options = + VectorSearchTableFunctionImplementation.parseOptions("k=5,filter_type=post"); + assertEquals("post", options.get("filter_type")); + } + + @Test + void applyArguments_rejectsInvalidTableName() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("idx\"; DROP", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + assertTrue( + ex.getMessage() + .contains("must contain only alphanumeric characters, dots, underscores, or hyphens")); + } + + @Test + void applyArguments_rejectsAllRoutingTarget() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("_all", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + assertTrue(ex.getMessage().contains("_all")); + } + + @Test + void applyArguments_rejectsSingleDotTable() { + 
VectorSearchTableFunctionImplementation impl = + createImplWithArgs(".", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + } + + @Test + void applyArguments_rejectsDoubleDotTable() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("..", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + } + + @Test + void applyArguments_rejectsWildcardTableWithDedicatedMessage() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("sql_vector_*", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + assertTrue(ex.getMessage().contains("wildcards ('*')")); + assertTrue(ex.getMessage().contains("single concrete index")); + } + + @Test + void applyArguments_rejectsBareStarTableWithDedicatedMessage() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("*", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("wildcards ('*')")); + } + + @Test + void applyArguments_rejectsMultiTargetTableWithDedicatedMessage() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("idx_a,idx_b", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid table name")); + assertTrue(ex.getMessage().contains("multi-target")); + 
assertTrue(ex.getMessage().contains("single concrete index")); + } + + @Test + void applyArguments_rejectsMidNameStarTable() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("foo*bar", "embedding", "[1.0, 2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("wildcards ('*')")); + } + + @Test + void validateNamedArgs_rejectsDuplicateNames() { + // Two occurrences of "table" reach the Implementation layer directly (bypassing the resolver). + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal("a")), + DSL.namedArgument("table", DSL.literal("b")), + DSL.namedArgument("vector", DSL.literal("[1.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + VectorSearchTableFunctionImplementation impl = + new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Duplicate argument name")); + assertTrue(ex.getMessage().contains("table")); + } + + // ── Option parsing: empty value, whitespace, unknown keys ──────────── + + @Test + void parseOptions_rejectsEmptyValue() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void parseOptions_rejectsEmptyValueInMidSegment() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=,filter_type=post")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void 
parseOptions_trimsWhitespaceAroundKeyAndValue() { + Map options = + VectorSearchTableFunctionImplementation.parseOptions(" k = 5 , filter_type = post "); + assertEquals("5", options.get("k")); + assertEquals("post", options.get("filter_type")); + } + + @Test + void applyArguments_rejectsUnknownOptionKey() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs( + "my-index", "embedding", "[1.0, 2.0]", "k=5,method_parameters.ef_search=100"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Unknown option key")); + assertTrue(ex.getMessage().contains("method_parameters.ef_search")); + } + + // ── Vector parsing: non-comma separator ───────────────────────────── + + @Test + void applyArguments_rejectsSemicolonSeparatorInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0;2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("vector=")); + assertTrue(ex.getMessage().contains("comma-separated")); + } + + @Test + void applyArguments_rejectsColonSeparatorInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0:2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("vector=")); + } + + @Test + void applyArguments_rejectsPipeSeparatorInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0|2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("vector=")); + } + + // ── Option bounds: negative k, min_score, max_distance ────────────── 
+ + @Test + void applyArguments_negativeKMessageCitesRange() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=-3"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("1")); + assertTrue(ex.getMessage().contains("10000")); + } + + @Test + void applyArguments_rejectsNegativeMinScore() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=-0.5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("min_score")); + assertTrue(ex.getMessage().contains("non-negative")); + } + + @Test + void applyArguments_rejectsNegativeMaxDistance() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=-1.0"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("max_distance")); + assertTrue(ex.getMessage().contains("non-negative")); + } + + @Test + void applyArguments_acceptsZeroMinScore() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "min_score=0"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + @Test + void applyArguments_acceptsZeroMaxDistance() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "max_distance=0"); + Table table = impl.applyArguments(); + assertTrue(table instanceof VectorSearchIndex); + } + + // ── Vector parsing: trailing / empty components (PR #5381 review) ───── + + @Test + void applyArguments_rejectsTrailingCommaInVector() { + VectorSearchTableFunctionImplementation impl = + 
createImplWithArgs("my-index", "embedding", "[1.0,2.0,]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid vector component")); + assertTrue(ex.getMessage().contains("trailing or consecutive commas")); + } + + @Test + void applyArguments_rejectsConsecutiveCommasInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0,,2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid vector component")); + assertTrue(ex.getMessage().contains("trailing or consecutive commas")); + } + + @Test + void applyArguments_rejectsLeadingCommaInVector() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[,1.0,2.0]", "k=5"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + assertTrue(ex.getMessage().contains("Invalid vector component")); + } + + // ── Option parsing: empty segments (PR #5381 review) ───────────────── + + @Test + void parseOptions_rejectsTrailingEmptySegment() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=5,")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + assertTrue(ex.getMessage().contains("trailing or consecutive commas")); + } + + @Test + void parseOptions_rejectsLeadingEmptySegment() { + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions(",k=5")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + @Test + void parseOptions_rejectsConsecutiveCommas() { + ExpressionEvaluationException ex = + 
assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchTableFunctionImplementation.parseOptions("k=5,,filter_type=post")); + assertTrue(ex.getMessage().contains("Malformed option segment")); + } + + // ── Unknown-key error lists supported keys in stable order (PR #5381 review) ── + + @Test + void applyArguments_unknownOptionKeyErrorListsSupportedKeysInStableOrder() { + VectorSearchTableFunctionImplementation impl = + createImplWithArgs("my-index", "embedding", "[1.0, 2.0]", "k=5,bogus=1"); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> impl.applyArguments()); + // Match the rendered list literal (e.g. "[k, max_distance, min_score, filter_type]") rather + // than searching for the substring "k", which would match the first "k" in "Unknown option + // key" and reduce the assertion to a tautology. + assertTrue( + ex.getMessage().contains("[k, max_distance, min_score, filter_type]"), + "expected stable key order in error; got: " + ex.getMessage()); + } + + @Test + void parseOptions_emptyStringReturnsEmptyMap() { + // The wholly empty option string is explicitly allowed through parseOptions so it flows to + // the "Missing required option" gate in validateOptions. Pins that contract. 
+ Map opts = VectorSearchTableFunctionImplementation.parseOptions(""); + assertTrue(opts.isEmpty()); + } + + private VectorSearchTableFunctionImplementation createImpl() { + return createImplWithArgs("my-index", "embedding", "[1.0, 2.0, 3.0]", "k=5"); + } + + private VectorSearchTableFunctionImplementation createImplWithArgs( + String table, String field, String vector, String option) { + FunctionName functionName = FunctionName.of("vectorsearch"); + List args = + List.of( + DSL.namedArgument("table", DSL.literal(table)), + DSL.namedArgument("field", DSL.literal(field)), + DSL.namedArgument("vector", DSL.literal(vector)), + DSL.namedArgument("option", DSL.literal(option))); + return new VectorSearchTableFunctionImplementation( + functionName, args, client, settings, knnCapability); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolverTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolverTest.java new file mode 100644 index 00000000000..c6fece7bf32 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/VectorSearchTableFunctionResolverTest.java @@ -0,0 +1,208 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.List; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.setting.Settings; +import 
org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.function.FunctionBuilder; +import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.expression.function.FunctionProperties; +import org.opensearch.sql.expression.function.FunctionSignature; +import org.opensearch.sql.expression.function.TableFunctionImplementation; +import org.opensearch.sql.opensearch.client.OpenSearchClient; + +@ExtendWith(MockitoExtension.class) +class VectorSearchTableFunctionResolverTest { + + @Mock private OpenSearchClient client; + + @Mock private Settings settings; + + @Mock private FunctionProperties functionProperties; + + @Test + void testResolve() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0, 3.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + + Pair resolution = resolver.resolve(functionSignature); + + assertEquals(functionName, resolution.getKey().getFunctionName()); + assertEquals(functionName, resolver.getFunctionName()); + assertEquals(List.of(STRING, STRING, STRING, STRING), resolution.getKey().getParamTypeList()); + + TableFunctionImplementation impl = + (TableFunctionImplementation) resolution.getValue().apply(functionProperties, expressions); + assertTrue(impl instanceof VectorSearchTableFunctionImplementation); + } + + @Test + void testWrongArgumentCount() { + VectorSearchTableFunctionResolver resolver = + new 
VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + + Pair resolution = resolver.resolve(functionSignature); + FunctionBuilder builder = resolution.getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("requires 4 arguments")); + } + + @Test + void testTooManyArguments() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0]")), + DSL.namedArgument("option", DSL.literal("k=5")), + DSL.namedArgument("extra", DSL.literal("unexpected"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + + Pair resolution = resolver.resolve(functionSignature); + FunctionBuilder builder = resolution.getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("requires 4 arguments")); + } + + @Test + void testZeroArguments() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = List.of(); + 
FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + + Pair resolution = resolver.resolve(functionSignature); + FunctionBuilder builder = resolution.getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("requires 4 arguments")); + } + + @Test + void resolve_rejectsPositionalArgument() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + // One positional literal mixed with three named arguments. Arity passes, but the resolver + // must reject this before planning so the SQL layer returns a clean 400 rather than a 200 + // with zero rows. + List expressions = + List.of( + DSL.literal("my-index"), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + FunctionBuilder builder = resolver.resolve(functionSignature).getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("requires named arguments")); + } + + @Test + void resolve_rejectsDuplicateNamedArgument() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("a")), + DSL.namedArgument("table", DSL.literal("b")), + DSL.namedArgument("vector", 
DSL.literal("[1.0]")), + DSL.namedArgument("option", DSL.literal("k=5"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + FunctionBuilder builder = resolver.resolve(functionSignature).getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("Duplicate argument name")); + assertTrue(ex.getMessage().contains("table")); + } + + @Test + void resolve_rejectsUnknownArgumentName() { + VectorSearchTableFunctionResolver resolver = + new VectorSearchTableFunctionResolver(client, settings); + FunctionName functionName = FunctionName.of("vectorsearch"); + List expressions = + List.of( + DSL.namedArgument("table", DSL.literal("my-index")), + DSL.namedArgument("field", DSL.literal("embedding")), + DSL.namedArgument("vector", DSL.literal("[1.0, 2.0]")), + DSL.namedArgument("bogus", DSL.literal("k=5"))); + FunctionSignature functionSignature = + new FunctionSignature( + functionName, expressions.stream().map(Expression::type).collect(Collectors.toList())); + FunctionBuilder builder = resolver.resolve(functionSignature).getValue(); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> builder.apply(functionProperties, expressions)); + assertTrue(ex.getMessage().contains("Unknown argument name")); + assertTrue(ex.getMessage().contains("bogus")); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapabilityTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapabilityTest.java new file mode 100644 index 00000000000..147a5a093ce --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/capability/KnnPluginCapabilityTest.java @@ -0,0 +1,129 @@ +/* + * Copyright OpenSearch 
Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.capability; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.List; +import java.util.Optional; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.action.admin.cluster.node.info.NodeInfo; +import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest; +import org.opensearch.action.admin.cluster.node.info.NodesInfoResponse; +import org.opensearch.action.admin.cluster.node.info.PluginsAndModules; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.plugins.PluginInfo; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.transport.client.AdminClient; +import org.opensearch.transport.client.ClusterAdminClient; +import org.opensearch.transport.client.node.NodeClient; + +@ExtendWith(MockitoExtension.class) +class KnnPluginCapabilityTest { + + @Mock private OpenSearchClient client; + @Mock private NodeClient nodeClient; + @Mock private AdminClient adminClient; + @Mock private ClusterAdminClient clusterAdminClient; + @Mock private ActionFuture nodesInfoFuture; + + @Test + void skipsWhenNodeClientAbsent() { + when(client.getNodeClient()).thenReturn(Optional.empty()); + KnnPluginCapability capability = new KnnPluginCapability(client); + // No exception — REST-client mode cannot probe; execution-time errors remain the signal. 
+ assertDoesNotThrow(capability::requireInstalled); + } + + @Test + void passesWhenKnnPluginInstalled() { + stubNodesInfo(pluginInfo("org.opensearch.knn.plugin.KNNPlugin")); + KnnPluginCapability capability = new KnnPluginCapability(client); + assertDoesNotThrow(capability::requireInstalled); + } + + @Test + void throwsWhenKnnPluginAbsent() { + stubNodesInfo(pluginInfo("org.opensearch.security.OpenSearchSecurityPlugin")); + KnnPluginCapability capability = new KnnPluginCapability(client); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, capability::requireInstalled); + assertTrue( + ex.getMessage().contains("k-NN plugin"), + "Expected k-NN plugin message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("not installed"), + "Expected 'not installed' phrasing, got: " + ex.getMessage()); + } + + @Test + void cachesSuccessfulProbeResult() { + stubNodesInfo(pluginInfo("org.opensearch.knn.plugin.KNNPlugin")); + KnnPluginCapability capability = new KnnPluginCapability(client); + capability.requireInstalled(); + capability.requireInstalled(); + capability.requireInstalled(); + // Probe fires once regardless of how many times requireInstalled() is called. 
+ verify(clusterAdminClient, times(1)).nodesInfo(any(NodesInfoRequest.class)); + } + + @Test + void cachesNegativeProbeResult() { + stubNodesInfo(pluginInfo("org.opensearch.security.OpenSearchSecurityPlugin")); + KnnPluginCapability capability = new KnnPluginCapability(client); + assertThrows(ExpressionEvaluationException.class, capability::requireInstalled); + assertThrows(ExpressionEvaluationException.class, capability::requireInstalled); + verify(clusterAdminClient, times(1)).nodesInfo(any(NodesInfoRequest.class)); + } + + @Test + void doesNotCacheOnProbeFailure() { + when(client.getNodeClient()).thenReturn(Optional.of(nodeClient)); + when(nodeClient.admin()).thenReturn(adminClient); + when(adminClient.cluster()).thenReturn(clusterAdminClient); + when(clusterAdminClient.nodesInfo(any(NodesInfoRequest.class))).thenReturn(nodesInfoFuture); + when(nodesInfoFuture.actionGet()).thenThrow(new RuntimeException("transport error")); + + KnnPluginCapability capability = new KnnPluginCapability(client); + assertDoesNotThrow(capability::requireInstalled); // probe failed — treat as unknown + assertDoesNotThrow(capability::requireInstalled); + // Probe retries on each call after a failure — failures are not cached. + verify(clusterAdminClient, times(2)).nodesInfo(any(NodesInfoRequest.class)); + } + + private void stubNodesInfo(PluginInfo... 
plugins) { + when(client.getNodeClient()).thenReturn(Optional.of(nodeClient)); + when(nodeClient.admin()).thenReturn(adminClient); + when(adminClient.cluster()).thenReturn(clusterAdminClient); + when(clusterAdminClient.nodesInfo(any(NodesInfoRequest.class))).thenReturn(nodesInfoFuture); + + NodeInfo nodeInfo = mock(NodeInfo.class); + PluginsAndModules pam = mock(PluginsAndModules.class); + when(nodeInfo.getInfo(PluginsAndModules.class)).thenReturn(pam); + when(pam.getPluginInfos()).thenReturn(List.of(plugins)); + + NodesInfoResponse response = mock(NodesInfoResponse.class); + when(response.getNodes()).thenReturn(List.of(nodeInfo)); + when(nodesInfoFuture.actionGet()).thenReturn(response); + } + + private PluginInfo pluginInfo(String classname) { + PluginInfo pluginInfo = mock(PluginInfo.class); + when(pluginInfo.getClassname()).thenReturn(classname); + return pluginInfo; + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java index a91c99e26cd..bf83d972dbe 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java @@ -39,7 +39,6 @@ import org.apache.calcite.sql.type.SqlTypeFactoryImpl; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.ImmutableBitSet; -import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -47,10 +46,8 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.setting.Settings.Key; -import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; 
-import org.opensearch.sql.opensearch.storage.scan.context.AggPushDownAction; -import org.opensearch.sql.opensearch.storage.scan.context.AggregationBuilderAction; +import org.opensearch.sql.opensearch.storage.scan.context.AggSpec; import org.opensearch.sql.opensearch.storage.scan.context.FilterDigest; import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; @@ -60,6 +57,7 @@ @ExtendWith(MockitoExtension.class) public class CalciteIndexScanCostTest { static final RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + private static final OSRequestBuilderAction NO_OP_ACTION = req -> {}; final RexBuilder builder = new RexBuilder(typeFactory); @Mock private static RelOptCluster cluster; @@ -210,17 +208,12 @@ void test_cost_on_aggregate_pushdown() { null, List.of()); when(mq.getRowCount(aggregate)).thenReturn(1000d); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - }; lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(1)); lenient().when(relDataType.getFieldCount()).thenReturn(1); lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals(1800, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -233,11 +226,6 @@ void test_cost_on_aggregate_pushdown_with_one_aggCall() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} 
- }; AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -266,7 +254,7 @@ public void apply(OpenSearchRequestBuilder requestBuilder) {} lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals(2812.5, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -279,11 +267,6 @@ void test_cost_on_aggregate_pushdown_with_two_aggCall() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - }; AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -325,7 +308,7 @@ public void apply(OpenSearchRequestBuilder requestBuilder) {} lenient().when(table.getRowType()).thenReturn(relDataType); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals( 3836.2500429153442, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -339,16 +322,8 @@ void test_cost_on_aggregate_pushdown_with_one_aggCall_with_script() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - - @Override - public long getScriptCount() { - return 1; - } - }; + AggSpec aggSpec = mock(AggSpec.class); + when(aggSpec.getScriptCount()).thenReturn(1L); AggregateCall 
countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -375,9 +350,10 @@ public long getScriptCount() { lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(2)); lenient().when(relDataType.getFieldCount()).thenReturn(2); lenient().when(table.getRowType()).thenReturn(relDataType); + scan.getPushDownContext().setAggSpec(aggSpec); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); assertEquals( 2913.7500643730164, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } @@ -474,16 +450,8 @@ void test_cost_on_aggregate_pushdown_along_with_others() { lenient().when(table.getRowType()).thenReturn(relDataType); CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - AggPushDownAction action = - new AggPushDownAction(Pair.of(List.of(), null), null, List.of()) { - @Override - public void apply(OpenSearchRequestBuilder requestBuilder) {} - - @Override - public long getScriptCount() { - return 1; - } - }; + AggSpec aggSpec = mock(AggSpec.class); + when(aggSpec.getScriptCount()).thenReturn(1L); AggregateCall countCall = AggregateCall.create( SqlStdOperatorTable.COUNT, @@ -510,6 +478,7 @@ public long getScriptCount() { lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(2)); lenient().when(relDataType.getFieldCount()).thenReturn(2); lenient().when(table.getRowType()).thenReturn(relDataType); + scan.getPushDownContext().setAggSpec(aggSpec); List projectDigest1 = List.of("A", "B"); scan.getPushDownContext() @@ -517,19 +486,15 @@ public long getScriptCount() { new PushDownOperation( PushDownType.PROJECT, projectDigest1, (OSRequestBuilderAction) req -> {})); scan.getPushDownContext() - .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, action)); + .add(new PushDownOperation(PushDownType.AGGREGATION, aggregate, NO_OP_ACTION)); List projectDigest2 
= List.of("COUNT"); scan.getPushDownContext() - .add( - new PushDownOperation( - PushDownType.PROJECT, projectDigest2, (AggregationBuilderAction) req -> {})); + .add(new PushDownOperation(PushDownType.PROJECT, projectDigest2, NO_OP_ACTION)); scan.getPushDownContext() .add(new PushDownOperation(PushDownType.SORT, null, (OSRequestBuilderAction) req -> {})); LimitDigest limitDigest = new LimitDigest(100, 0); scan.getPushDownContext() - .add( - new PushDownOperation( - PushDownType.LIMIT, limitDigest, (AggregationBuilderAction) req -> {})); + .add(new PushDownOperation(PushDownType.LIMIT, limitDigest, NO_OP_ACTION)); lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(projectDigest2.size())); assertEquals( 2102.8500643730163, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilderTest.java new file mode 100644 index 00000000000..ce2f2efb824 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanBuilderTest.java @@ -0,0 +1,234 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; + +import com.google.common.collect.ImmutableList; +import java.util.Collections; +import org.junit.jupiter.api.Test; +import org.opensearch.index.query.WrapperQueryBuilder; +import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.type.ExprCoreType; +import 
org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.logical.LogicalProject; +import org.opensearch.sql.planner.logical.LogicalSort; +import org.opensearch.sql.planner.logical.LogicalValues; + +class VectorSearchIndexScanBuilderTest { + + private VectorSearchIndexScanBuilder newScanBuilder() { + var requestBuilder = + new OpenSearchRequestBuilder( + mock(OpenSearchExprValueFactory.class), 10000, mock(Settings.class)); + var queryBuilder = + new VectorSearchQueryBuilder( + requestBuilder, new WrapperQueryBuilder("{\"knn\":{}}"), java.util.Map.of("k", "5")); + return new VectorSearchIndexScanBuilder(queryBuilder, rb -> mock(OpenSearchIndexScan.class)); + } + + private static LogicalProject project(LogicalPlan input) { + NamedExpression field = DSL.named("id", DSL.ref("id", ExprCoreType.STRING)); + return new LogicalProject(input, ImmutableList.of(field), ImmutableList.of()); + } + + private static LogicalFilter filter(LogicalPlan input) { + return new LogicalFilter( + input, DSL.less(DSL.ref("price", ExprCoreType.INTEGER), DSL.literal(150))); + } + + private static LogicalSort sort(LogicalPlan input) { + return new LogicalSort( + input, + ImmutableList.of( + org.apache.commons.lang3.tuple.Pair.of( + Sort.SortOption.DEFAULT_DESC, DSL.ref("price", ExprCoreType.INTEGER)))); + } + + private static LogicalLimit limit(LogicalPlan input, int offset) { + return new LogicalLimit(input, 10, offset); + } + + private static LogicalAggregation aggregation(LogicalPlan 
input) { + return new LogicalAggregation(input, Collections.emptyList(), Collections.emptyList(), false); + } + + @Test + void pushDownAggregationIsRejected() { + var scanBuilder = newScanBuilder(); + + var agg = + new LogicalAggregation( + new LogicalValues(Collections.emptyList()), + Collections.emptyList(), + Collections.emptyList(), + false); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, () -> scanBuilder.pushDownAggregation(agg)); + assertTrue( + ex.getMessage().contains("Aggregations are not supported"), + "Error should state aggregations are not supported; actual: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("vectorSearch"), + "Error should mention vectorSearch; actual: " + ex.getMessage()); + } + + @Test + void validatePlanRejectsOuterFilterOverSubqueryProject() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v) t WHERE t.price < 150 + // Shape after optimizer: Project(outer) → Filter → Project(inner) → scanBuilder + var scanBuilder = newScanBuilder(); + LogicalPlan root = project(filter(project(scanBuilder))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer WHERE on a vectorSearch() subquery"), + "Error should mention outer WHERE on subquery; actual: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("silently yield zero rows"), + "Error should explain silent zero rows; actual: " + ex.getMessage()); + } + + @Test + void validatePlanRejectsDoubleWrappedOuterFilter() { + // Models nested subqueries: + // SELECT * FROM (SELECT * FROM (SELECT v.id FROM vs(...) 
AS v) t1) t2 WHERE t2.price < 150 + var scanBuilder = newScanBuilder(); + LogicalPlan root = filter(project(project(scanBuilder))); + + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanAllowsFilterDirectlyAboveScanBuilder() { + // Models: SELECT v.id FROM vs(...) AS v WHERE v.gender='M' + // Here the filter would normally be pushed down and removed, but if it were kept (e.g. a + // non-pushdownable predicate), validatePlan must not reject it — it is already at the + // vectorSearch level, not an outer filter. + var scanBuilder = newScanBuilder(); + LogicalPlan root = project(filter(scanBuilder)); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanAllowsInnerFilterWrappedInOuterProject() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v WHERE v.gender='M') t + // After pushdown the inner filter may remain when non-pushdownable; importantly, there is no + // outer filter — only outer projects wrapping an inner filter directly on scanBuilder. + var scanBuilder = newScanBuilder(); + LogicalPlan root = project(project(filter(scanBuilder))); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanRejectsFilterProjectFilterShape() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v WHERE v.gender='M') t + // WHERE t.price < 150 + // Shape: Filter(outer) → Project(subquery) → Filter(inner) → scanBuilder + // The outer filter is still separated from the scan by the subquery Project; the inner + // filter sitting between the Project and the scan does not erase that boundary. Without + // preserving the project marker across the inner filter, the walker would miss this shape. 
+ var scanBuilder = newScanBuilder(); + LogicalPlan root = filter(project(filter(scanBuilder))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer WHERE on a vectorSearch() subquery"), + "Error should mention outer WHERE on subquery; actual: " + ex.getMessage()); + } + + @Test + void validatePlanAllowsNoFilterAtAll() { + // Baseline: no WHERE anywhere. SELECT * FROM (SELECT v.id FROM vs(...) AS v) t + var scanBuilder = newScanBuilder(); + LogicalPlan root = project(project(scanBuilder)); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanAllowsBareScanBuilder() { + // Defensive: a plan that is just the scan builder itself. + var scanBuilder = newScanBuilder(); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(scanBuilder)); + } + + @Test + void validatePlanRejectsOuterSortOverSubqueryProject() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) AS v) t ORDER BY t.price + // Shape: Sort(outer) → Project(subquery) → scanBuilder + // Outer ORDER BY would be applied only after top-k ANN results, producing an order the user + // did not ask for (vector distance ordering leaks through when rows are fewer than expected). + var scanBuilder = newScanBuilder(); + LogicalPlan root = sort(project(scanBuilder)); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer ORDER BY on a vectorSearch() subquery"), + "Error should mention outer ORDER BY on subquery; actual: " + ex.getMessage()); + } + + @Test + void validatePlanRejectsOuterOffsetOverSubqueryProject() { + // Models: SELECT * FROM (SELECT v.id FROM vs(...) 
AS v) t LIMIT 10 OFFSET 5 + // Outer OFFSET silently skips the top-N nearest rows chosen by ANN, so the remaining rows + // would be a truncated tail of the k-NN result set rather than the user's intended window. + var scanBuilder = newScanBuilder(); + LogicalPlan root = limit(project(scanBuilder), 5); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer OFFSET on a vectorSearch() subquery"), + "Error should mention outer OFFSET on subquery; actual: " + ex.getMessage()); + } + + @Test + void validatePlanAllowsOuterLimitWithoutOffsetOverSubquery() { + // Outer LIMIT with offset=0 just caps row count and is safe over a subquery — reject only + // non-zero OFFSET. Locks in the offset==0 boundary of the guard. + var scanBuilder = newScanBuilder(); + LogicalPlan root = limit(project(scanBuilder), 0); + + assertDoesNotThrow(() -> scanBuilder.validatePlan(root)); + } + + @Test + void validatePlanRejectsOuterAggregationOverSubqueryProject() { + // Models: SELECT COUNT(*) FROM (SELECT v.id FROM vs(...) AS v) t + // (Or outer GROUP BY / DISTINCT, both of which rewrite to LogicalAggregation.) The outer + // aggregation would run on a truncated top-k slice rather than a meaningful population, + // masking the fact that aggregations are not supported on vectorSearch() in this preview. 
+ var scanBuilder = newScanBuilder(); + LogicalPlan root = aggregation(project(scanBuilder)); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> scanBuilder.validatePlan(root)); + assertTrue( + ex.getMessage().contains("Outer GROUP BY / aggregation / DISTINCT on a vectorSearch()"), + "Error should mention outer aggregation on subquery; actual: " + ex.getMessage()); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanTest.java new file mode 100644 index 00000000000..3fa2adec88a --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchIndexScanTest.java @@ -0,0 +1,39 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; + +import org.junit.jupiter.api.Test; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.sql.opensearch.storage.capability.KnnPluginCapability; + +class VectorSearchIndexScanTest { + + @Test + void openProbesKnnPluginBeforeFetch() { + OpenSearchClient client = mock(OpenSearchClient.class); + OpenSearchRequest request = mock(OpenSearchRequest.class); + KnnPluginCapability capability = mock(KnnPluginCapability.class); + doThrow(new ExpressionEvaluationException("k-NN plugin missing")) + .when(capability) + .requireInstalled(); + + VectorSearchIndexScan scan = new 
VectorSearchIndexScan(client, 10, request, capability); + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, scan::open); + assertTrue(ex.getMessage().contains("k-NN plugin")); + // Capability threw, so the underlying client must not have been touched for this scan. + verify(client, never()).search(request); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java new file mode 100644 index 00000000000..b02d680af15 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java @@ -0,0 +1,857 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import org.apache.lucene.search.join.ScoreMode; +import org.junit.jupiter.api.Test; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.WrapperQueryBuilder; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.expression.DSL; +import 
org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.storage.FilterType; +import org.opensearch.sql.planner.logical.LogicalFilter; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalValues; + +class VectorSearchQueryBuilderTest { + + @Test + void knnQuerySetAsScoringQuery() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + QueryBuilder query = requestBuilder.getSourceBuilder().query(); + assertTrue( + query instanceof WrapperQueryBuilder, + "knn query should be set directly as top-level query (scoring context)"); + } + + @Test + void pushDownFilterKeepsKnnInScoringContext() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // Simulate WHERE name = 'John' + var condition = DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John")); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + + assertTrue(pushed, "pushDownFilter should succeed"); + QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query(); + assertTrue(resultQuery instanceof BoolQueryBuilder, "Result should be a BoolQuery"); + BoolQueryBuilder boolQuery = (BoolQueryBuilder) resultQuery; + assertEquals(1, boolQuery.must().size(), "knn query should be in must (scoring context)"); + assertEquals(1, boolQuery.filter().size(), "WHERE predicate should be in filter (non-scoring)"); + assertTrue( + boolQuery.must().get(0) instanceof 
WrapperQueryBuilder, + "must clause should contain the original knn WrapperQueryBuilder"); + } + + @Test + void pushDownLimitWithinKSucceeds() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 3, 0); + + boolean pushed = builder.pushDownLimit(limit); + assertTrue(pushed, "LIMIT within k should succeed"); + } + + @Test + void pushDownLimitExceedingKThrows() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 10, 0); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownLimit(limit)); + assertTrue(ex.getMessage().contains("LIMIT 10 exceeds k=5")); + } + + @Test + void pushDownLimitEqualToKSucceeds() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 5, 0); + + boolean pushed = builder.pushDownLimit(limit); + assertTrue(pushed, "LIMIT equal to k should succeed"); + } + + @Test + void pushDownLimitRadialModeNoRestriction() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("max_distance", "10.0")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 100, 0); + + boolean 
pushed = builder.pushDownLimit(limit); + assertTrue(pushed, "Radial mode should not restrict LIMIT"); + } + + @Test + void pushDownLimitMinScoreModeNoRestriction() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("min_score", "0.5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 100, 0); + + boolean pushed = builder.pushDownLimit(limit); + assertTrue(pushed, "min_score mode should not restrict LIMIT"); + } + + @Test + void pushDownSortScoreDescAccepted() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + boolean pushed = builder.pushDownSort(sort); + assertTrue(pushed, "ORDER BY _score DESC should be accepted"); + } + + @Test + void pushDownSortPreservesSortCountAsLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "10")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + // LogicalSort with count=7 simulates a sort+limit combined node (PPL path) + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + 7, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + boolean pushed = 
builder.pushDownSort(sort); + assertTrue(pushed, "ORDER BY _score DESC with count should be accepted"); + assertEquals( + 7, + requestBuilder.getMaxResponseSize(), + "sort.getCount() should be pushed down as request size"); + } + + @Test + void pushDownSortCountExceedingKRejects() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + // LogicalSort with count=10 exceeds k=5 — should be rejected + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + 10, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("LIMIT 10 exceeds k=5")); + } + + @Test + void pushDownSortNonScoreFieldRejected() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC, + new ReferenceExpression("name", STRING)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("unsupported sort expression")); + } + + @Test + void pushDownSortMultipleExpressionsRejectsNonScore() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new 
WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)), + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC, + new ReferenceExpression("name", STRING)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("unsupported sort expression")); + } + + @Test + void pushDownSortScoreAscRejected() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("_score ASC is not supported")); + } + + @Test + void pushDownFilterCompoundPredicateSurvives() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // Simulate WHERE name = 'John' AND age > 30 + var condition = + DSL.and( + DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John")), + 
DSL.greater(new ReferenceExpression("age", ExprCoreType.INTEGER), DSL.literal(30))); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + + assertTrue(pushed, "pushDownFilter with compound predicate should succeed"); + QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query(); + assertTrue(resultQuery instanceof BoolQueryBuilder, "Result should be a BoolQuery"); + BoolQueryBuilder boolQuery = (BoolQueryBuilder) resultQuery; + assertEquals(1, boolQuery.must().size(), "knn query should be in must (scoring context)"); + assertEquals(1, boolQuery.filter().size(), "compound WHERE should be in filter (non-scoring)"); + } + + @Test + void pushDownFilterEfficientPlacesInsideKnn() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + // Callback simulates VectorSearchIndex rebuilding knn with filter + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{\"filter\":\"embedded\"}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + var condition = DSL.equal(new ReferenceExpression("city", STRING), DSL.literal("Miami")); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + + assertTrue(pushed, "pushDownFilter should succeed"); + QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query(); + assertTrue( + resultQuery instanceof WrapperQueryBuilder, + "Efficient filter should produce a WrapperQueryBuilder (rebuilt knn), not BoolQuery"); + } + + @Test + void pushDownFilterExplicitPostProducesBool() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new 
VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.POST, true, null); + + var condition = DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John")); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + + assertTrue(pushed); + QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query(); + assertTrue(resultQuery instanceof BoolQueryBuilder); + BoolQueryBuilder boolQuery = (BoolQueryBuilder) resultQuery; + assertEquals(1, boolQuery.must().size()); + assertEquals(1, boolQuery.filter().size()); + } + + // ── Constructor validation ────────────────────────────────────────── + + @Test + void constructorRejectsEfficientModeWithNullCallback() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + + assertThrows( + IllegalArgumentException.class, + () -> + new VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.EFFICIENT, true, null)); + } + + // ── Build-time validation ──────────────────────────────────────────── + + @Test + void buildRejectsExplicitFilterTypePostWithoutWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.POST, true, null); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, builder::build); + assertTrue(ex.getMessage().contains("filter_type requires a pushdownable WHERE clause")); + } + + @Test + void buildRejectsExplicitFilterTypeEfficientWithoutWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{\"filter\":\"embedded\"}}"); + var builder = + new 
VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, builder::build); + assertTrue(ex.getMessage().contains("filter_type requires a pushdownable WHERE clause")); + } + + @Test + void buildSucceedsWithNoFilterTypeAndNoWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + @Test + void buildSucceedsWithFilterTypeAndPushedWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.POST, true, null); + + var condition = DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John")); + var dummyChild = new LogicalValues(Collections.emptyList()); + builder.pushDownFilter(new LogicalFilter(dummyChild, condition)); + + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + // ── Radial without LIMIT rejection ───────────────────────────────── + + @Test + void buildRejectsRadialMaxDistanceWithoutLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("max_distance", "10.0")); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, builder::build); + assertTrue(ex.getMessage().contains("LIMIT is required for radial vector search")); + } + + @Test + void buildRejectsRadialMinScoreWithoutLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder 
= + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("min_score", "0.5")); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, builder::build); + assertTrue(ex.getMessage().contains("LIMIT is required for radial vector search")); + } + + @Test + void buildSucceedsRadialWithLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("max_distance", "10.0")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + builder.pushDownLimit(new LogicalLimit(dummyChild, 50, 0)); + + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + @Test + void buildSucceedsTopKWithoutLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + // ── Regression: LIMIT and sort invariants under efficient mode ────── + + @Test + void pushDownLimitExceedingKThrowsUnderEfficientMode() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 10, 0); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownLimit(limit)); + assertTrue(ex.getMessage().contains("LIMIT 10 exceeds k=5")); + } + + @Test + void pushDownSortScoreDescAcceptedUnderEfficientMode() { + var requestBuilder = 
createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + boolean pushed = builder.pushDownSort(sort); + assertTrue(pushed, "ORDER BY _score DESC should be accepted under efficient mode"); + } + + @Test + void pushDownSortNonScoreRejectedUnderEfficientMode() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( + dummyChild, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC, + new ReferenceExpression("name", STRING)))); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort)); + assertTrue(ex.getMessage().contains("unsupported sort expression")); + } + + // ── Non-pushdownable filter handling ────────────────────────────────── + + @Test + void pushDownFilterNonPushdownableWithExplicitFilterTypeThrows() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new 
VectorSearchQueryBuilder( + requestBuilder, knnQuery, Map.of("k", "5"), FilterType.POST, true, null); + + // STRUCT = STRUCT triggers ScriptQueryUnSupportedException in FilterQueryBuilder + var condition = + DSL.equal( + new ReferenceExpression("nested_field", ExprCoreType.STRUCT), + new ReferenceExpression("other_field", ExprCoreType.STRUCT)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("filter_type only works when the WHERE clause can be translated")); + assertTrue(ex.getMessage().contains("Rewrite the WHERE clause or omit filter_type")); + } + + @Test + void pushDownFilterNonPushdownableWithoutExplicitFilterTypeFallsBack() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // STRUCT = STRUCT triggers ScriptQueryUnSupportedException in FilterQueryBuilder + var condition = + DSL.equal( + new ReferenceExpression("nested_field", ExprCoreType.STRUCT), + new ReferenceExpression("other_field", ExprCoreType.STRUCT)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + boolean pushed = builder.pushDownFilter(filter); + assertFalse(pushed, "Non-pushdownable filter should return false for in-memory fallback"); + } + + // ── OFFSET rejection ──────────────────────────────────────────────── + + @Test + void pushDownLimit_rejectsNonZeroOffset() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + // LIMIT 3 
OFFSET 2: the planner passes both through LogicalLimit + var limit = new LogicalLimit(dummyChild, 3, 2); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownLimit(limit)); + assertTrue( + ex.getMessage().contains("OFFSET is not supported on vectorSearch()"), + "Expected OFFSET rejection message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("LIMIT only"), + "Expected remediation guidance in message, got: " + ex.getMessage()); + } + + @Test + void pushDownLimit_acceptsZeroOffset() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + var limit = new LogicalLimit(dummyChild, 3, 0); + + // Zero offset is the normal case; must continue to succeed. + assertTrue(builder.pushDownLimit(limit)); + } + + // ── WHERE on _score rejection ──────────────────────────────────────── + + @Test + void pushDownFilter_rejectsScoreReferenceInWhere() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // WHERE _score > 0.5 (note: _score is a synthetic column, not a stored field) + var condition = + DSL.greater(new ReferenceExpression("_score", ExprCoreType.FLOAT), DSL.literal(0.5)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("WHERE on _score is not supported"), + "Expected _score rejection message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("min_score"), + "Expected remediation guidance 
pointing at option='min_score=...', got: " + + ex.getMessage()); + } + + @Test + void pushDownFilter_rejectsScoreReferenceInsideCompound() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // WHERE state = 'TX' AND _score > 0.5: rejection must walk compound predicates + var condition = + DSL.and( + DSL.equal(new ReferenceExpression("state", STRING), DSL.literal("TX")), + DSL.greater(new ReferenceExpression("_score", ExprCoreType.FLOAT), DSL.literal(0.5))); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("WHERE on _score is not supported"), + "Expected _score rejection message, got: " + ex.getMessage()); + } + + @Test + void pushDownFilter_rejectsUppercaseScoreReference() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + // WHERE _SCORE > 0.5 must be rejected the same way as _score; the check is case-insensitive + // so variants that preserve original casing cannot bypass the guard. 
+ var condition = + DSL.greater(new ReferenceExpression("_SCORE", ExprCoreType.FLOAT), DSL.literal(0.5)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("WHERE on _score is not supported"), + "Expected _score rejection message, got: " + ex.getMessage()); + } + + // ── filter_type=efficient rejects script subtrees ─────────────────── + + @Test + void pushDownFilter_efficient_rejectsScriptSubtree() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + Function rebuildWithFilter = + whereQuery -> new WrapperQueryBuilder("{\"knn\":{\"filter\":\"embedded\"}}"); + var builder = + new VectorSearchQueryBuilder( + requestBuilder, + knnQuery, + Map.of("k", "5"), + FilterType.EFFICIENT, + true, + rebuildWithFilter); + + // price + 1 > 100 lowers to a ScriptQueryBuilder; embedding it under knn.filter would + // trigger the AOSS rejection this PR guards against. 
+ var condition = + DSL.greater( + DSL.add(new ReferenceExpression("price", ExprCoreType.INTEGER), DSL.literal(1)), + DSL.literal(100)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + ExpressionEvaluationException ex = + assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter)); + assertTrue( + ex.getMessage().contains("vectorSearch WHERE pre-filtering does not support"), + "Expected script rejection message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("script queries"), + "Expected script queries guidance in message, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("filter_type=post"), + "Expected filter_type=post fallback guidance, got: " + ex.getMessage()); + } + + @Test + void pushDownFilter_post_allowsScriptSubtree() { + // POST puts WHERE in an outer bool.filter, not under knn.filter, so scripts are fine. + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5")); + + var condition = + DSL.greater( + DSL.add(new ReferenceExpression("price", ExprCoreType.INTEGER), DSL.literal(1)), + DSL.literal(100)); + var dummyChild = new LogicalValues(Collections.emptyList()); + var filter = new LogicalFilter(dummyChild, condition); + + assertTrue(builder.pushDownFilter(filter), "POST mode must still accept script predicates"); + } + + @Test + void buildSucceedsRadialWithSortEmbeddedLimit() { + var requestBuilder = createRequestBuilder(); + var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}"); + var builder = + new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("max_distance", "10.0")); + + var dummyChild = new LogicalValues(Collections.emptyList()); + // LogicalSort with count=50 simulates PPL sort-with-limit path + var sort = + new org.opensearch.sql.planner.logical.LogicalSort( 
+ dummyChild, + 50, + List.of( + org.apache.commons.lang3.tuple.ImmutablePair.of( + org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC, + new ReferenceExpression("_score", ExprCoreType.FLOAT)))); + + builder.pushDownSort(sort); + + // build() should not reject — limitPushed must be true via pushDownSort's count path + OpenSearchRequestBuilder result = builder.build(); + assertNotNull(result); + } + + // ── filter_type=efficient allow-list validator ────────────────────── + + @Test + void validateEfficientFilterSafe_rejectsNestedQuery() { + // FilterQueryBuilder emits NestedQueryBuilder for SQL nested(field, pred); nested vector + // semantics are outside the P0 preview so rejection must be targeted, not generic. + QueryBuilder nested = + QueryBuilders.nestedQuery( + "parent", QueryBuilders.termQuery("parent.f", "v"), ScoreMode.None); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchQueryBuilder.validateEfficientFilterSafe(nested)); + assertTrue( + ex.getMessage().contains("vectorSearch WHERE pre-filtering does not support nested"), + "Expected targeted nested rejection, got: " + ex.getMessage()); + } + + @Test + void validateEfficientFilterSafe_rejectsNestedBuriedInBool() { + // AND-ing nested() with a term must still be caught; otherwise the guard is trivially bypassed. 
+ QueryBuilder tree = + QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery("state", "CA")) + .filter( + QueryBuilders.nestedQuery( + "parent", QueryBuilders.termQuery("parent.f", "v"), ScoreMode.None)); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchQueryBuilder.validateEfficientFilterSafe(tree)); + assertTrue(ex.getMessage().contains("nested predicates")); + } + + @Test + void validateEfficientFilterSafe_acceptsBoolOfSafeLeaves() { + QueryBuilder tree = + QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery("category", "shoes")) + .filter(QueryBuilders.rangeQuery("price").gte(80).lte(150)); + + VectorSearchQueryBuilder.validateEfficientFilterSafe(tree); + } + + @Test + void validateEfficientFilterSafe_acceptsExistsLeaf() { + // IS NOT NULL lowers to ExistsQueryBuilder; locks in allow-list coverage for that path. + QueryBuilder exists = QueryBuilders.existsQuery("brand"); + + VectorSearchQueryBuilder.validateEfficientFilterSafe(exists); + } + + @Test + void validateEfficientFilterSafe_rejectsUnknownWrapper() { + // Unknown shapes must fail closed so future FilterQueryBuilder additions cannot silently + // re-introduce the AOSS-rejection bug class this PR is guarding against. 
+ QueryBuilder unknown = new WrapperQueryBuilder("{\"term\":{\"f\":\"v\"}}"); + + ExpressionEvaluationException ex = + assertThrows( + ExpressionEvaluationException.class, + () -> VectorSearchQueryBuilder.validateEfficientFilterSafe(unknown)); + assertTrue( + ex.getMessage().contains("unsupported filter query shape"), + "Expected unknown-shape rejection, got: " + ex.getMessage()); + assertTrue( + ex.getMessage().contains("WrapperQueryBuilder"), + "Expected class name in message, got: " + ex.getMessage()); + } + + private OpenSearchRequestBuilder createRequestBuilder() { + return new OpenSearchRequestBuilder( + mock(OpenSearchExprValueFactory.class), 10000, mock(Settings.class)); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilderTest.java index 64ae7b187c2..88b837d562d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilderTest.java @@ -410,8 +410,7 @@ void should_build_top_hits_aggregation() { + " \"seq_no_primary_term\" : false,%n" + " \"explain\" : false,%n" + " \"_source\" : {%n" - + " \"includes\" : [ \"name\" ],%n" - + " \"excludes\" : [ ]%n" + + " \"includes\" : [ \"name\" ]%n" + " }%n" + " }%n" + " }%n" @@ -450,8 +449,7 @@ void should_build_filtered_top_hits_aggregation() { + " \"seq_no_primary_term\" : false,%n" + " \"explain\" : false,%n" + " \"_source\" : {%n" - + " \"includes\" : [ \"name\" ],%n" - + " \"excludes\" : [ ]%n" + + " \"includes\" : [ \"name\" ]%n" + " }%n" + " }%n" + " }%n" diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 310bb5e73c5..e930056474a 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -174,20 +174,80 @@ void should_build_wildcard_query_for_like_expression() { } @Test - void should_build_script_query_for_unsupported_lucene_query() { + void should_build_exists_query_for_is_not_null() { + assertJsonEquals( + "{\n" + + " \"exists\" : {\n" + + " \"field\" : \"age\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + "}", + buildQuery(DSL.isnotnull(ref("age", INTEGER)))); + } + + @Test + void should_build_must_not_exists_query_for_is_null() { + assertJsonEquals( + "{\n" + + " \"bool\" : {\n" + + " \"must_not\" : [\n" + + " {\n" + + " \"exists\" : {\n" + + " \"field\" : \"age\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + "}", + buildQuery(DSL.is_null(ref("age", INTEGER)))); + } + + @Test + void should_fallback_to_script_for_nested_is_not_null() { + // Nested IS_NOT_NULL must NOT route through NestedQuery.buildNested(): that path reads + // arg[1] and unary IS_NOT_NULL only has arg[0]. ExistsQuery.isNestedPredicate() returns + // false precisely to force the script fallback here. 
mockToStringSerializer(); assertJsonEquals( "{\n" + " \"script\" : {\n" + " \"script\" : {\n" - + " \"source\" : \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"is not" - + " null(age)\\\"}\",\n" + + " \"source\" :" + + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"is" + + " not null(FunctionExpression(functionName=nested, arguments=[message.info," + + " message]))\\\"}\",\n" + " \"lang\" : \"opensearch_compounded_script\"\n" + " },\n" + " \"boost\" : 1.0\n" + " }\n" + "}", - buildQuery(DSL.isnotnull(ref("age", INTEGER)))); + buildQuery( + DSL.isnotnull( + DSL.nested(DSL.ref("message.info", STRING), DSL.ref("message", STRING))))); + } + + @Test + void should_fallback_to_script_for_nested_is_null() { + // Symmetric to the IS_NOT_NULL case: must not crash with an arg[1] lookup via NestedQuery. + mockToStringSerializer(); + assertJsonEquals( + "{\n" + + " \"script\" : {\n" + + " \"script\" : {\n" + + " \"source\" :" + + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"is" + + " null(FunctionExpression(functionName=nested, arguments=[message.info," + + " message]))\\\"}\",\n" + + " \"lang\" : \"opensearch_compounded_script\"\n" + + " },\n" + + " \"boost\" : 1.0\n" + + " }\n" + + "}", + buildQuery( + DSL.is_null(DSL.nested(DSL.ref("message.info", STRING), DSL.ref("message", STRING))))); } @Test diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRuleTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRuleTest.java new file mode 100644 index 00000000000..22b1b36ca91 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/MergeRules/TextKeywordConflictRuleTest.java @@ -0,0 +1,146 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.util.MergeRules; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static 
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; +import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; + +class TextKeywordConflictRuleTest { + + private final TextKeywordConflictRule rule = new TextKeywordConflictRule(); + + @Test + void testMatchTextAndKeyword() { + OpenSearchDataType text = OpenSearchDataType.of(MappingType.Text); + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + assertTrue(rule.isMatch(text, keyword)); + assertTrue(rule.isMatch(keyword, text)); + } + + @Test + void testMatchMatchOnlyTextAndKeyword() { + OpenSearchDataType matchOnlyText = OpenSearchDataType.of(MappingType.MatchOnlyText); + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + assertTrue(rule.isMatch(matchOnlyText, keyword)); + assertTrue(rule.isMatch(keyword, matchOnlyText)); + } + + @Test + void testMatchTextWithKeywordSubfieldAndTextWithout() { + OpenSearchTextType textWithKeyword = + OpenSearchTextType.of(Map.of("keyword", OpenSearchDataType.of(MappingType.Keyword))); + OpenSearchTextType textWithout = OpenSearchTextType.of(); + assertTrue(rule.isMatch(textWithKeyword, textWithout)); + assertTrue(rule.isMatch(textWithout, textWithKeyword)); + } + + @Test + void testNoMatchSameTextWithoutSubfields() { + OpenSearchTextType text1 = OpenSearchTextType.of(); + OpenSearchTextType text2 = OpenSearchTextType.of(); + assertFalse(rule.isMatch(text1, text2)); + } + + @Test + void testNoMatchBothTextWithKeywordSubfields() { + OpenSearchTextType textWithKeyword1 = + OpenSearchTextType.of(Map.of("keyword", 
OpenSearchDataType.of(MappingType.Keyword))); + OpenSearchTextType textWithKeyword2 = + OpenSearchTextType.of(Map.of("keyword", OpenSearchDataType.of(MappingType.Keyword))); + assertFalse(rule.isMatch(textWithKeyword1, textWithKeyword2)); + } + + @Test + void testNoMatchKeywordAndKeyword() { + OpenSearchDataType keyword1 = OpenSearchDataType.of(MappingType.Keyword); + OpenSearchDataType keyword2 = OpenSearchDataType.of(MappingType.Keyword); + assertFalse(rule.isMatch(keyword1, keyword2)); + } + + @Test + void testNoMatchIntegerAndKeyword() { + OpenSearchDataType integer = OpenSearchDataType.of(MappingType.Integer); + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + assertFalse(rule.isMatch(integer, keyword)); + } + + @Test + void testNoMatchNullSource() { + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + assertFalse(rule.isMatch(null, keyword)); + } + + @Test + void testNoMatchNullTarget() { + OpenSearchDataType text = OpenSearchDataType.of(MappingType.Text); + assertFalse(rule.isMatch(text, null)); + } + + @Test + void testMergeProducesTextWithoutKeywordSubfields() { + OpenSearchDataType keyword = OpenSearchDataType.of(MappingType.Keyword); + Map target = new HashMap<>(); + target.put("msg", keyword); + + OpenSearchDataType text = OpenSearchDataType.of(MappingType.Text); + rule.mergeInto("msg", text, target); + + OpenSearchDataType merged = target.get("msg"); + assertInstanceOf(OpenSearchTextType.class, merged); + OpenSearchTextType mergedText = (OpenSearchTextType) merged; + assertTrue(mergedText.getFields().isEmpty(), "Merged type should have no keyword subfields"); + } + + @Test + void testMergeHelperIntegration() { + // Simulate merging two index mappings with conflicting text/keyword types + Map target = new HashMap<>(); + target.put("msg", OpenSearchDataType.of(MappingType.Keyword)); + target.put("idx", OpenSearchDataType.of(MappingType.Integer)); + + Map source = new HashMap<>(); + source.put("msg", 
OpenSearchDataType.of(MappingType.Text)); + source.put("idx", OpenSearchDataType.of(MappingType.Integer)); + + MergeRuleHelper.merge(target, source); + + // msg should be merged to text without keyword subfields + assertInstanceOf(OpenSearchTextType.class, target.get("msg")); + OpenSearchTextType mergedText = (OpenSearchTextType) target.get("msg"); + assertTrue(mergedText.getFields().isEmpty()); + + // idx should remain integer (same type in both, LatestRule applies) + assertEquals(MappingType.Integer, target.get("idx").getMappingType()); + } + + @Test + void testToKeywordSubFieldReturnsNullForMergedType() { + // After merging text and keyword, toKeywordSubField should return null, + // forcing SOURCE retrieval instead of DOC_VALUE + Map target = new HashMap<>(); + target.put("msg", OpenSearchDataType.of(MappingType.Keyword)); + + Map source = new HashMap<>(); + source.put("msg", OpenSearchDataType.of(MappingType.Text)); + + MergeRuleHelper.merge(target, source); + + OpenSearchDataType mergedType = target.get("msg"); + String result = OpenSearchTextType.toKeywordSubField("msg", mergedType.getExprType()); + // Should return null because the merged text type has no keyword subfield + assertNull(result); + } +} diff --git a/plugin/build.gradle b/plugin/build.gradle index 340787fa01f..708c4b18b35 100644 --- a/plugin/build.gradle +++ b/plugin/build.gradle @@ -1,5 +1,4 @@ import java.util.concurrent.Callable -import org.opensearch.gradle.dependencies.CompileOnlyResolvePlugin /* * Copyright OpenSearch Contributors @@ -55,7 +54,7 @@ opensearchplugin { name 'opensearch-sql' description 'OpenSearch SQL' classname 'org.opensearch.sql.plugin.SQLPlugin' - extendedPlugins = ['opensearch-job-scheduler'] + extendedPlugins = ['opensearch-job-scheduler', 'analytics-engine;optional=true'] licenseFile rootProject.file("LICENSE.txt") noticeFile rootProject.file("NOTICE") } @@ -160,6 +159,8 @@ dependencies { api 
"com.fasterxml.jackson.core:jackson-annotations:${versions.jackson_annotations}" api project(":ppl") + api project(':api') + implementation("org.opensearch.sandbox:analytics-api:${opensearch_version}") api project(':legacy') api project(':opensearch') api project(':prometheus') @@ -320,4 +321,3 @@ testClusters.integTest { run { useCluster testClusters.integTest } - diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java index edffd65f6bf..e7dd3dbc776 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java @@ -18,6 +18,7 @@ import java.util.Collection; import java.util.List; import java.util.Objects; +import java.util.function.BiFunction; import java.util.function.Supplier; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; @@ -36,8 +37,10 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsFilter; import org.opensearch.common.util.concurrent.OpenSearchExecutors; +import org.opensearch.core.action.ActionListener; import org.opensearch.core.action.ActionResponse; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; import org.opensearch.env.NodeEnvironment; @@ -50,11 +53,15 @@ import org.opensearch.plugins.ScriptPlugin; import org.opensearch.plugins.SystemIndexPlugin; import org.opensearch.repositories.RepositoriesService; +import org.opensearch.rest.BytesRestResponse; +import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestController; import org.opensearch.rest.RestHandler; import org.opensearch.script.ScriptContext; import org.opensearch.script.ScriptEngine; import org.opensearch.script.ScriptService; +import 
org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.datasource.DataSourceService; import org.opensearch.sql.datasources.auth.DataSourceUserAuthorizationHelper; import org.opensearch.sql.datasources.auth.DataSourceUserAuthorizationHelperImpl; @@ -85,6 +92,8 @@ import org.opensearch.sql.directquery.transport.model.ExecuteDirectQueryActionResponse; import org.opensearch.sql.directquery.transport.model.ReadDirectQueryResourcesActionResponse; import org.opensearch.sql.directquery.transport.model.WriteDirectQueryResourcesActionResponse; +import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; +import org.opensearch.sql.executor.QueryType; import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.metrics.Metrics; import org.opensearch.sql.legacy.plugin.RestSqlAction; @@ -94,14 +103,18 @@ import org.opensearch.sql.opensearch.storage.OpenSearchDataSourceFactory; import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine; import org.opensearch.sql.plugin.config.OpenSearchPluginModule; +import org.opensearch.sql.plugin.rest.AnalyticsExecutorHolder; import org.opensearch.sql.plugin.rest.RestPPLGrammarAction; import org.opensearch.sql.plugin.rest.RestPPLQueryAction; import org.opensearch.sql.plugin.rest.RestPPLStatsAction; import org.opensearch.sql.plugin.rest.RestQuerySettingsAction; +import org.opensearch.sql.plugin.rest.RestUnifiedQueryAction; import org.opensearch.sql.plugin.transport.PPLQueryAction; import org.opensearch.sql.plugin.transport.TransportPPLQueryAction; import org.opensearch.sql.plugin.transport.TransportPPLQueryResponse; import org.opensearch.sql.prometheus.storage.PrometheusStorageFactory; +import org.opensearch.sql.protocol.response.format.JsonResponseFormatter; +import org.opensearch.sql.protocol.response.format.JsonResponseFormatter.Style; import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService; 
import org.opensearch.sql.spark.cluster.ClusterManagerEventListener; import org.opensearch.sql.spark.flint.FlintIndexMetadataServiceImpl; @@ -117,6 +130,7 @@ import org.opensearch.sql.spark.transport.model.CancelAsyncQueryActionResponse; import org.opensearch.sql.spark.transport.model.CreateAsyncQueryActionResponse; import org.opensearch.sql.spark.transport.model.GetAsyncQueryResultActionResponse; +import org.opensearch.sql.sql.domain.SQLQueryRequest; import org.opensearch.sql.storage.DataSourceFactory; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; @@ -165,7 +179,7 @@ public List getRestHandlers( return Arrays.asList( new RestPPLQueryAction(), new RestPPLGrammarAction(), - new RestSqlAction(settings, injector), + new RestSqlAction(settings, injector, createSqlAnalyticsRouter()), new RestSqlStatsAction(settings, restController), new RestPPLStatsAction(settings, restController), new RestQuerySettingsAction(settings, restController), @@ -175,6 +189,88 @@ public List getRestHandlers( new RestDirectQueryResourcesManagementAction((OpenSearchSettings) pluginSettings)); } + /** + * Creates a routing function for SQL queries targeting analytics engine indices. Returns {@code + * true} if the query was handled (analytics index), {@code false} to fall through to normal SQL. + * + *

    The {@link RestUnifiedQueryAction} is built lazily on the first request because the + * analytics-engine {@code QueryPlanExecutor} is published into {@link AnalyticsExecutorHolder} by + * {@code TransportPPLQueryAction}'s {@code @Inject} constructor — which fires after the Node + * Guice injector is built, i.e. after {@code getRestHandlers}. If the executor is still + * unavailable when a SQL request arrives, the router falls through to the legacy SQL path. + */ + private BiFunction createSqlAnalyticsRouter() { + final RestUnifiedQueryAction[] cached = new RestUnifiedQueryAction[1]; + java.util.function.Supplier handlerSupplier = + () -> { + if (cached[0] == null) { + var executor = AnalyticsExecutorHolder.get(); + if (executor == null) { + return null; + } + cached[0] = + new RestUnifiedQueryAction(client, clusterService, executor, pluginSettings); + } + return cached[0]; + }; + return (sqlRequest, channel) -> { + RestUnifiedQueryAction unifiedQueryHandler = handlerSupplier.get(); + if (unifiedQueryHandler == null + || !unifiedQueryHandler.isAnalyticsIndex(sqlRequest.getQuery(), QueryType.SQL)) { + return false; + } + if (sqlRequest.isExplainRequest()) { + unifiedQueryHandler.explain( + sqlRequest.getQuery(), + QueryType.SQL, + ExplainMode.STANDARD, + new ResponseListener<>() { + @Override + public void onResponse(ExplainResponse response) { + JsonResponseFormatter formatter = + new JsonResponseFormatter<>(Style.PRETTY) { + @Override + protected Object buildJsonObject(ExplainResponse resp) { + return resp; + } + }; + channel.sendResponse( + new BytesRestResponse( + RestStatus.OK, + "application/json; charset=UTF-8", + formatter.format(response))); + } + + @Override + public void onFailure(Exception e) { + channel.sendResponse( + new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, e.getMessage())); + } + }); + } else { + unifiedQueryHandler.execute( + sqlRequest.getQuery(), + QueryType.SQL, + false, + new ActionListener<>() { + @Override + public void 
onResponse(TransportPPLQueryResponse response) { + channel.sendResponse( + new BytesRestResponse( + RestStatus.OK, "application/json; charset=UTF-8", response.getResult())); + } + + @Override + public void onFailure(Exception e) { + channel.sendResponse( + new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, e.getMessage())); + } + }); + } + return true; + }; + } + /** Register action and handler so that transportClient can find proxy for action. */ @Override public List> getActions() { diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java index 35504dd83c2..d9406935ee5 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java @@ -5,6 +5,7 @@ package org.opensearch.sql.plugin.config; +import java.util.List; import lombok.RequiredArgsConstructor; import org.opensearch.common.inject.AbstractModule; import org.opensearch.common.inject.Provides; @@ -13,6 +14,7 @@ import org.opensearch.sql.analysis.ExpressionAnalyzer; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.executor.DelegatingExecutionEngine; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryManager; import org.opensearch.sql.executor.QueryService; @@ -41,6 +43,13 @@ @RequiredArgsConstructor public class OpenSearchPluginModule extends AbstractModule { + private final List executionEngineExtensions; + + /** Default constructor for when no engines are available. 
*/ + public OpenSearchPluginModule() { + this(List.of()); + } + private final BuiltinFunctionRepository functionRepository = BuiltinFunctionRepository.getInstance(); @@ -61,7 +70,12 @@ public StorageEngine storageEngine(OpenSearchClient client, Settings settings) { @Singleton public ExecutionEngine executionEngine( OpenSearchClient client, ExecutionProtector protector, PlanSerializer planSerializer) { - return new OpenSearchExecutionEngine(client, protector, planSerializer); + ExecutionEngine defaultEngine = + new OpenSearchExecutionEngine(client, protector, planSerializer); + if (executionEngineExtensions.isEmpty()) { + return defaultEngine; + } + return new DelegatingExecutionEngine(defaultEngine, executionEngineExtensions); } @Provides diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/request/PPLQueryRequestFactory.java b/plugin/src/main/java/org/opensearch/sql/plugin/request/PPLQueryRequestFactory.java index 0d07dab966a..bb87bf7fa91 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/request/PPLQueryRequestFactory.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/request/PPLQueryRequestFactory.java @@ -113,6 +113,11 @@ private static PPLQueryRequest parsePPLRequestFromPayload(RestRequest restReques if (pretty) { pplRequest.style(JsonResponseFormatter.Style.PRETTY); } + // set queryId + String queryId = jsonContent.optString("queryId", null); + if (queryId != null) { + pplRequest.queryId(queryId); + } return pplRequest; } catch (JSONException e) { throw new IllegalArgumentException("Failed to parse request payload", e); diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/AnalyticsExecutorHolder.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/AnalyticsExecutorHolder.java new file mode 100644 index 00000000000..fa3e7d1d1fa --- /dev/null +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/AnalyticsExecutorHolder.java @@ -0,0 +1,36 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: 
Apache-2.0 + */ + +package org.opensearch.sql.plugin.rest; + +import org.apache.calcite.rel.RelNode; +import org.opensearch.analytics.exec.QueryPlanExecutor; + +/** + * Bridge for sharing the analytics-engine {@link QueryPlanExecutor} between the PPL transport + * action (where Guice resolves the binding via {@code @Inject}) and the REST-only SQL router (where + * Guice cannot, because {@code SQLPlugin#getRestHandlers} runs before the Node-level injector + * satisfies {@code @Inject} parameters). + * + *

    Why a static holder: cross-plugin Guice injection needs a class registered in the Node + * injector, and {@link org.opensearch.sql.plugin.SQLPlugin}'s SQL routing path is built in {@code + * getRestHandlers} — outside any Guice-managed lifecycle. Persisting the executor in this holder + * once {@link org.opensearch.sql.plugin.transport.TransportPPLQueryAction} is constructed lets the + * SQL router read the same instance without going back through the injector. + */ +public final class AnalyticsExecutorHolder { + + private static volatile QueryPlanExecutor> executor; + + private AnalyticsExecutorHolder() {} + + public static void set(QueryPlanExecutor> instance) { + executor = instance; + } + + public static QueryPlanExecutor> get() { + return executor; + } +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java index ffdd90504f7..5c6266beee1 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLQueryAction.java @@ -5,8 +5,6 @@ package org.opensearch.sql.plugin.rest; -import static org.opensearch.core.rest.RestStatus.BAD_REQUEST; -import static org.opensearch.core.rest.RestStatus.INTERNAL_SERVER_ERROR; import static org.opensearch.core.rest.RestStatus.OK; import com.google.common.collect.ImmutableList; @@ -25,10 +23,9 @@ import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.common.error.ErrorReport; import org.opensearch.sql.datasources.exceptions.DataSourceClientException; -import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.QueryEngineException; -import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.legacy.metrics.MetricName; import 
org.opensearch.sql.legacy.metrics.Metrics; import org.opensearch.sql.opensearch.response.error.ErrorMessageFactory; @@ -49,17 +46,47 @@ public RestPPLQueryAction() { super(); } - private static boolean isClientError(Exception e) { - return e instanceof NullPointerException - // NPE is hard to differentiate but more likely caused by bad query - || e instanceof IllegalArgumentException - || e instanceof IndexNotFoundException - || e instanceof SemanticCheckException - || e instanceof ExpressionEvaluationException - || e instanceof QueryEngineException - || e instanceof SyntaxCheckException - || e instanceof DataSourceClientException - || e instanceof IllegalAccessException; + private static boolean isClientError(Exception ex) { + // (Tombstone) NullPointerException has historically been treated as a client error, but + // nowadays they're rare and should be treated as system errors, since it represents a broken + // data model in our logic. + return ex instanceof IllegalArgumentException + || ex instanceof IndexNotFoundException + || ex instanceof QueryEngineException + || ex instanceof SyntaxCheckException + || ex instanceof DataSourceClientException + || ex instanceof IllegalAccessException; + } + + private static int getRawErrorCode(Exception ex) { + if (ex instanceof ErrorReport) { + return getRawErrorCode(((ErrorReport) ex).getCause()); + } + if (ex instanceof OpenSearchException) { + return ((OpenSearchException) ex).status().getStatus(); + } + // Possible future work: We currently do this on exception types, when we have more robust + // ErrorCodes in more locations it may be worth switching this to be based on those instead. + // That lets us identify specific error cases at a granularity higher than exception types. + if (isClientError(ex)) { + return 400; + } + return 500; + } + + private static RestStatus loggedErrorCode(Exception ex) { + int code = getRawErrorCode(ex); + + // If we hit neither branch, no-op as false alarm error? 
I don't believe we can ever hit this + // scenario. + if (400 <= code && code < 500) { + Metrics.getInstance().getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_CUS).increment(); + } else if (500 <= code && code < 600) { + Metrics.getInstance().getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_SYS).increment(); + } else { + LOG.warn("Got an exception returning non-error status {}", RestStatus.fromCode(code), ex); + } + return RestStatus.fromCode(code); } @Override @@ -98,33 +125,13 @@ public void onResponse(TransportPPLQueryResponse response) { @Override public void onFailure(Exception e) { + RestStatus status = loggedErrorCode(e); if (transportPPLQueryRequest.isExplainRequest()) { - LOG.error("Error happened during explain", e); - if (isClientError(e)) { - reportError(channel, e, BAD_REQUEST); - } else { - reportError(channel, e, INTERNAL_SERVER_ERROR); - } - } else if (e instanceof OpenSearchException) { - Metrics.getInstance() - .getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_CUS) - .increment(); - OpenSearchException exception = (OpenSearchException) e; - reportError(channel, exception, exception.status()); + LOG.error("Error happened during explain (status {})", status, e); } else { - LOG.error("Error happened during query handling", e); - if (isClientError(e)) { - Metrics.getInstance() - .getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_CUS) - .increment(); - reportError(channel, e, BAD_REQUEST); - } else { - Metrics.getInstance() - .getNumericalMetric(MetricName.PPL_FAILED_REQ_COUNT_SYS) - .increment(); - reportError(channel, e, INTERNAL_SERVER_ERROR); - } + LOG.error("Error happened during query handling (status {})", status, e); } + reportError(channel, e, status); } }); } diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestUnifiedQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestUnifiedQueryAction.java new file mode 100644 index 00000000000..7d39deb68d5 --- /dev/null +++ 
b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestUnifiedQueryAction.java @@ -0,0 +1,290 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.plugin.rest; + +import static org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; +import static org.opensearch.sql.lang.PPLLangSpec.PPL_SPEC; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_WORKER_THREAD_POOL_NAME; +import static org.opensearch.sql.protocol.response.format.JsonResponseFormatter.Style.PRETTY; + +import java.util.Map; +import java.util.Optional; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.util.SqlBasicVisitor; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.ThreadContext; +import org.opensearch.analytics.exec.QueryPlanExecutor; +import org.opensearch.analytics.schema.OpenSearchSchemaBuilder; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.IndexSettings; +import org.opensearch.sql.api.UnifiedQueryContext; +import org.opensearch.sql.api.UnifiedQueryPlanner; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.ast.tree.Relation; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit; +import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.executor.ExecutionEngine.QueryResponse; +import org.opensearch.sql.executor.QueryType; +import 
org.opensearch.sql.executor.analytics.AnalyticsExecutionEngine; +import org.opensearch.sql.lang.LangSpec; +import org.opensearch.sql.plugin.transport.TransportPPLQueryResponse; +import org.opensearch.sql.protocol.response.QueryResult; +import org.opensearch.sql.protocol.response.format.ResponseFormatter; +import org.opensearch.sql.protocol.response.format.SimpleJsonResponseFormatter; +import org.opensearch.transport.client.node.NodeClient; + +/** + * Handles queries routed to the Analytics engine via the unified query pipeline. Parses PPL/SQL + * queries using {@link UnifiedQueryPlanner} to generate a Calcite {@link RelNode}, then delegates + * to {@link AnalyticsExecutionEngine} for execution. + */ +public class RestUnifiedQueryAction { + + private static final Logger LOG = LogManager.getLogger(RestUnifiedQueryAction.class); + private static final String SCHEMA_NAME = "opensearch"; + + private final AnalyticsExecutionEngine analyticsEngine; + private final NodeClient client; + private final ClusterService clusterService; + private final org.opensearch.sql.common.setting.Settings pluginSettings; + + public RestUnifiedQueryAction( + NodeClient client, + ClusterService clusterService, + QueryPlanExecutor> planExecutor, + org.opensearch.sql.common.setting.Settings pluginSettings) { + this.client = client; + this.clusterService = clusterService; + this.analyticsEngine = new AnalyticsExecutionEngine(planExecutor); + this.pluginSettings = pluginSettings; + } + + /** + * Returns true iff the target index has {@link + * IndexSettings#PLUGGABLE_DATAFORMAT_ENABLED_SETTING} set and {@link + * IndexSettings#PLUGGABLE_DATAFORMAT_VALUE_SETTING} is {@code "parquet"}, routing it to + * DataFusion instead of the Calcite→DSL path. + * + *

    Note: This creates a separate UnifiedQueryContext for parsing. The context cannot be shared + * with doExecute/doExplain because UnifiedQueryContext holds a Calcite JDBC connection that fails + * when used across threads (transport thread -> sql-worker thread). When real catalog metadata + * makes this expensive, consider moving the routing check to the sql-worker thread. + */ + public boolean isAnalyticsIndex(String query, QueryType queryType) { + if (query == null || query.isEmpty()) { + return false; + } + try (UnifiedQueryContext context = buildParsingContext(queryType)) { + return extractIndexName(query, queryType, context) + .map(this::stripSchemaPrefix) + .map(this::isPluggableDataformatIndex) + .orElse(false); + } catch (Exception e) { + return false; + } + } + + private String stripSchemaPrefix(String indexName) { + int lastDot = indexName.lastIndexOf('.'); + return lastDot >= 0 ? indexName.substring(lastDot + 1) : indexName; + } + + private boolean isPluggableDataformatIndex(String indexName) { + var indexMetadata = clusterService.state().metadata().index(indexName); + if (indexMetadata == null) { + return false; + } + var settings = indexMetadata.getSettings(); + return IndexSettings.PLUGGABLE_DATAFORMAT_ENABLED_SETTING.get(settings) + && "parquet".equals(IndexSettings.PLUGGABLE_DATAFORMAT_VALUE_SETTING.get(settings)); + } + + /** Execute a query through the unified query pipeline on the sql-worker thread pool. 
*/ + public void execute( + String query, + QueryType queryType, + boolean profiling, + ActionListener listener) { + client + .threadPool() + .schedule( + withCurrentContext( + () -> { + try (UnifiedQueryContext context = buildContext(queryType, profiling)) { + UnifiedQueryPlanner planner = new UnifiedQueryPlanner(context); + RelNode plan = planner.plan(query); + CalcitePlanContext planContext = context.getPlanContext(); + plan = addQuerySizeLimit(plan, planContext); + analyticsEngine.execute( + plan, planContext, createQueryListener(queryType, listener)); + } catch (Exception e) { + listener.onFailure(e); + } + }), + new TimeValue(0), + SQL_WORKER_THREAD_POOL_NAME); + } + + /** + * Explain a query through the unified query pipeline on the sql-worker thread pool. Returns + * ExplainResponse via ResponseListener so the caller can format it. + */ + public void explain( + String query, + QueryType queryType, + ExplainMode mode, + ResponseListener listener) { + client + .threadPool() + .schedule( + withCurrentContext( + () -> { + try (UnifiedQueryContext context = buildContext(queryType, false)) { + UnifiedQueryPlanner planner = new UnifiedQueryPlanner(context); + RelNode plan = planner.plan(query); + CalcitePlanContext planContext = context.getPlanContext(); + plan = addQuerySizeLimit(plan, planContext); + analyticsEngine.explain(plan, mode, planContext, listener); + } catch (Exception e) { + listener.onFailure(e); + } + }), + new TimeValue(0), + SQL_WORKER_THREAD_POOL_NAME); + } + + /** + * Build a lightweight context for parsing only (index name extraction). Does not require cluster + * state or catalog schema. 
+ */ + private UnifiedQueryContext buildParsingContext(QueryType queryType) { + return applyClusterOverrides(UnifiedQueryContext.builder().language(queryType)).build(); + } + + private UnifiedQueryContext buildContext(QueryType queryType, boolean profiling) { + return applyClusterOverrides( + UnifiedQueryContext.builder() + .language(queryType) + .catalog(SCHEMA_NAME, OpenSearchSchemaBuilder.buildSchema(clusterService.state())) + .defaultNamespace(SCHEMA_NAME) + .profiling(profiling)) + .build(); + } + + /** + * Routes operator-configured cluster overrides into the builder via the existing {@code + * setting(String, Object)} API, keeping {@link UnifiedQueryContext} decoupled from any specific + * {@link org.opensearch.sql.common.setting.Settings} implementation. + * + *

    Currently scoped to {@code plugins.ppl.rex.max_match.limit} — required so the unified path + * honors {@code _cluster/settings} updates for {@code rex max_match} (CalciteRexCommandIT's + * testRexMaxMatchConfigurableLimit). Add keys here if a future PR / IT depends on cluster-side + * fidelity for one of the other planning settings. + */ + private UnifiedQueryContext.Builder applyClusterOverrides(UnifiedQueryContext.Builder builder) { + Object rexLimit = + pluginSettings.getSettingValue( + org.opensearch.sql.common.setting.Settings.Key.PPL_REX_MAX_MATCH_LIMIT); + if (rexLimit != null) { + builder.setting( + org.opensearch.sql.common.setting.Settings.Key.PPL_REX_MAX_MATCH_LIMIT.getKeyValue(), + rexLimit); + } + return builder; + } + + /** + * Extract the source index name by parsing the query and visiting the AST to find the Relation + * node. Uses the context's parser which supports both PPL and SQL. + */ + private static Optional extractIndexName( + String query, QueryType queryType, UnifiedQueryContext context) { + if (queryType == QueryType.PPL) { + UnresolvedPlan unresolvedPlan = (UnresolvedPlan) context.getParser().parse(query); + return Optional.ofNullable(unresolvedPlan.accept(new IndexNameExtractor(), null)); + } + SqlNode sqlNode = (SqlNode) context.getParser().parse(query); + return Optional.ofNullable(extractTableNameFromSqlNode(sqlNode)); + } + + /** AST visitor that extracts the source index name from a Relation node (PPL path). */ + private static class IndexNameExtractor extends AbstractNodeVisitor { + @Override + public String visitRelation(Relation node, Void context) { + return node.getTableQualifiedName().toString(); + } + } + + /** SqlNode visitor that extracts the source table name from a SQL parse tree. 
*/ + private static class SqlTableNameExtractor extends SqlBasicVisitor { + @Override + public String visit(SqlCall call) { + if (call instanceof SqlSelect select) { + return select.getFrom().accept(this); + } + if (call instanceof SqlJoin join) { + return join.getLeft().accept(this); + } + return null; + } + + @Override + public String visit(SqlIdentifier id) { + return id.toString(); + } + } + + private static String extractTableNameFromSqlNode(SqlNode sqlNode) { + return sqlNode.accept(new SqlTableNameExtractor()); + } + + private static RelNode addQuerySizeLimit(RelNode plan, CalcitePlanContext context) { + return LogicalSystemLimit.create( + LogicalSystemLimit.SystemLimitType.QUERY_SIZE_LIMIT, + plan, + context.relBuilder.literal(context.sysLimit.querySizeLimit())); + } + + private ResponseListener createQueryListener( + QueryType queryType, ActionListener transportListener) { + ResponseFormatter formatter = new SimpleJsonResponseFormatter(PRETTY); + return new ResponseListener() { + @Override + public void onResponse(QueryResponse response) { + LangSpec langSpec = queryType == QueryType.PPL ? 
PPL_SPEC : LangSpec.SQL_SPEC; + String result = + formatter.format( + new QueryResult( + response.getSchema(), response.getResults(), response.getCursor(), langSpec)); + transportListener.onResponse(new TransportPPLQueryResponse(result)); + } + + @Override + public void onFailure(Exception e) { + transportListener.onFailure(e); + } + }; + } + + private static Runnable withCurrentContext(final Runnable task) { + final Map currentContext = ThreadContext.getImmutableContext(); + return () -> { + ThreadContext.putAll(currentContext); + task.run(); + }; + } +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/PPLQueryTask.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/PPLQueryTask.java new file mode 100644 index 00000000000..2df96bdbd12 --- /dev/null +++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/PPLQueryTask.java @@ -0,0 +1,28 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.plugin.transport; + +import java.util.Map; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.tasks.CancellableTask; + +public class PPLQueryTask extends CancellableTask { + + public PPLQueryTask( + long id, + String type, + String action, + String description, + TaskId parentTaskId, + Map headers) { + super(id, type, action, description, parentTaskId, headers); + } + + @Override + public boolean shouldCancelChildrenOnCancellation() { + return true; + } +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java index 48bc36374a8..365f1b26815 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryAction.java @@ -13,9 +13,11 @@ import java.util.Locale; import java.util.Optional; import java.util.function.Supplier; 
+import org.apache.calcite.rel.RelNode; import org.opensearch.action.ActionRequest; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.HandledTransportAction; +import org.opensearch.analytics.exec.QueryPlanExecutor; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.inject.Guice; import org.opensearch.common.inject.Inject; @@ -28,11 +30,15 @@ import org.opensearch.sql.datasource.DataSourceService; import org.opensearch.sql.datasources.service.DataSourceServiceImpl; import org.opensearch.sql.executor.ExecutionEngine; +import org.opensearch.sql.executor.QueryType; import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.metrics.Metrics; import org.opensearch.sql.monitor.profile.QueryProfiling; +import org.opensearch.sql.opensearch.executor.OpenSearchQueryManager; import org.opensearch.sql.opensearch.setting.OpenSearchSettings; import org.opensearch.sql.plugin.config.OpenSearchPluginModule; +import org.opensearch.sql.plugin.rest.AnalyticsExecutorHolder; +import org.opensearch.sql.plugin.rest.RestUnifiedQueryAction; import org.opensearch.sql.ppl.PPLService; import org.opensearch.sql.ppl.domain.PPLQueryRequest; import org.opensearch.sql.protocol.response.QueryResult; @@ -56,7 +62,13 @@ public class TransportPPLQueryAction private final Supplier pplEnabled; - /** Constructor of TransportPPLQueryAction. */ + /** Null when analytics-engine plugin is absent; set via {@link #setQueryPlanExecutor}. 
*/ + private volatile RestUnifiedQueryAction unifiedQueryHandler; + + private final NodeClient clientRef; + private final ClusterService clusterServiceRef; + private final org.opensearch.sql.common.setting.Settings pluginSettingsRef; + @Inject public TransportPPLQueryAction( TransportService transportService, @@ -66,14 +78,18 @@ public TransportPPLQueryAction( DataSourceServiceImpl dataSourceService, org.opensearch.common.settings.Settings clusterSettings) { super(PPLQueryAction.NAME, transportService, actionFilters, TransportPPLQueryRequest::new); + this.clientRef = client; + this.clusterServiceRef = clusterService; ModulesBuilder modules = new ModulesBuilder(); modules.add(new OpenSearchPluginModule()); + org.opensearch.sql.common.setting.Settings pluginSettings = + new OpenSearchSettings(clusterService.getClusterSettings()); + this.pluginSettingsRef = pluginSettings; modules.add( b -> { b.bind(NodeClient.class).toInstance(client); - b.bind(org.opensearch.sql.common.setting.Settings.class) - .toInstance(new OpenSearchSettings(clusterService.getClusterSettings())); + b.bind(org.opensearch.sql.common.setting.Settings.class).toInstance(pluginSettings); b.bind(DataSourceService.class).toInstance(dataSourceService); }); this.injector = Guice.createInjector(modules); @@ -86,6 +102,16 @@ public TransportPPLQueryAction( .getSettingValue(Settings.Key.PPL_ENABLED); } + /** Invoked by Guice iff analytics-engine bound {@code QueryPlanExecutor}. */ + @Inject(optional = true) + public void setQueryPlanExecutor( + QueryPlanExecutor> queryPlanExecutor) { + AnalyticsExecutorHolder.set(queryPlanExecutor); + this.unifiedQueryHandler = + new RestUnifiedQueryAction( + clientRef, clusterServiceRef, queryPlanExecutor, pluginSettingsRef); + } + /** * {@inheritDoc} Transform the request and call super.doExecute() to support call from other * plugins. 
@@ -109,17 +135,40 @@ protected void doExecute( return; } + if (task instanceof PPLQueryTask pplQueryTask) { + OpenSearchQueryManager.setCancellableTask(pplQueryTask); + } Metrics.getInstance().getNumericalMetric(MetricName.PPL_REQ_TOTAL).increment(); Metrics.getInstance().getNumericalMetric(MetricName.PPL_REQ_COUNT_TOTAL).increment(); QueryContext.addRequestId(); - PPLService pplService = injector.getInstance(PPLService.class); // in order to use PPL service, we need to convert TransportPPLQueryRequest to PPLQueryRequest PPLQueryRequest transformedRequest = transportRequest.toPPLQueryRequest(); QueryContext.setProfile(transformedRequest.profile()); ActionListener clearingListener = wrapWithProfilingClear(listener); + // Route to analytics engine for non-Lucene (e.g., Parquet-backed) indices. + if (unifiedQueryHandler != null + && unifiedQueryHandler.isAnalyticsIndex(transformedRequest.getRequest(), QueryType.PPL)) { + if (transformedRequest.isExplainRequest()) { + unifiedQueryHandler.explain( + transformedRequest.getRequest(), + QueryType.PPL, + transformedRequest.mode(), + createExplainResponseListener(transformedRequest, clearingListener)); + } else { + unifiedQueryHandler.execute( + transformedRequest.getRequest(), + QueryType.PPL, + transformedRequest.profile(), + clearingListener); + } + return; + } + + PPLService pplService = injector.getInstance(PPLService.class); + if (transformedRequest.isExplainRequest()) { pplService.explain( transformedRequest, createExplainResponseListener(transformedRequest, clearingListener)); diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java index 6db2bd249ae..4ba1a53d872 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequest.java @@ -9,6 +9,7 @@ import 
java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Locale; +import java.util.Map; import java.util.Optional; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -21,6 +22,7 @@ import org.opensearch.core.common.io.stream.OutputStreamStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.tasks.TaskId; import org.opensearch.sql.ppl.domain.PPLQueryRequest; import org.opensearch.sql.protocol.response.format.Format; import org.opensearch.sql.protocol.response.format.JsonResponseFormatter; @@ -51,6 +53,11 @@ public class TransportPPLQueryRequest extends ActionRequest { @Accessors(fluent = true) private boolean profile = false; + @Setter + @Getter + @Accessors(fluent = true) + private String queryId = null; + /** Constructor of TransportPPLQueryRequest from PPLQueryRequest. */ public TransportPPLQueryRequest(PPLQueryRequest pplQueryRequest) { pplQuery = pplQueryRequest.getRequest(); @@ -61,6 +68,7 @@ public TransportPPLQueryRequest(PPLQueryRequest pplQueryRequest) { style = pplQueryRequest.style(); profile = pplQueryRequest.profile(); explainMode = pplQueryRequest.mode().getModeName(); + queryId = pplQueryRequest.queryId(); } /** Constructor of TransportPPLQueryRequest from StreamInput. */ @@ -75,6 +83,7 @@ public TransportPPLQueryRequest(StreamInput in) throws IOException { sanitize = in.readBoolean(); style = in.readEnum(JsonResponseFormatter.Style.class); profile = in.readBoolean(); + queryId = in.readOptionalString(); } /** Re-create the object from the actionRequest. 
*/ @@ -107,6 +116,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(sanitize); out.writeEnum(style); out.writeBoolean(profile); + out.writeOptionalString(queryId); } public String getRequest() { @@ -147,12 +157,25 @@ public ActionRequestValidationException validate() { return null; } + @Override + public PPLQueryTask createTask( + long id, String type, String action, TaskId parentTaskId, Map headers) { + return new PPLQueryTask(id, type, action, getDescription(), parentTaskId, headers); + } + + @Override + public String getDescription() { + String prefix = (queryId != null) ? "PPL [queryId=" + queryId + "]: " : "PPL: "; + return prefix + pplQuery; + } + /** Convert to PPLQueryRequest. */ public PPLQueryRequest toPPLQueryRequest() { PPLQueryRequest pplQueryRequest = new PPLQueryRequest(pplQuery, jsonContent, path, format, explainMode, profile); pplQueryRequest.sanitize(sanitize); pplQueryRequest.style(style); + pplQueryRequest.queryId(queryId); return pplQueryRequest; } } diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestUnifiedQueryActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestUnifiedQueryActionTest.java new file mode 100644 index 00000000000..25f157f0d9a --- /dev/null +++ b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestUnifiedQueryActionTest.java @@ -0,0 +1,103 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.plugin.rest; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import org.apache.calcite.rel.RelNode; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.analytics.exec.QueryPlanExecutor; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import 
org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.IndexSettings; +import org.opensearch.sql.executor.QueryType; +import org.opensearch.transport.client.node.NodeClient; + +/** + * Tests for analytics index routing in RestUnifiedQueryAction. Routing requires both {@code + * index.pluggable.dataformat.enabled=true} and {@code index.pluggable.dataformat=parquet}. + */ +public class RestUnifiedQueryActionTest { + + private ClusterService clusterService; + private Metadata metadata; + private RestUnifiedQueryAction action; + + @Before + public void setUp() { + clusterService = mock(ClusterService.class); + ClusterState clusterState = mock(ClusterState.class); + metadata = mock(Metadata.class); + when(clusterService.state()).thenReturn(clusterState); + when(clusterState.metadata()).thenReturn(metadata); + + @SuppressWarnings("unchecked") + QueryPlanExecutor> executor = mock(QueryPlanExecutor.class); + action = + new RestUnifiedQueryAction( + mock(NodeClient.class), + clusterService, + executor, + mock(org.opensearch.sql.common.setting.Settings.class)); + } + + @Test + public void pluggableDataformatIndexRoutesToAnalytics() { + registerIndex( + "parquet_logs", + Settings.builder() + .put(IndexSettings.PLUGGABLE_DATAFORMAT_ENABLED_SETTING.getKey(), true) + .put(IndexSettings.PLUGGABLE_DATAFORMAT_VALUE_SETTING.getKey(), "parquet") + .build()); + + assertTrue(action.isAnalyticsIndex("source = parquet_logs | fields ts", QueryType.PPL)); + assertTrue( + action.isAnalyticsIndex("source = opensearch.parquet_logs | fields ts", QueryType.PPL)); + } + + @Test + public void pluggableEnabledButLuceneFormatRoutesToLucene() { + registerIndex( + "lucene_logs", + Settings.builder() + .put(IndexSettings.PLUGGABLE_DATAFORMAT_ENABLED_SETTING.getKey(), true) + .put(IndexSettings.PLUGGABLE_DATAFORMAT_VALUE_SETTING.getKey(), "lucene") + .build()); + + assertFalse(action.isAnalyticsIndex("source = lucene_logs | fields 
ts", QueryType.PPL)); + } + + @Test + public void indexWithoutSettingRoutesToLucene() { + registerIndex("plain_logs", Settings.EMPTY); + + assertFalse(action.isAnalyticsIndex("source = plain_logs | fields ts", QueryType.PPL)); + } + + @Test + public void missingIndexRoutesToLucene() { + assertFalse(action.isAnalyticsIndex("source = does_not_exist | fields ts", QueryType.PPL)); + } + + @Test + public void nullAndEmptyQueriesRouteToLucene() { + assertFalse(action.isAnalyticsIndex(null, QueryType.PPL)); + assertFalse(action.isAnalyticsIndex("", QueryType.PPL)); + } + + private void registerIndex(String name, Settings settings) { + IndexMetadata indexMetadata = mock(IndexMetadata.class); + when(indexMetadata.getSettings()).thenReturn(settings); + when(metadata.index(name)).thenReturn(indexMetadata); + } +} diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/transport/PPLQueryTaskTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/transport/PPLQueryTaskTest.java new file mode 100644 index 00000000000..c9502ac3bbf --- /dev/null +++ b/plugin/src/test/java/org/opensearch/sql/plugin/transport/PPLQueryTaskTest.java @@ -0,0 +1,65 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.plugin.transport; + +import static org.junit.Assert.*; + +import java.util.Map; +import org.junit.Test; +import org.opensearch.core.tasks.TaskId; + +public class PPLQueryTaskTest { + + @Test + public void testShouldCancelChildrenReturnsTrue() { + PPLQueryTask pplQueryTask = + new PPLQueryTask( + 1, + "transport", + "cluster:admin/opensearch/ppl", + "test query", + TaskId.EMPTY_TASK_ID, + Map.of()); + assertTrue(pplQueryTask.shouldCancelChildrenOnCancellation()); + } + + @Test + public void testCreateTaskReturnsPPLQueryTask() { + TransportPPLQueryRequest transportPPLQueryRequest = + new TransportPPLQueryRequest("source=t a=1", null, "/_plugins/_ppl"); + PPLQueryTask task = + transportPPLQueryRequest.createTask( + 1, 
"transport", "cluster:admin/opensearch/ppl", TaskId.EMPTY_TASK_ID, Map.of()); + assertNotNull(task); + } + + @Test + public void testWithQueryId() { + TransportPPLQueryRequest transportPPLQueryRequest = + new TransportPPLQueryRequest("source=t a=1", null, "/_plugins/_ppl"); + transportPPLQueryRequest.queryId("test-123"); + assertEquals("PPL [queryId=test-123]: source=t a=1", transportPPLQueryRequest.getDescription()); + } + + @Test + public void testWithoutQueryId() { + TransportPPLQueryRequest transportPPLQueryRequest = + new TransportPPLQueryRequest("source=t a=1", null, "/_plugins/_ppl"); + assertEquals("PPL: source=t a=1", transportPPLQueryRequest.getDescription()); + } + + @Test + public void testCooperativeModel() { + TransportPPLQueryRequest transportPPLQueryRequest = + new TransportPPLQueryRequest("source=t a=1", null, "/_plugins/_ppl"); + PPLQueryTask task = + transportPPLQueryRequest.createTask( + 1, "transport", "cluster:admin/opensearch/ppl", TaskId.EMPTY_TASK_ID, Map.of()); + assertFalse(task.isCancelled()); + task.cancel("Test"); + assertTrue(task.isCancelled()); + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 36b52cf8a1b..4bc69a8f295 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -171,6 +171,8 @@ TRAINING_DATA_SIZE: 'TRAINING_DATA_SIZE'; ANOMALY_SCORE_THRESHOLD: 'ANOMALY_SCORE_THRESHOLD'; APPEND: 'APPEND'; MULTISEARCH: 'MULTISEARCH'; +UNION: 'UNION'; +MAXOUT: 'MAXOUT'; COUNTFIELD: 'COUNTFIELD'; SHOWCOUNT: 'SHOWCOUNT'; LIMIT: 'LIMIT'; @@ -187,7 +189,9 @@ PATH: 'PATH'; CASE: 'CASE'; ELSE: 'ELSE'; IN: 'IN'; +IS: 'IS'; EXISTS: 'EXISTS'; +NULL: 'NULL'; // Geo IP eval function GEOIP: 'GEOIP'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index ed8b1b599bd..bcaaa105774 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -48,6 +48,8 @@ 
pplCommands | showDataSourcesCommand | searchCommand | multisearchCommand + | graphLookupCommand + | unionCommand ; commands @@ -95,6 +97,7 @@ commands | fieldformatCommand | nomvCommand | graphLookupCommand + | unionCommand ; commandName @@ -138,6 +141,7 @@ commandName | ADDCOLTOTALS | APPEND | MULTISEARCH + | UNION | REX | APPENDPIPE | REPLACE @@ -543,7 +547,7 @@ replacementPair ; convertCommand - : CONVERT convertFunction (COMMA? convertFunction)* + : CONVERT (TIMEFORMAT EQUAL timeFormat=stringLiteral)? convertFunction (COMMA? convertFunction)* ; convertFunction @@ -595,6 +599,19 @@ multisearchCommand : MULTISEARCH (LT_SQR_PRTHS subSearch RT_SQR_PRTHS)+ ; +unionCommand + : UNION subsearchOptions? unionDataset (COMMA? unionDataset)* + ; + +subsearchOptions + : (MAXOUT EQUAL maxout=integerLiteral)? + ; + +unionDataset + : LT_SQR_PRTHS subSearch RT_SQR_PRTHS + | tableSource + ; + kmeansCommand : KMEANS (kmeansParameter)* ; @@ -660,7 +677,9 @@ graphLookupCommand ; startClause - : START EQUAL startField = fieldExpression + : START EQUAL valueList + | START EQUAL startField = fieldExpression + | START EQUAL startValue = literalValue ; edgeClause @@ -705,7 +724,7 @@ sourceReference sourceFilterArg : ident EQUAL literalValue - | ident IN valueList + | ident IN LT_PRTHS valueList RT_PRTHS ; // join @@ -906,8 +925,13 @@ expression : valueExpression # valueExpr | relevanceExpression # relevanceExpr | left = expression comparisonOperator right = expression # compareExpr - | expression NOT? IN valueList # inExpr + | expression NOT? IN LT_PRTHS valueList RT_PRTHS # inExpr | expression NOT? BETWEEN expression AND expression # between + | expression IS nullNotnull # isNullPredicate + ; + +nullNotnull + : NOT? 
NULL ; @@ -1550,7 +1574,7 @@ intervalUnit ; valueList - : LT_PRTHS literalValue (COMMA literalValue)* RT_PRTHS + : literalValue (COMMA literalValue)* ; qualifiedName @@ -1591,6 +1615,8 @@ wildcard keywordsCanBeId : searchableKeyWord | IN + | IS + | NULL ; searchableKeyWord @@ -1680,6 +1706,7 @@ searchableKeyWord | ANOMALY_SCORE_THRESHOLD | COUNTFIELD | SHOWCOUNT + | MAXOUT | PATH | INPUT | OUTPUT diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java index 4201c9cf6ab..06c7fe1c38e 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/domain/PPLQueryRequest.java @@ -52,6 +52,11 @@ public class PPLQueryRequest { @Accessors(fluent = true) private boolean profile = false; + @Setter + @Getter + @Accessors(fluent = true) + private String queryId = null; + public PPLQueryRequest(String pplQuery, JSONObject jsonContent, String path) { this(pplQuery, jsonContent, path, ""); } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 079cc47cb5d..d4f5eea0fb7 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -120,6 +120,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.calcite.plan.OpenSearchConstants; @@ -1212,12 +1213,20 @@ public UnresolvedPlan visitConvertCommand(OpenSearchPPLParser.ConvertCommandCont .map(this::buildConversion) .filter(conversion -> conversion != null) .collect(Collectors.toList()); - return new Convert(conversions); + + String timeFormat = 
null; + if (ctx.timeFormat != null) { + timeFormat = StringUtils.unquoteText(ctx.timeFormat.getText()); + } + + return new Convert(conversions, timeFormat); } /** Supported PPL convert function names (case-insensitive). */ private static final Set SUPPORTED_CONVERSION_FUNCTIONS = - Set.of("auto", "num", "rmcomma", "rmunit", "memk", "none"); + Set.of( + "auto", "num", "rmcomma", "rmunit", "memk", "none", "ctime", "mktime", "dur2sec", + "mstime"); private Let buildConversion(OpenSearchPPLParser.ConvertFunctionContext funcCtx) { if (funcCtx.fieldExpression().isEmpty()) { @@ -1339,6 +1348,37 @@ public UnresolvedPlan visitMultisearchCommand(OpenSearchPPLParser.MultisearchCom return new Multisearch(subsearches); } + @Override + public UnresolvedPlan visitUnionCommand(OpenSearchPPLParser.UnionCommandContext ctx) { + List datasets = new ArrayList<>(); + + Integer maxout = null; + if (ctx.subsearchOptions() != null) { + OpenSearchPPLParser.SubsearchOptionsContext opts = ctx.subsearchOptions(); + if (opts.maxout != null) { + maxout = Integer.parseInt(opts.maxout.getText()); + } + } + + for (OpenSearchPPLParser.UnionDatasetContext datasetCtx : ctx.unionDataset()) { + if (datasetCtx.subSearch() != null) { + datasets.add(visitSubSearch(datasetCtx.subSearch())); + } else if (datasetCtx.tableSource() != null) { + datasets.add( + new Relation( + Collections.singletonList(internalVisitExpression(datasetCtx.tableSource())))); + } + } + + // Allow 1+ here; total count (including implicit upstream) validated during planning + if (datasets.isEmpty()) { + throw new SyntaxCheckException( + "Union command requires at least one dataset. 
Provided: " + datasets.size()); + } + + return new Union(datasets, maxout); + } + @Override public UnresolvedPlan visitRexCommand(OpenSearchPPLParser.RexCommandContext ctx) { UnresolvedExpression field = internalVisitExpression(ctx.rexExpr().field); @@ -1570,7 +1610,22 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom // Parse required base: start and edge OpenSearchPPLParser.StartClauseContext startCtx = ctx.startClause(); - Field startField = (Field) internalVisitExpression(startCtx.startField); + Field startField = null; + List startValues = null; + if (startCtx.startField != null) { + // Piped mode: start=fieldExpression + startField = (Field) internalVisitExpression(startCtx.startField); + } else if (startCtx.startValue != null) { + // Top-level mode: single literal e.g. start="Jack" + startValues = List.of((Literal) internalVisitExpression(startCtx.startValue)); + } else if (startCtx.valueList() != null) { + // Top-level mode: literal list e.g. start="Jack", "Eliot" + OpenSearchPPLParser.ValueListContext listCtx = startCtx.valueList(); + startValues = new ArrayList<>(); + for (OpenSearchPPLParser.LiteralValueContext lit : listCtx.literalValue()) { + startValues.add((Literal) internalVisitExpression(lit)); + } + } // Parse edge clause from EDGE_CLAUSE token (e.g., "edge=manager-->name") OpenSearchPPLParser.EdgeClauseContext edgeCtx = ctx.edgeClause(); String edgeClauseText = edgeCtx.edgeClauseToken.getText(); @@ -1630,6 +1685,7 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom .as(as) .maxDepth(maxDepth) .startField(startField) + .startValues(startValues) .depthField(depthField) .direction(direction) .supportArray(supportArray) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index c58eca20575..77d5c77a635 100644 --- 
a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -249,6 +249,15 @@ public UnresolvedExpression visitInExpr(InExprContext ctx) { return ctx.NOT() != null ? new Not(expr) : expr; } + @Override + public UnresolvedExpression visitIsNullPredicate(OpenSearchPPLParser.IsNullPredicateContext ctx) { + return new Function( + ctx.nullNotnull().NOT() == null + ? IS_NULL.getName().getFunctionName() + : IS_NOT_NULL.getName().getFunctionName(), + Arrays.asList(visit(ctx.expression()))); + } + /** Value Expression. */ @Override public UnresolvedExpression visitBinaryArithmetic(BinaryArithmeticContext ctx) { @@ -300,18 +309,20 @@ public UnresolvedExpression visitRenameFieldExpression(RenameFieldExpressionCont @Override public UnresolvedExpression visitPrefixSortField(OpenSearchPPLParser.PrefixSortFieldContext ctx) { - return buildSortField(ctx.sortFieldExpression(), ctx); + boolean ascending = ctx.MINUS() == null; + return buildSortField(ctx.sortFieldExpression(), ascending); } @Override public UnresolvedExpression visitSuffixSortField(OpenSearchPPLParser.SuffixSortFieldContext ctx) { - return buildSortField(ctx.sortFieldExpression(), ctx); + boolean ascending = (ctx.DESC() == null && ctx.D() == null); + return buildSortField(ctx.sortFieldExpression(), ascending); } @Override public UnresolvedExpression visitDefaultSortField( OpenSearchPPLParser.DefaultSortFieldContext ctx) { - return buildSortField(ctx.sortFieldExpression(), ctx); + return buildSortField(ctx.sortFieldExpression(), true); } @Override @@ -334,8 +345,7 @@ public UnresolvedExpression visitInvalidMixedSortField( } private Field buildSortField( - OpenSearchPPLParser.SortFieldExpressionContext sortFieldExpr, - OpenSearchPPLParser.SortFieldContext parentCtx) { + OpenSearchPPLParser.SortFieldExpressionContext sortFieldExpr, boolean ascending) { UnresolvedExpression fieldExpression = 
visit(sortFieldExpr.fieldExpression().qualifiedName()); if (sortFieldExpr.IP() != null) { @@ -346,7 +356,12 @@ private Field buildSortField( fieldExpression = new Cast(fieldExpression, AstDSL.stringLiteral("string")); } // AUTO() case uses the field expression as-is - return new Field(fieldExpression, ArgumentFactory.getArgumentList(parentCtx)); + + List arguments = + Arrays.asList( + ArgumentFactory.createSortDirectionArgument(ascending), + ArgumentFactory.getTypeArgument(sortFieldExpr)); + return new Field(fieldExpression, arguments); } @Override diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 72090e2f069..2cdc702b785 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -27,14 +27,10 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.ChartCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DecimalLiteralContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DefaultSortFieldContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EventstatsCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.IntegerLiteralContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.PrefixSortFieldContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SortFieldContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StreamstatsCommandContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SuffixSortFieldContext; import org.opensearch.sql.ppl.parser.AstExpressionBuilder; /** Util class to get all arguments as a list from the PPL command. 
*/ @@ -155,63 +151,17 @@ public static List getArgumentList(DedupCommandContext ctx) { } /** - * Get list of {@link Argument}. + * Creates an "asc" argument for sort field direction. * - * @param ctx SortFieldContext instance - * @return the list of arguments fetched from the sort field in sort command + * @param ascending true for ascending sort, false for descending + * @return Argument representing the sort direction */ - public static List getArgumentList(SortFieldContext ctx) { - if (ctx instanceof PrefixSortFieldContext) { - return getArgumentList((PrefixSortFieldContext) ctx); - } else if (ctx instanceof SuffixSortFieldContext) { - return getArgumentList((SuffixSortFieldContext) ctx); - } else { - return getArgumentList((DefaultSortFieldContext) ctx); - } - } - - /** - * Get list of {@link Argument} for prefix sort field (+/- syntax). - * - * @param ctx PrefixSortFieldContext instance - * @return the list of arguments fetched from the prefix sort field - */ - public static List getArgumentList(PrefixSortFieldContext ctx) { - return Arrays.asList( - ctx.MINUS() != null - ? new Argument("asc", new Literal(false, DataType.BOOLEAN)) - : new Argument("asc", new Literal(true, DataType.BOOLEAN)), - getTypeArgument(ctx.sortFieldExpression())); - } - - /** - * Get list of {@link Argument} for suffix sort field (asc/desc syntax). - * - * @param ctx SuffixSortFieldContext instance - * @return the list of arguments fetched from the suffix sort field - */ - public static List getArgumentList(SuffixSortFieldContext ctx) { - return Arrays.asList( - (ctx.DESC() != null || ctx.D() != null) - ? new Argument("asc", new Literal(false, DataType.BOOLEAN)) - : new Argument("asc", new Literal(true, DataType.BOOLEAN)), - getTypeArgument(ctx.sortFieldExpression())); - } - - /** - * Get list of {@link Argument} for default sort field (no direction specified). 
- * - * @param ctx DefaultSortFieldContext instance - * @return the list of arguments fetched from the default sort field - */ - public static List getArgumentList(DefaultSortFieldContext ctx) { - return Arrays.asList( - new Argument("asc", new Literal(true, DataType.BOOLEAN)), - getTypeArgument(ctx.sortFieldExpression())); + public static Argument createSortDirectionArgument(boolean ascending) { + return new Argument("asc", new Literal(ascending, DataType.BOOLEAN)); } /** Helper method to get type argument from sortFieldExpression. */ - private static Argument getTypeArgument(OpenSearchPPLParser.SortFieldExpressionContext ctx) { + public static Argument getTypeArgument(OpenSearchPPLParser.SortFieldExpressionContext ctx) { if (ctx.AUTO() != null) { return new Argument("type", new Literal("auto", DataType.STRING)); } else if (ctx.IP() != null) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 96c0787d5e3..4b75d444467 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -107,6 +107,7 @@ import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.ast.tree.Union; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -230,11 +231,26 @@ public String visitLookup(Lookup node, String context) { @Override public String visitGraphLookup(GraphLookup node, String context) { - String child = node.getChild().get(0).accept(this, context); StringBuilder command = new StringBuilder(); - command.append(child).append(" | graphlookup ").append(MASK_TABLE); - if (node.getStartField() != null) { - command.append(" start=").append(MASK_COLUMN); + if 
(node.getStartValues() != null) { + // Top-level mode: no child/pipe prefix + command.append("graphlookup ").append(MASK_TABLE); + if (node.getStartValues().size() == 1) { + command.append(" start=").append(MASK_LITERAL); + } else { + command.append(" start="); + for (int i = 0; i < node.getStartValues().size(); i++) { + if (i > 0) command.append(", "); + command.append(MASK_LITERAL); + } + } + } else { + // Piped mode: has child + String child = node.getChild().get(0).accept(this, context); + command.append(child).append(" | graphlookup ").append(MASK_TABLE); + if (node.getStartField() != null) { + command.append(" start=").append(MASK_COLUMN); + } } String arrow = node.getDirection() == GraphLookup.Direction.BI ? "<->" : "-->"; command.append(" edge=").append(MASK_COLUMN).append(arrow).append(MASK_COLUMN); @@ -527,7 +543,11 @@ public String visitConvert(Convert node, String context) { return StringUtils.format("%s(%s)%s", functionName, fields, asClause); }) .collect(Collectors.joining(",")); - return StringUtils.format("%s | convert %s", child, conversions); + String timeformatClause = + node.getTimeFormat() != null + ? 
StringUtils.format("timeformat=\"%s\" ", node.getTimeFormat()) + : ""; + return StringUtils.format("%s | convert %s%s", child, timeformatClause, conversions); } @Override @@ -778,32 +798,37 @@ public String visitAppend(Append node, String context) { @Override public String visitMultisearch(Multisearch node, String context) { + return anonymizeSubsearchCommand("multisearch", node.getSubsearches()); + } + + @Override + public String visitUnion(Union node, String context) { + return anonymizeSubsearchCommand("union", node.getDatasets()); + } + + private String anonymizeSubsearchCommand(String commandName, List subsearches) { + String keywords = + "source|fields|where|stats|head|tail|sort|eval|rename|" + + commandName + + "|search|table|identifier|\\*\\*\\*"; List anonymizedSubsearches = new ArrayList<>(); - for (UnresolvedPlan subsearch : node.getSubsearches()) { + for (UnresolvedPlan subsearch : subsearches) { String anonymizedSubsearch = anonymizeData(subsearch); anonymizedSubsearch = "search " + anonymizedSubsearch; anonymizedSubsearch = anonymizedSubsearch - .replaceAll("\\bsource=\\w+", "source=table") // Replace table names after source= - .replaceAll( - "\\b(?!source|fields|where|stats|head|tail|sort|eval|rename|multisearch|search|table|identifier|\\*\\*\\*)\\w+(?=\\s*[<>=!])", - "identifier") // Replace field names before operators - .replaceAll( - "\\b(?!source|fields|where|stats|head|tail|sort|eval|rename|multisearch|search|table|identifier|\\*\\*\\*)\\w+(?=\\s*,)", - "identifier") // Replace field names before commas - .replaceAll( - "fields" - + " \\+\\s*\\b(?!source|fields|where|stats|head|tail|sort|eval|rename|multisearch|search|table|identifier|\\*\\*\\*)\\w+", - "fields + identifier") // Replace field names after 'fields +' + .replaceAll("\\bsource=\\w+", "source=table") + .replaceAll("\\b(?!" + keywords + ")\\w+(?=\\s*[<>=!])", "identifier") + .replaceAll("\\b(?!" + keywords + ")\\w+(?=\\s*,)", "identifier") + .replaceAll("fields \\+\\s*\\b(?!" 
+ keywords + ")\\w+", "fields + identifier") .replaceAll( - "fields" - + " \\+\\s*identifier,\\s*\\b(?!source|fields|where|stats|head|tail|sort|eval|rename|multisearch|search|table|identifier|\\*\\*\\*)\\w+", - "fields + identifier,identifier"); // Handle multiple fields + "fields \\+\\s*identifier,\\s*\\b(?!" + keywords + ")\\w+", + "fields + identifier,identifier"); anonymizedSubsearches.add(StringUtils.format("[%s]", anonymizedSubsearch)); } - return StringUtils.format("| multisearch %s", String.join(" ", anonymizedSubsearches)); + return StringUtils.format("| %s %s", commandName, String.join(" ", anonymizedSubsearches)); } @Override diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java index faf944da4a0..56ed409b4d7 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java @@ -59,4 +59,152 @@ public void testAppendPipeWithMergedColumns() { + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + /** + * Regression test: double appendpipe with different aggregations. Result count (16 = 14 + 1 avg + + * 1 max) is verified in integration tests only because RelRunners.run() creates a new planner + * that conflicts with shared RelNode subtrees — a test framework limitation that does not affect + * the production path. 
+ */ + @Test + public void testDoubleAppendPipe() { + String ppl = + "source=EMP | appendpipe [stats avg(SAL) as avg_sal] | appendpipe [stats max(SAL) as" + + " max_sal]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[$8], max_sal=[null:DECIMAL(7, 2)])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[null:DECIMAL(11, 6)], max_sal=[$0])\n" + + " LogicalAggregate(group=[{}], max_sal=[MAX($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " 
LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + /** + * Regression test: triple appendpipe with different aggregations. Result count (17 = 14 + 1 avg + + * 1 max + 1 min) is verified in integration tests only — see testDoubleAppendPipe for rationale. + */ + @Test + public void testTripleAppendPipe() { + String ppl = + "source=EMP | appendpipe [stats avg(SAL) as avg_sal] | appendpipe [stats max(SAL) as" + + " max_sal] | appendpipe [stats min(SAL) as min_sal]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[$8], max_sal=[$9]," + + " min_sal=[null:DECIMAL(7, 2)])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[$8]," + + " max_sal=[null:DECIMAL(7, 2)])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[null:DECIMAL(11, 6)], max_sal=[$0])\n" + + " LogicalAggregate(group=[{}], 
max_sal=[MAX($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[null:DECIMAL(11, 6)], max_sal=[null:DECIMAL(7, 2)], min_sal=[$0])\n" + + " LogicalAggregate(group=[{}], min_sal=[MIN($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], avg_sal=[$8]," + + " max_sal=[null:DECIMAL(7, 2)])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], 
ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[null:DECIMAL(11, 6)], max_sal=[$0])\n" + + " LogicalAggregate(group=[{}], max_sal=[MAX($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " avg_sal=[null:DECIMAL(11, 6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[null:TINYINT]," + + " avg_sal=[$0])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + /** Regression test: double appendpipe with non-aggregation (filter) subpipeline. 
*/ + @Test + public void testDoubleAppendPipeWithFilter() { + String ppl = "source=EMP | appendpipe [where DEPTNO = 20] | appendpipe [where DEPTNO = 30]"; + RelNode root = getRelNode(ppl); + verifyResultCount(root, 25); // 14 original + 5 (dept 20) + 6 (dept 30) + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java index 784fedc2ede..472e77e2d29 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java @@ -14,6 +14,7 @@ import org.apache.calcite.test.CalciteAssert; import org.junit.Ignore; import org.junit.Test; +import org.opensearch.sql.common.error.ErrorReport; public class CalcitePPLBasicTest extends CalcitePPLAbstractTest { @@ -201,9 +202,9 @@ public void testFieldsPlusThenMinus() { @Test public void testFieldsMinusThenPlusShouldThrowException() { String ppl = "source=EMP | fields - DEPTNO, SAL | fields + EMPNO, DEPTNO, SAL"; - IllegalArgumentException e = + ErrorReport e = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> { RelNode root = getRelNode(ppl); }); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java new file mode 100644 index 00000000000..6d62dbb2cde --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java @@ -0,0 +1,170 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; 
+import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; + +/** + * Unit test for GitHub issue #5174: bin/chart NPE with null values. + * + *

    Verifies that the chart command generates correct logical plans when the input contains null + * values from binning, and that the sort operations properly handle nulls. + */ +public class CalcitePPLChartNullTest extends CalcitePPLAbstractTest { + + public CalcitePPLChartNullTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Table with null values matching the issue's bounty-numbers schema + ImmutableList rows = + ImmutableList.of( + new Object[] {1, "A", "X", 10.5}, + new Object[] {2, "A", "Y", 20.3}, + new Object[] {10, "B", "X", 100.0}, + new Object[] {null, "B", "Y", null}); + schema.add("bounty_numbers", new BountyNumbersTable(rows)); + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testBinThenChartWithNullValuesLogicalPlan() { + String ppl = + "source=bounty_numbers | bin value span=50 as val_bin" + + " | chart count() over val_bin by category"; + RelNode root = getRelNode(ppl); + // Verify the SQL plan contains WHERE val_bin IS NOT NULL to filter null bin values, + // and NULLS LAST in ORDER BY for proper null handling in sort + String expectedSparkSql = + "SELECT `t2`.`val_bin`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + + " `t10`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END" + + " `category`, SUM(`t2`.`count()`) `count()`\n" + + "FROM (SELECT `val_bin`, `category`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP 
BY `val_bin`, `category`) `t2`\n" + + "LEFT JOIN (SELECT `category`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY SUM(`count()`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `category`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t3`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP BY `val_bin`, `category`) `t7`\n" + + "WHERE `category` IS NOT NULL\n" + + "GROUP BY `category`) `t10` ON `t2`.`category` = `t10`.`category`\n" + + "GROUP BY `t2`.`val_bin`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + + " `t10`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`val_bin` NULLS LAST, 2 NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testBinThenChartSingleGroupWithNullValuesLogicalPlan() { + String ppl = + "source=bounty_numbers | bin value span=50 as val_bin | chart count() over val_bin"; + RelNode root = getRelNode(ppl); + // Verify null bin values are filtered and sort uses NULLS LAST + String expectedSparkSql = + "SELECT `val_bin`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP BY `val_bin`\n" + + "ORDER BY `val_bin` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @RequiredArgsConstructor + public static class BountyNumbersTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("count", SqlTypeName.INTEGER) + .nullable(true) + .add("category", SqlTypeName.VARCHAR) + .nullable(true) + .add("subcategory", SqlTypeName.VARCHAR) + .nullable(true) + .add("value", SqlTypeName.DOUBLE) + .nullable(true) + .build(); + + @Override + public 
Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(4d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java index 936b4212f4f..f49a967aa86 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java @@ -269,4 +269,139 @@ public void testConvertAutoWithMemoryField() { + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testConvertMktimeFunction() { + String ppl = "source=EMP | convert mktime(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MKTIME($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MKTIME(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertCtimeFunction() { + String ppl = "source=EMP | convert ctime(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = 
+ "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[CTIME($5)]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, CTIME(`SAL`) `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertDur2secFunction() { + String ppl = "source=EMP | convert dur2sec(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[DUR2SEC($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, DUR2SEC(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertMstimeFunction() { + String ppl = "source=EMP | convert mstime(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MSTIME($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MSTIME(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithTimeformatMktime() { + String ppl = "source=EMP | convert timeformat=\"%Y-%m-%d\" mktime(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MKTIME($1, '%Y-%m-%d')], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, 
EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MKTIME(`ENAME`, '%Y-%m-%d') `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`," + + " `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithTimeformatCtime() { + String ppl = "source=EMP | convert timeformat=\"%Y-%m-%d %H:%M:%S\" ctime(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[CTIME($5," + + " '%Y-%m-%d %H:%M:%S')], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, CTIME(`SAL`, '%Y-%m-%d %H:%M:%S')" + + " `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertTimeformatWithMultipleFunctions() { + String ppl = "source=EMP | convert timeformat=\"%Y-%m-%d\" mktime(ENAME), ctime(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MKTIME($1, '%Y-%m-%d')], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], SAL=[CTIME($5, '%Y-%m-%d')], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MKTIME(`ENAME`, '%Y-%m-%d') `ENAME`, `JOB`, `MGR`, `HIREDATE`," + + " CTIME(`SAL`, '%Y-%m-%d') `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertTimeformatMixedWithNonTimeFunctions() { + String ppl = "source=EMP | convert timeformat=\"%Y-%m-%d\" mktime(ENAME), auto(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MKTIME($1, '%Y-%m-%d')], JOB=[$2], MGR=[$3]," + + " HIREDATE=[$4], 
SAL=[AUTO($5)], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MKTIME(`ENAME`, '%Y-%m-%d') `ENAME`, `JOB`, `MGR`, `HIREDATE`, AUTO(`SAL`)" + + " `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 13a116a1a00..ca1a789b0f4 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -217,18 +217,75 @@ public void testDedupExpr() { "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields NEW_DEPTNO, EMPNO, ENAME, JOB | sort" + " NEW_DEPTNO | dedup 1 NEW_DEPTNO"; root = getRelNode(ppl); + // Sort is stripped from below the window and moved to the top to ensure order is preserved expectedLogical = - "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" - + " LogicalFilter(condition=[<=($4, 1)])\n" - + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" - + " LogicalFilter(condition=[IS NOT NULL($0)])\n" - + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + "LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0 NULLS" + + " FIRST)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); } + /** Regression test 
for https://github.com/opensearch-project/sql/issues/3922 */ + @Test + public void testSortThenDedup() { + String ppl = "source=EMP | sort DEPTNO | dedup 1 JOB | fields DEPTNO, ENAME, JOB"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$7], ENAME=[$1], JOB=[$2])\n" + + " LogicalSort(sort0=[$7], dir0=[ASC-nulls-first])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[<=($8, 1)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION" + + " BY $2 ORDER BY $7 NULLS FIRST)])\n" + + " LogicalFilter(condition=[IS NOT NULL($2)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + // After fix, the sort order (DEPTNO ASC) must be preserved through dedup. + // The correct result has DEPTNO in ascending order: 10, 10, 10, 20, 30. 
+ String expectedResult = + "DEPTNO=10; ENAME=MILLER; JOB=CLERK\n" + + "DEPTNO=10; ENAME=KING; JOB=PRESIDENT\n" + + "DEPTNO=10; ENAME=CLARK; JOB=MANAGER\n" + + "DEPTNO=20; ENAME=SCOTT; JOB=ANALYST\n" + + "DEPTNO=30; ENAME=ALLEN; JOB=SALESMAN\n"; + verifyResult(root, expectedResult); + } + + /** Regression test for https://github.com/opensearch-project/sql/issues/3922 */ + @Test + public void testSortThenDedupWithEval() { + String ppl = + "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields NEW_DEPTNO, EMPNO, ENAME, JOB | sort" + + " NEW_DEPTNO | dedup 1 NEW_DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0 NULLS" + + " FIRST)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + // After fix, the sort order (NEW_DEPTNO ASC) must be preserved through dedup. + // The correct result has NEW_DEPTNO in ascending order: 11, 21, 31. 
+ String expectedResult = + "NEW_DEPTNO=11; EMPNO=7782; ENAME=CLARK; JOB=MANAGER\n" + + "NEW_DEPTNO=21; EMPNO=7369; ENAME=SMITH; JOB=CLERK\n" + + "NEW_DEPTNO=31; EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN\n"; + verifyResult(root, expectedResult); + } + @Test public void testRenameDedup() { String ppl = @@ -261,15 +318,39 @@ public void testRenameDedup() { "source=EMP | eval TEMP_DEPTNO = DEPTNO + 1 | rename TEMP_DEPTNO as NEW_DEPTNO | fields" + " NEW_DEPTNO, EMPNO, ENAME, JOB | sort NEW_DEPTNO | dedup 1 NEW_DEPTNO"; root = getRelNode(ppl); + // Sort is stripped from below the window and moved to the top to ensure order is preserved expectedLogical = - "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" - + " LogicalFilter(condition=[<=($4, 1)])\n" - + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" - + " LogicalFilter(condition=[IS NOT NULL($0)])\n" - + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + "LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0 NULLS" + + " FIRST)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); } + + /** + * Edge case: sort field is projected away before dedup. The sort collation references a field + * (DEPTNO) that is no longer in the schema after the fields command. The dedup should still work + * correctly but without the sort-restore optimization since the sort field is unavailable. 
+ */ + @Test + public void testSortFieldProjectedAwayBeforeDedup() { + String ppl = "source=EMP | sort DEPTNO | fields ENAME, JOB | dedup 1 JOB"; + RelNode root = getRelNode(ppl); + // No restore Sort at top because DEPTNO was projected away + String expectedLogical = + "LogicalProject(ENAME=[$0], JOB=[$1])\n" + + " LogicalFilter(condition=[<=($2, 1)])\n" + + " LogicalProject(ENAME=[$0], JOB=[$1], _row_number_dedup_=[ROW_NUMBER() OVER" + + " (PARTITION BY $1)])\n" + + " LogicalFilter(condition=[IS NOT NULL($1)])\n" + + " LogicalProject(ENAME=[$1], JOB=[$2])\n" + + " LogicalSort(sort0=[$7], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEnhancedCoalesceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEnhancedCoalesceTest.java index 56141eae584..8e54d45ac98 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEnhancedCoalesceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEnhancedCoalesceTest.java @@ -138,7 +138,7 @@ public void testCoalesceWithNonExistentField() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalSort(fetch=[2])\n" - + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:VARCHAR, $1)])\n" + + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:NULL, $1)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -155,7 +155,7 @@ public void testCoalesceWithMultipleNonExistentFields() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalSort(fetch=[1])\n" - + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:VARCHAR, null:VARCHAR, $1," + + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:NULL, null:NULL, $1," + " 'fallback':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -175,8 +175,8 @@ public void 
testCoalesceWithAllNonExistentFields() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalSort(fetch=[1])\n" - + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:VARCHAR, null:VARCHAR," - + " null:VARCHAR)])\n" + + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:NULL, null:NULL," + + " null:NULL)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -235,4 +235,38 @@ public void testCoalesceTypeInferenceWithNonNullableOperands() { + "LIMIT 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testCoalesceWithNullLiteralAndInteger() { + // Bug #5175: COALESCE(null, 42) previously inferred VARCHAR because the NULL identifier + // was replaced with null:VARCHAR. The result type should be INTEGER so the value comes + // back as an int. + String ppl = "source=EMP | eval result = coalesce(null, 42) | fields EMPNO, result | head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], result=[COALESCE(null:NULL, 42)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(NULL, 42) `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testCoalesceWithIntegerAndNullLiteral() { + // Bug #5175: COALESCE(42, null) should also be typed as INTEGER, not VARCHAR. 
+ String ppl = "source=EMP | eval result = coalesce(42, null) | fields EMPNO, result | head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], result=[COALESCE(42, null:NULL)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, COALESCE(42, NULL) `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java index 70b53d3c6fc..9b37ab5b407 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java @@ -12,6 +12,7 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; +import org.opensearch.sql.common.error.ErrorReport; public class CalcitePPLEvalTest extends CalcitePPLAbstractTest { @@ -337,9 +338,9 @@ public void testComplexEvalCommands4() { "source=EMP | eval col1 = SAL | sort - col1 | head 3 | fields ENAME, col1 | eval col2 =" + " col1 | sort + col2 | fields ENAME, col2 | eval col3 = col2 | head 2 | fields" + " HIREDATE, col3"; - IllegalArgumentException e = + ErrorReport e = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> { RelNode root = getRelNode(ppl); }); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java index e20bd1b0e47..5bef9c397eb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java @@ -12,6 +12,7 @@ import org.apache.calcite.rel.RelNode; import 
org.apache.calcite.test.CalciteAssert; import org.junit.Test; +import org.opensearch.sql.common.error.ErrorReport; public class CalcitePPLFieldFormatTest extends CalcitePPLAbstractTest { @@ -218,9 +219,9 @@ public void testComplexFieldFormatCommands4() { "source=EMP | fieldformat col1 = SAL | sort - col1 | head 3 | fields ENAME, col1 |" + " fieldformat col2 = col1 | sort + col2 | fields ENAME, col2 | fieldformat col3 =" + " col2 | head 2 | fields HIREDATE, col3"; - IllegalArgumentException e = + ErrorReport e = assertThrows( - IllegalArgumentException.class, + ErrorReport.class, () -> { RelNode root = getRelNode(ppl); }); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java index e6cbfefc15a..3f1a1c7c0ab 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java @@ -5,6 +5,8 @@ package org.opensearch.sql.ppl.calcite; +import static org.junit.Assert.assertTrue; + import com.google.common.collect.ImmutableList; import java.util.List; import lombok.RequiredArgsConstructor; @@ -31,7 +33,9 @@ import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Programs; import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Assert; import org.junit.Test; +import org.opensearch.sql.exception.SemanticCheckException; public class CalcitePPLGraphLookupTest extends CalcitePPLAbstractTest { @@ -129,6 +133,39 @@ public void testGraphLookupWithCompoundFilter() { verifyLogical(root, expectedLogical); } + @Test + public void testGraphLookupTopLevelSingleLiteral() { + // Top-level graphLookup with single literal start value + String ppl = + "graphLookup employee start=\"Dev\" edge=reportsTo-->name" + " as reportingHierarchy"; + + RelNode root = getRelNode(ppl); + String expectedLogical = + 
"LogicalGraphLookup(fromField=[reportsTo], toField=[name]," + + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[0]," + + " bidirectional=[false], startValues=[[Dev]])\n" + + " LogicalValues(tuples=[[]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testGraphLookupTopLevelLiteralList() { + // Top-level graphLookup with multiple literal start values + String ppl = + "graphLookup employee start=\"Dev\", \"Eliot\" edge=reportsTo-->name" + + " as reportingHierarchy"; + + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," + + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[0]," + + " bidirectional=[false], startValues=[[Dev, Eliot]])\n" + + " LogicalValues(tuples=[[]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n"; + verifyLogical(root, expectedLogical); + } + @Test public void testGraphLookupBidirectional() { // Test graphLookup with bidirectional traversal @@ -147,6 +184,33 @@ public void testGraphLookupBidirectional() { verifyLogical(root, expectedLogical); } + @Test + public void testGraphLookupLiteralStartInPipedModeIgnoreChild() { + // Literal start values should not be allowed in piped mode + String ppl = + "source=employee | where name=\"Dev\" | graphLookup employee start=\"Dev\"" + + " edge=reportsTo-->name as reportingHierarchy"; + + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," + + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[0]," + + " bidirectional=[false], startValues=[[Dev]])\n" + + " LogicalValues(tuples=[[]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testGraphLookupFieldStartInTopLevelModeRejectsError() { + // Field reference start should not be allowed in top-level mode (no piped source) + 
String ppl = + "graphLookup employee start=reportsTo edge=reportsTo-->name" + " as reportingHierarchy"; + + Throwable t = Assert.assertThrows(SemanticCheckException.class, () -> getRelNode(ppl)); + assertTrue(t.getMessage().contains("Field reference start requires a piped source")); + } + @Override protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { final SchemaPlus rootSchema = Frameworks.createRootSchema(true); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java index 5d7669d20a1..d1310ce60dd 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -190,17 +190,21 @@ public void testNoMvInPipeline() { @Test public void testNoMvNonExistentField() { + // After issue #5175 was fixed, missing identifiers inside COALESCE resolve to a null + // literal of SqlTypeName.NULL (instead of VARCHAR). This lets Calcite promote the null + // to the expected array type in ARRAY_COMPACT, so the plan builds successfully and the + // nomv column evaluates to the empty-string fallback from COALESCE. String ppl = "source=EMP | eval arr = array('a', 'b') | nomv does_not_exist | head 1"; + RelNode root = getRelNode(ppl); - Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); - - String msg = String.valueOf(ex.getMessage()); - org.junit.Assert.assertTrue( - "Expected error message to mention missing field or type error. 
Actual: " + msg, - msg.toLowerCase().contains("does_not_exist") - || msg.toLowerCase().contains("field") - || msg.contains("ARRAY_COMPACT") - || msg.contains("ARRAY")); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array('a', 'b')]," + + " does_not_exist=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(null:ANY ARRAY), '\n" + + "'), '':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); } @Test diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index 179fb3bc830..b9e1040f938 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -9,6 +9,18 @@ import org.apache.calcite.test.CalciteAssert; import org.junit.Test; +/** + * Tests for reverse command optimization. + * + *

    The reverse command behavior depends on the presence of: 1. Existing collation (sort): Reverse + * the sort direction 2. @timestamp field: Sort by @timestamp DESC 3. Neither: No-op (ignore reverse + * command) + * + *

    These tests use SCOTT_WITH_TEMPORAL schema where EMP table has a default collation on EMPNO + * (primary key), demonstrating case #1 (reverse existing collation). + * + *

    For @timestamp and no-op cases, see CalciteReverseCommandIT integration tests. + */ public class CalcitePPLReverseTest extends CalcitePPLAbstractTest { public CalcitePPLReverseTest() { super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); @@ -16,16 +28,11 @@ public CalcitePPLReverseTest() { @Test public void testReverseParserSuccess() { + // EMP table has default collation on EMPNO, so reverse flips it to DESC String ppl = "source=EMP | reverse"; RelNode root = getRelNode(ppl); String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[DESC])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -60,12 +67,7 @@ public void testReverseParserSuccess() { verifyResult(root, expectedResult); String expectedSparkSql = - "" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t0`"; + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC NULLS FIRST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -73,25 +75,13 @@ public void testReverseParserSuccess() { public void testReverseWithSortParserSuccess() { String ppl = "source=EMP | sort ENAME | reverse"; RelNode root = getRelNode(ppl); + // Reverse replaces the existing sort in-place, producing a single sort with reversed direction String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedSparkSql = - "" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY `ENAME`) `t0`\n" - + "ORDER BY `__reverse_row_num__` DESC NULLS FIRST"; + String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `ENAME` DESC"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -99,28 +89,13 @@ public void testReverseWithSortParserSuccess() { public void testDoubleReverseParserSuccess() { String ppl = "source=EMP | reverse | reverse"; RelNode root = getRelNode(ppl); + // Double reverse: first reverse flips ASC->DESC, second reverse flips DESC->ASC + // Result is back to original order with a single sort node String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + 
"LogicalSort(sort0=[$0], dir0=[ASC])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t0`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t2`"; + String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -129,13 +104,8 @@ public void testReverseWithHeadParserSuccess() { String ppl = "source=EMP | reverse | head 2"; RelNode root = getRelNode(ppl); String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC], fetch=[2])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[DESC], fetch=[2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -146,12 +116,7 @@ public void testReverseWithHeadParserSuccess() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST\n" - + "LIMIT 2) `t0`"; + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY 
`EMPNO` DESC NULLS FIRST\n" + "LIMIT 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -178,4 +143,291 @@ public void testReverseWithExpressionShouldFail() { String ppl = "source=EMP | reverse EMPNO + 1"; getRelNode(ppl); } + + @Test + public void testMultipleSortsWithReverseParserSuccess() { + String ppl = "source=EMP | sort + SAL | sort - ENAME | reverse"; + RelNode root = getRelNode(ppl); + // Reverse replaces the last sort (- ENAME DESC) in-place, flipping to ASC + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `SAL`) `t`\n" + + "ORDER BY `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultiFieldSortWithReverseParserSuccess() { + String ppl = "source=EMP | sort + SAL, - ENAME | reverse"; + RelNode root = getRelNode(ppl); + // Reverse replaces the multi-field sort in-place, flipping each field's direction + String expectedLogical = + "LogicalSort(sort0=[$5], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `SAL` DESC, `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testComplexMultiFieldSortWithReverseParserSuccess() { + String ppl = "source=EMP | sort DEPTNO, + SAL, - ENAME | reverse"; + RelNode root = getRelNode(ppl); + // Reverse replaces the 3-field sort in-place, flipping each direction + String expectedLogical = + "LogicalSort(sort0=[$7], sort1=[$5], sort2=[$1], dir0=[DESC-nulls-last]," + + " dir1=[DESC-nulls-last], dir2=[ASC-nulls-first])\n" + + " 
LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `DEPTNO` DESC, `SAL` DESC, `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReverseWithFieldsAndSortParserSuccess() { + String ppl = "source=EMP | fields ENAME, SAL, DEPTNO | sort + SAL | reverse"; + RelNode root = getRelNode(ppl); + // Reverse replaces the sort on SAL in-place + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + + " LogicalProject(ENAME=[$1], SAL=[$5], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `ENAME`, `SAL`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `SAL` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testSortHeadReverse() { + // Tests "sort | head | reverse": reverse must be applied after the limit, + // not merged into the sort+fetch node, to preserve correct semantics. 
+ String ppl = "source=EMP | sort SAL | head 5 | reverse"; + RelNode root = getRelNode(ppl); + + // The reversed sort sits above the limit+sort node + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first], fetch=[5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testHeadThenSortReverseNoOpt() { + // Tests fetch limit behavior: head 5 | sort field | reverse + // Reverse replaces the sort on SAL in-place, preserving the head limit below + String ppl = "source=EMP | head 5 | sort + SAL | reverse"; + RelNode root = getRelNode(ppl); + + // Two LogicalSort nodes: reversed sort on SAL, then fetch=5 + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + + " LogicalSort(fetch=[5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 5) `t`\n" + + "ORDER BY `SAL` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testSortFieldsReverse() { + // Test backtracking: sort on SAL, then project only ENAME, then reverse + // The sort field (SAL) is removed from schema by fields command + // But reverse should still work by backtracking to find the sort and replacing it in-place + String ppl = "source=EMP | sort SAL | fields ENAME | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(ENAME=[$1])\n" + + " LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = "SELECT `ENAME`\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `SAL` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + // ==================== Complex query 
tests with blocking operators ==================== + // These tests verify that reverse becomes a no-op after blocking operators + // that destroy collation (aggregate, join, set ops, window functions). + // Since SCOTT_WITH_TEMPORAL schema has no @timestamp field, reverse is ignored. + + @Test + public void testReverseAfterAggregationIsNoOp() { + // Aggregation destroys input ordering, so reverse has no collation to reverse + // and no @timestamp field exists, so reverse should be a no-op + String ppl = "source=EMP | stats count() as c by DEPTNO | reverse"; + RelNode root = getRelNode(ppl); + // No additional sort node for reverse - it's a no-op after aggregation + // Note: There's a project for column reordering (c, DEPTNO) in the output + String expectedLogical = + "LogicalProject(c=[$1], DEPTNO=[$0])\n" + + " LogicalAggregate(group=[{0}], c=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT COUNT(*) `c`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReverseAfterJoinIsNoOp() { + // Join destroys input ordering, so reverse has no collation to reverse + // and no @timestamp field exists, so reverse should be a no-op + String ppl = "source=EMP | join on EMP.DEPTNO = DEPT.DEPTNO DEPT | reverse"; + RelNode root = getRelNode(ppl); + // No additional sort node for reverse - it's a no-op after join + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], DEPT.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, 
`EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `DEPT`.`DEPTNO` `DEPT.DEPTNO`," + + " `DEPT`.`DNAME`, `DEPT`.`LOC`\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReverseAfterSortAndAggregationIsNoOp() { + // Even if there's a sort before aggregation, aggregation destroys the collation + // so reverse after aggregation should be a no-op + String ppl = "source=EMP | sort SAL | stats count() as c by DEPTNO | reverse"; + RelNode root = getRelNode(ppl); + // Sort before aggregation is present, but reverse after aggregation is a no-op + // Note: There's a project for column reordering (c, DEPTNO) in the output + String expectedLogical = + "LogicalProject(c=[$1], DEPTNO=[$0])\n" + + " LogicalAggregate(group=[{0}], c=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + // Verify result data - reverse is a no-op, so data remains in aggregation order + String expectedResult = "c=5; DEPTNO=20\n" + "c=3; DEPTNO=10\n" + "c=6; DEPTNO=30\n"; + verifyResult(root, expectedResult); + } + + @Test + public void testReverseAfterWhereWithSort() { + // Filter (where) doesn't destroy collation, so reverse should work through it + String ppl = "source=EMP | sort SAL | where DEPTNO = 10 | reverse"; + RelNode root = getRelNode(ppl); + // Reverse backtracks through filter to find the sort and inserts reversed sort + // after the original sort, then the filter is applied on top + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = 
+ "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `SAL`) `t`\n" + + "WHERE `DEPTNO` = 10\n" + + "ORDER BY `SAL` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReverseAfterEvalWithSort() { + // Eval (project) doesn't destroy collation, so reverse should work through it + String ppl = "source=EMP | sort SAL | eval bonus = SAL * 0.1 | reverse"; + RelNode root = getRelNode(ppl); + // Reversed sort is added on top of the project (eval) + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], bonus=[*($5, 0.1:DECIMAL(2, 1))])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testReverseAfterMultipleFiltersWithSort() { + // Multiple filters don't destroy collation (Calcite merges consecutive filters) + String ppl = "source=EMP | sort SAL | where DEPTNO = 10 | where SAL > 1000 | reverse"; + RelNode root = getRelNode(ppl); + // Reversed sort is added on top of the merged filter + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + + " LogicalFilter(condition=[AND(=($7, 10), >($5, 1000))])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testReverseSortJoinSort() { + // Sort before join, then another sort after join, reverse should work + String ppl = + "source=EMP | sort SAL | join on EMP.DEPTNO = DEPT.DEPTNO DEPT | sort DNAME | reverse"; + RelNode root = getRelNode(ppl); + // The sort before join is destroyed by join, but sort after join can be reversed + // Reverse replaces the sort on DNAME in-place + String 
expectedLogical = + "LogicalSort(sort0=[$9], dir0=[DESC-nulls-last])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], DEPT.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testReverseAfterAggregationWithSort() { + // Sort after aggregation, then reverse should work + // Reverse replaces the sort on DEPTNO in-place + String ppl = "source=EMP | stats count() as c by DEPTNO | sort DEPTNO | reverse"; + RelNode root = getRelNode(ppl); + // Note: There's a project for column reordering (c, DEPTNO) so DEPTNO is at position 1 + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + + " LogicalProject(c=[$1], DEPTNO=[$0])\n" + + " LogicalAggregate(group=[{0}], c=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT COUNT(*) `c`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `DEPTNO`\n" + + "ORDER BY `DEPTNO` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java index 9967b10543e..879d48bc4de 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java @@ -123,6 +123,34 @@ public void testSpathAutoExtractModeWithFields() { + "FROM `scott`.`EMP`"); } + @Test + public void testSpathAutoExtractWithMultiFieldEval() { + // Issue #5185: eval with multiple dotted-path assignments from MAP column + // should not 
remove the MAP root field + withPPLQuery( + "source=EMP | spath input=ENAME" + + " | eval ENAME.user.name=ENAME.user.name, ENAME.user.age=ENAME.user.age" + + " | fields ENAME.user.name, ENAME.user.age") + .expectLogical( + "LogicalProject(ENAME.user.name=[ITEM(JSON_EXTRACT_ALL($1), 'user.name')]," + + " ENAME.user.age=[ITEM(JSON_EXTRACT_ALL($1), 'user.age')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"); + } + + @Test + public void testSpathAutoExtractWithSeparateEvalCommands() { + // Issue #5185: separate eval commands with dotted-path assignments from MAP column + withPPLQuery( + "source=EMP | spath input=ENAME" + + " | eval ENAME.user.name=ENAME.user.name" + + " | eval ENAME.user.age=ENAME.user.age" + + " | fields ENAME.user.name, ENAME.user.age") + .expectLogical( + "LogicalProject(ENAME.user.name=[ITEM(JSON_EXTRACT_ALL($1), 'user.name')]," + + " ENAME.user.age=[ITEM(JSON_EXTRACT_ALL($1), 'user.age')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"); + } + @Test public void testSpathAutoExtractModeWithSort() { withPPLQuery("source=EMP | spath input=ENAME output=result" + " | sort result.user.name") diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index 48c0e5cfa62..2e4b6a605dd 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -5,6 +5,10 @@ package org.opensearch.sql.ppl.calcite; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; @@ -92,41 +96,21 @@ public void testStreamstatsCurrent() { public void testStreamstatsWindow() { String ppl = "source=EMP | streamstats window = 5 max(SAL) by DEPTNO"; RelNode root 
= getRelNode(ppl); + // Uses self-join plan to avoid nested correlates that cause NPE in Calcite's decorrelator String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" + " LogicalSort(sort0=[$8], dir0=[ASC])\n" - + " LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7," - + " 8}])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " LogicalAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}], max(SAL)=[MAX($11)])\n" + + " LogicalJoin(condition=[AND(>=($9, -($8, 4)), <=($9, $8), IS NOT DISTINCT" + + " FROM($7, $10))], joinType=[left])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalAggregate(group=[{}], max(SAL)=[MAX($0)])\n" - + " LogicalProject(SAL=[$5])\n" - + " LogicalFilter(condition=[AND(>=($8, -($cor0.__stream_seq__, 4)), <=($8," - + " $cor0.__stream_seq__), OR(=($7, $cor0.DEPTNO), AND(IS NULL($7), IS" - + " NULL($cor0.DEPTNO))))])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," - + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER" - + " ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_DEPTNO__=[$7]," + + " __r_SAL__=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - - String expectedSparkSql = - "SELECT `$cor0`.`EMPNO`, `$cor0`.`ENAME`, `$cor0`.`JOB`, `$cor0`.`MGR`, `$cor0`.`HIREDATE`," - + " `$cor0`.`SAL`, `$cor0`.`COMM`, `$cor0`.`DEPTNO`, `t3`.`max(SAL)`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`\n" - + "FROM `scott`.`EMP`) 
`$cor0`,\n" - + "LATERAL (SELECT MAX(`SAL`) `max(SAL)`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`\n" - + "FROM `scott`.`EMP`) `t0`\n" - + "WHERE `__stream_seq__` >= `$cor0`.`__stream_seq__` - 4 AND `__stream_seq__` <=" - + " `$cor0`.`__stream_seq__` AND (`DEPTNO` = `$cor0`.`DEPTNO` OR `DEPTNO` IS NULL AND" - + " `$cor0`.`DEPTNO` IS NULL)) `t3`\n" - + "ORDER BY `$cor0`.`__stream_seq__` NULLS LAST"; - verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -222,4 +206,48 @@ public void testStreamstatsReset() { + "ORDER BY `$cor0`.`__stream_seq__` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testMultipleStreamstatsWithWindow() { + String ppl = + "source=EMP | streamstats window=2 avg(SAL) as avg_sal by DEPTNO" + + " | streamstats window=2 avg(avg_sal) as avg_dept_sal by DEPTNO"; + RelNode root = getRelNode(ppl); + assertNotNull("Chained streamstats with window should produce a valid plan", root); + // Verify the plan uses self-join (LogicalJoin) instead of LogicalCorrelate + String plan = root.explain(); + assertTrue( + "Plan should contain LogicalJoin for self-join approach", plan.contains("LogicalJoin")); + assertFalse( + "Plan should not contain LogicalCorrelate for window+group streamstats", + plan.contains("LogicalCorrelate")); + } + + @Test + public void testStreamstatsWithReverse() { + String ppl = "source=EMP | streamstats max(SAL) by DEPTNO | reverse"; + RelNode root = getRelNode(ppl); + // Reverse replaces the __stream_seq__ sort in-place via backtracking + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" + + " LogicalSort(sort0=[$8], dir0=[DESC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" + + " 
(PARTITION BY $7 ROWS UNBOUNDED PRECEDING)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ROWS BETWEEN UNBOUNDED" + + " PRECEDING AND CURRENT ROW) `max(SAL)`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " ROW_NUMBER() OVER () `__stream_seq__`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "ORDER BY `__stream_seq__` DESC NULLS FIRST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java index ca0ff70f0b7..167e1e3c4a5 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java @@ -53,13 +53,28 @@ protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... 
schemaSpec ImmutableList rows = ImmutableList.of( new Object[] { - java.sql.Timestamp.valueOf("2024-07-01 00:00:00"), "web-01", "us-east", 45.2, 120 + java.sql.Timestamp.valueOf("2024-07-01 00:00:00"), + java.sql.Timestamp.valueOf("2024-01-15 10:00:00"), + "web-01", + "us-east", + 45.2, + 120 }, new Object[] { - java.sql.Timestamp.valueOf("2024-07-01 00:01:00"), "web-02", "us-west", 38.7, 150 + java.sql.Timestamp.valueOf("2024-07-01 00:01:00"), + java.sql.Timestamp.valueOf("2024-02-20 11:00:00"), + "web-02", + "us-west", + 38.7, + 150 }, new Object[] { - java.sql.Timestamp.valueOf("2024-07-01 00:02:00"), "web-01", "us-east", 55.3, 200 + java.sql.Timestamp.valueOf("2024-07-01 00:02:00"), + java.sql.Timestamp.valueOf("2024-03-25 12:00:00"), + "web-01", + "us-east", + 55.3, + 200 }); schema.add("events", new EventsTable(rows)); return Frameworks.newConfigBuilder() @@ -347,6 +362,62 @@ public void testTimechartUsingZeroSpanShouldThrow() { verifyErrorMessageContains(t, "Zero or negative time interval not supported: 0h"); } + // ==================== Timechart with Reverse tests ==================== + // These tests verify that reverse works correctly with timechart. + // Timechart always adds a sort at the end of its plan, so reverse will + // find the collation via metadata query (tier 1) and flip the sort direction. 
+ + @Test + public void testTimechartWithReverse() { + // Timechart adds ORDER BY @timestamp ASC at the end + // Reverse should flip it to DESC + String ppl = "source=events | timechart count() | reverse"; + RelNode root = getRelNode(ppl); + // Reverse replaces the timechart's ASC sort in-place with DESC + String expectedLogical = + "LogicalSort(sort0=[$0], dir0=[DESC])\n" + + " LogicalProject(@timestamp=[$0], count()=[$1])\n" + + " LogicalAggregate(group=[{0}], count()=[COUNT()])\n" + + " LogicalProject(@timestamp0=[SPAN($0, 1, 'm')])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalTableScan(table=[[scott, events]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT SPAN(`@timestamp`, 1, 'm') `@timestamp`, COUNT(*) `count()`\n" + + "FROM `scott`.`events`\n" + + "WHERE `@timestamp` IS NOT NULL\n" + + "GROUP BY SPAN(`@timestamp`, 1, 'm')\n" + + "ORDER BY 1 DESC NULLS FIRST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTimechartWithCustomTimefieldAndReverse() { + // Timechart with custom timefield should also work with reverse + // The sort is on created_at (the custom field), not @timestamp + String ppl = "source=events | timechart timefield=created_at span=1month count() | reverse"; + RelNode root = getRelNode(ppl); + + // Reverse replaces the timechart's ASC sort in-place with DESC + String expectedLogical = + "LogicalSort(sort0=[$0], dir0=[DESC])\n" + + " LogicalProject(created_at=[$0], count()=[$1])\n" + + " LogicalAggregate(group=[{0}], count()=[COUNT()])\n" + + " LogicalProject(created_at0=[SPAN($1, 1, 'M')])\n" + + " LogicalFilter(condition=[IS NOT NULL($1)])\n" + + " LogicalTableScan(table=[[scott, events]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT SPAN(`created_at`, 1, 'M') `created_at`, COUNT(*) `count()`\n" + + "FROM `scott`.`events`\n" + + "WHERE `created_at` IS NOT NULL\n" + + "GROUP BY SPAN(`created_at`, 1, 'M')\n" + + 
"ORDER BY 1 DESC NULLS FIRST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + private UnresolvedPlan parsePPL(String query) { PPLSyntaxParser parser = new PPLSyntaxParser(); AstBuilder astBuilder = new AstBuilder(query); @@ -363,6 +434,8 @@ public static class EventsTable implements ScannableTable { .builder() .add("@timestamp", SqlTypeName.TIMESTAMP) .nullable(true) + .add("created_at", SqlTypeName.TIMESTAMP) + .nullable(true) .add("host", SqlTypeName.VARCHAR) .nullable(true) .add("region", SqlTypeName.VARCHAR) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java index b6b60c530e7..69bc1ae2638 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java @@ -24,12 +24,13 @@ public void testSimpleCountWithTranspose() { + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + " 5_null=[MAX($0) FILTER $6])\n" - + " LogicalProject(value=[CAST($3):VARCHAR NOT NULL], $f4=[TRIM(FLAG(BOTH), ' '," + + " LogicalProject(_value_transpose_=[CAST($3):VARCHAR NOT NULL]," + + " $f4=[TRIM(FLAG(BOTH), ' '," + " $2)], $f5=[=($1, 1)], $f6=[=($1, 2)], $f7=[=($1, 3)], $f8=[=($1, 4)], $f9=[=($1," + " 5)])\n" + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + " LogicalProject(c=[$0], _row_number_transpose_=[$1], column=[$2]," - + " value=[CASE(=($2, 'c'), CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + + " _value_transpose_=[CASE(=($2, 'c'), CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" + " LogicalProject(c=[$0], _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + " LogicalAggregate(group=[{}], c=[COUNT()])\n" @@ -40,18 +41,23 @@ public void testSimpleCountWithTranspose() { 
verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + " `_row_number_transpose_` = 5) `row 5`\n" + "FROM (SELECT `t0`.`c`, `t0`.`_row_number_transpose_`, `t1`.`column`, CASE WHEN" - + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END" + + " `_value_transpose_`\n" + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t0`\n" + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" - + "WHERE `t2`.`value` IS NOT NULL\n" + + "WHERE `t2`.`_value_transpose_` IS NOT NULL\n" + "GROUP BY TRIM(`column`)"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -68,12 +74,13 @@ public void testMultipleAggregatesWithAliasesTranspose() { + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + " 5_null=[MAX($0) FILTER $6])\n" - + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], 
$f7=[TRIM(FLAG(BOTH), ' '," - + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)], $f11=[=($4, 4)], $f12=[=($4," - + " 5)])\n" + + " LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," + + " $f7=[TRIM(FLAG(BOTH), ' '," + + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)], $f11=[=($4, 4)]," + + " $f12=[=($4, 5)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," - + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'avg_sal')," + + " _row_number_transpose_=[$4], column=[$5], _value_transpose_=[CASE(=($5, 'avg_sal')," + " NUMBER_TO_STRING($0), =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal')," + " NUMBER_TO_STRING($2), =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" @@ -95,18 +102,22 @@ public void testMultipleAggregatesWithAliasesTranspose() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + " `_row_number_transpose_` = 5) `row 
5`\n" + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," + " `t1`.`_row_number_transpose_`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal'" + " THEN NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" - + " STRING) ELSE NULL END `value`\n" + + " STRING) ELSE NULL END `_value_transpose_`\n" + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t1`\n" @@ -114,7 +125,7 @@ public void testMultipleAggregatesWithAliasesTranspose() { + "('max_sal'),\n" + "('min_sal'),\n" + "('cnt')) `t2` (`column`)) `t3`\n" - + "WHERE `t3`.`value` IS NOT NULL\n" + + "WHERE `t3`.`_value_transpose_` IS NOT NULL\n" + "GROUP BY TRIM(`column`)"; /* @@ -152,11 +163,12 @@ public void testTransposeWithLimit() { "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" - + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," + + " $f7=[TRIM(FLAG(BOTH), ' '," + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," - + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'ENAME')," + + " _row_number_transpose_=[$4], column=[$5], _value_transpose_=[CASE(=($5, 'ENAME')," + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" @@ -176,16 +188,18 @@ 
public void testTransposeWithLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + "SELECT TRIM(`column`) `column`, MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`_value_transpose_` AS STRING))" + + " FILTER (WHERE" + " `_row_number_transpose_` = 3) `row 3`\n" + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + " `t`.`_row_number_transpose_`, `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN" + " CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM' THEN" + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB' THEN CAST(`t`.`JOB` AS" + " STRING) WHEN `t0`.`column` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END" - + " `value`\n" + + " `_value_transpose_`\n" + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -193,12 +207,26 @@ public void testTransposeWithLimit() { + "('COMM'),\n" + "('JOB'),\n" + "('SAL')) `t0` (`column`)) `t1`\n" - + "WHERE `t1`.`value` IS NOT NULL\n" + + "WHERE `t1`.`_value_transpose_` IS NOT NULL\n" + "GROUP BY TRIM(`column`)"; verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testTransposeWithValueFieldNameCollision() { + // Reproduce issue #5172: hardcoded 'value' unpivot column collides with + // input field named 'value' + String ppl = "source=EMP | stats count() as value, avg(SAL) as avg_sal | transpose"; + RelNode root = getRelNode(ppl); + // The 'value' field from stats should appear correctly in transposed output + // and not be confused with the internal unpivot 
'value' column + String expectedResult = + "column=avg_sal; row 1=2073.214285; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=value; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n"; + verifyResult(root, expectedResult); + } + @Test public void testTransposeWithLimitColumnName() { String ppl = @@ -208,11 +236,13 @@ public void testTransposeWithLimitColumnName() { "LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" - + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " LogicalProject(_value_transpose_=[CAST($6):VARCHAR NOT NULL]," + + " $f7=[TRIM(FLAG(BOTH), ' '," + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," - + " _row_number_transpose_=[$4], column_names=[$5], value=[CASE(=($5, 'ENAME')," + + " _row_number_transpose_=[$4], column_names=[$5]," + + " _value_transpose_=[CASE(=($5, 'ENAME')," + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + " LogicalJoin(condition=[true], joinType=[inner])\n" @@ -231,16 +261,19 @@ public void testTransposeWithLimitColumnName() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TRIM(`column_names`) `column_names`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" - + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + "SELECT TRIM(`column_names`) `column_names`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + + " 
`_row_number_transpose_` = 2) `row 2`," + + " MAX(CAST(`_value_transpose_` AS STRING)) FILTER (WHERE" + " `_row_number_transpose_` = 3) `row 3`\n" + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + " `t`.`_row_number_transpose_`, `t0`.`column_names`, CASE WHEN `t0`.`column_names` =" + " 'ENAME' THEN CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN" + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB`" + " AS STRING) WHEN `t0`.`column_names` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE" - + " NULL END `value`\n" + + " NULL END `_value_transpose_`\n" + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -248,7 +281,7 @@ public void testTransposeWithLimitColumnName() { + "('COMM'),\n" + "('JOB'),\n" + "('SAL')) `t0` (`column_names`)) `t1`\n" - + "WHERE `t1`.`value` IS NOT NULL\n" + + "WHERE `t1`.`_value_transpose_` IS NOT NULL\n" + "GROUP BY TRIM(`column_names`)"; verifyPPLToSparkSQL(root, expectedSparkSql); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLUnionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLUnionTest.java new file mode 100644 index 00000000000..a16e0e6a6be --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLUnionTest.java @@ -0,0 +1,591 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.sql.Timestamp; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import 
org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; + +public class CalcitePPLUnionTest extends CalcitePPLAbstractTest { + + public CalcitePPLUnionTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... 
schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + + ImmutableList timeData1 = + ImmutableList.of( + new Object[] { + Timestamp.valueOf("2025-08-01 03:47:41"), + 8762, + "A", + Timestamp.valueOf("2025-08-01 03:47:41") + }, + new Object[] { + Timestamp.valueOf("2025-08-01 01:14:11"), + 9015, + "B", + Timestamp.valueOf("2025-08-01 01:14:11") + }, + new Object[] { + Timestamp.valueOf("2025-07-31 23:40:33"), + 8676, + "A", + Timestamp.valueOf("2025-07-31 23:40:33") + }, + new Object[] { + Timestamp.valueOf("2025-07-31 21:07:03"), + 8490, + "B", + Timestamp.valueOf("2025-07-31 21:07:03") + }); + + ImmutableList timeData2 = + ImmutableList.of( + new Object[] { + Timestamp.valueOf("2025-08-01 04:00:00"), + 2001, + "E", + Timestamp.valueOf("2025-08-01 04:00:00") + }, + new Object[] { + Timestamp.valueOf("2025-08-01 02:30:00"), + 2002, + "F", + Timestamp.valueOf("2025-08-01 02:30:00") + }, + new Object[] { + Timestamp.valueOf("2025-08-01 01:00:00"), + 2003, + "E", + Timestamp.valueOf("2025-08-01 01:00:00") + }, + new Object[] { + Timestamp.valueOf("2025-07-31 22:15:00"), + 2004, + "F", + Timestamp.valueOf("2025-07-31 22:15:00") + }); + + ImmutableList nonTimeData = + ImmutableList.of( + new Object[] {1001, "Product A", 100.0}, new Object[] {1002, "Product B", 200.0}); + + schema.add("TIME_DATA1", new TimeDataTable(timeData1)); + schema.add("TIME_DATA2", new TimeDataTable(timeData2)); + schema.add("NON_TIME_DATA", new NonTimeDataTable(nonTimeData)); + + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testBasicUnionTwoDatasets() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10] " + + "[search source=EMP | where DEPTNO = 20]"; + RelNode root = getRelNode(ppl); + 
String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "UNION ALL\n" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 20"; + verifyPPLToSparkSQL(root, expectedSparkSql); + verifyResultCount(root, 8); + } + + @Test + public void testUnionThreeDatasets() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10] " + + "[search source=EMP | where DEPTNO = 20] " + + "[search source=EMP | where DEPTNO = 30]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 30)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "UNION ALL\n" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 20\n" 
+ + "UNION ALL\n" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 30"; + verifyPPLToSparkSQL(root, expectedSparkSql); + verifyResultCount(root, 14); + } + + @Test + public void testUnionCrossIndicesSchemaDifference() { + String ppl = + "| union [search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME," + + " JOB] [search source=DEPT | where DEPTNO = 10 | fields DEPTNO, DNAME, LOC]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[null:TINYINT]," + + " DNAME=[null:VARCHAR(14)], LOC=[null:VARCHAR(13)])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + + " LogicalFilter(condition=[=($0, 10)])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, CAST(NULL AS TINYINT) `DEPTNO`, CAST(NULL AS STRING)" + + " `DNAME`, CAST(NULL AS STRING) `LOC`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "UNION ALL\n" + + "SELECT CAST(NULL AS SMALLINT) `EMPNO`, CAST(NULL AS STRING) `ENAME`, CAST(NULL AS" + + " STRING) `JOB`, `DEPTNO`, `DNAME`, `LOC`\n" + + "FROM `scott`.`DEPT`\n" + + "WHERE `DEPTNO` = 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + verifyResultCount(root, 4); + } + + @Test + public void testUnionWithStats() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | eval type = \"accounting\"] " + + "[search source=EMP | where DEPTNO = 20 | eval type = \"research\"] " + + "| stats count by type"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(count=[$1], type=[$0])\n" + + " LogicalAggregate(group=[{0}], count=[COUNT()])\n" + + " LogicalProject(type=[$8])\n" + + " LogicalUnion(all=[true])\n" 
+ + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], type=['accounting':VARCHAR])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], type=['research':VARCHAR])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT COUNT(*) `count`, `type`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " 'accounting' `type`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "UNION ALL\n" + + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " 'research' `type`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 20) `t3`\n" + + "GROUP BY `type`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + verifyResultCount(root, 2); + } + + @Test + public void testUnionDirectTableNames() { + String ppl = "| union EMP, DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], DNAME=[null:VARCHAR(14)]," + + " LOC=[null:VARCHAR(13)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)]," + + " JOB=[null:VARCHAR(9)], MGR=[null:SMALLINT], HIREDATE=[null:DATE]," + + " SAL=[null:DECIMAL(7, 2)], COMM=[null:DECIMAL(7, 2)], DEPTNO=[CAST($0):TINYINT]," + + " DNAME=[$1], LOC=[$2])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testUnionNonStreamingModeAppend() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME] " + + 
"[search source=NON_TIME_DATA | fields id, name]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], id=[null:INTEGER], name=[null:VARCHAR])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)], id=[$0]," + + " name=[$1])\n" + + " LogicalTableScan(table=[[scott, NON_TIME_DATA]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testUnionWithMaxout() { + String ppl = + "| union maxout=5 " + + "[search source=EMP | where DEPTNO = 10] " + + "[search source=EMP | where DEPTNO = 20]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSystemLimit(fetch=[5], type=[SUBSEARCH_MAXOUT])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testUnionWithIdenticalSchemasAndFieldProjection() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME], " + + "[search source=EMP | where DEPTNO = 20 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, 
expectedLogical); + verifyResultCount(root, 8); + } + + @Test + public void testUnionAsFirstCommand() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME] " + + "[search source=EMP | where DEPTNO = 20 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 8); + } + + @Test + public void testUnionWithCompletelyDifferentSchemas() { + String ppl = + "| union " + + "[search source=EMP | fields EMPNO, ENAME] " + + "[search source=DEPT | fields DEPTNO, DNAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], DEPTNO=[null:TINYINT]," + + " DNAME=[null:VARCHAR(14)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[null:SMALLINT], ENAME=[null:VARCHAR(10)], DEPTNO=[$0]," + + " DNAME=[$1])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 18); + } + + @Test + public void testUnionWithPartialSchemaOverlap() { + String ppl = + "| union " + + "[search source=EMP | fields EMPNO, ENAME, JOB] " + + "[search source=EMP | fields EMPNO, ENAME, SAL]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], SAL=[null:DECIMAL(7, 2)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[null:VARCHAR(9)], SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + 
verifyResultCount(root, 28); + } + + @Test + public void testUnionWithFilteredSubsearches() { + String ppl = + "| union " + + "[search source=EMP | where SAL > 2000 | fields EMPNO, ENAME] " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[>($5, 2000)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testUnionPreservesDuplicateRows() { + String ppl = + "| union " + + "[search source=EMP | where EMPNO = 7369 | fields EMPNO, ENAME] " + + "[search source=EMP | where EMPNO = 7369 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($0, 7369)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($0, 7369)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 2); + } + + @Test + public void testUnionWithEmptyDataset() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME] " + + "[search source=EMP | where DEPTNO = 99 | fields EMPNO, ENAME]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 99)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, 
expectedLogical); + verifyResultCount(root, 3); + } + + @Test + public void testUnionFollowedByAggregation() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME], " + + "[search source=EMP | where DEPTNO = 20 | fields EMPNO, ENAME] " + + "| stats count()"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{}], count()=[COUNT()])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 1); + } + + @Test + public void testUnionFollowedBySort() { + String ppl = + "| union " + + "[search source=EMP | where DEPTNO = 10 | fields EMPNO, ENAME] " + + "[search source=EMP | where DEPTNO = 20 | fields EMPNO, ENAME] " + + "| sort ENAME"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalUnion(all=[true])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 8); + } + + @RequiredArgsConstructor + static class TimeDataTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("timestamp", SqlTypeName.TIMESTAMP) + .nullable(true) + .add("value", SqlTypeName.INTEGER) + .nullable(true) + .add("category", SqlTypeName.VARCHAR) + .nullable(true) + .add("@timestamp", SqlTypeName.TIMESTAMP) + 
.nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } + + @RequiredArgsConstructor + static class NonTimeDataTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("id", SqlTypeName.INTEGER) + .nullable(true) + .add("name", SqlTypeName.VARCHAR) + .nullable(true) + .add("value", SqlTypeName.DOUBLE) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java 
b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index a53e4a5d8dd..e7f3f986752 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1828,4 +1828,39 @@ public void testEmptyPipeAndTrailingPipeTogether() { public void testMalformedPipeProducesSyntaxError() { plan("source=t | invalidCmd |"); } + + @Test + public void testUnionWithSubsearches() { + plan("| union [search source=t1 | where age > 30] " + "[search source=t2 | where age < 20]"); + } + + @Test + public void testUnionWithDirectTableNames() { + plan("| union t1, t2"); + } + + @Test + public void testUnionWithDateSuffixIndex() { + plan("| union logs-2024.01.01, logs-2024.01.02"); + } + + @Test + public void testUnionWithDottedCatalogPath() { + plan("| union catalog.my_index, catalog.other_index"); + } + + @Test + public void testUnionMidPipeline() { + plan("source=t1 | union t2, t3"); + } + + @Test + public void testUnionWithMaxoutOption() { + plan("| union maxout=500 t1, t2"); + } + + @Test + public void testMaxoutAsFieldName() { + plan("source=t | eval maxout = 1"); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index af10b53defb..ce7a120ff56 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -299,6 +299,24 @@ public void testBooleanIsNotNullFunction() { filter(relation("t"), function("is not null", field("a")))); } + @Test + public void testIsNullPredicate() { + assertEqual( + "source=t | where a is null", filter(relation("t"), function("is null", field("a")))); + assertEqual( + "source=t | where a IS NULL", filter(relation("t"), function("is null", field("a")))); + } + + @Test + public void testIsNotNullPredicate() { + 
assertEqual( + "source=t | where a is not null", + filter(relation("t"), function("is not null", field("a")))); + assertEqual( + "source=t | where a IS NOT NULL", + filter(relation("t"), function("is not null", field("a")))); + } + /** Todo. search operator should not include functionCall, need to change antlr. */ @Ignore("search operator should not include functionCall, need to change antlr") public void testEvalExpr() { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index bb720bd4207..585575b2b24 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -710,6 +710,27 @@ public void testGraphLookup() { + " filter=(status = 'active' AND id > 2) as reportingHierarchy")); } + @Test + public void testGraphLookupTopLevel() { + // Top-level graphLookup with single literal + assertEquals( + "graphlookup table start=*** edge=identifier-->identifier as identifier", + anonymize( + "graphLookup employees start=\"Jack\" edge=manager-->name" + " as reportingHierarchy")); + // Top-level graphLookup with literal list + assertEquals( + "graphlookup table start=***, *** edge=identifier-->identifier as identifier", + anonymize( + "graphLookup employees start=\"Jack\", \"Eliot\" edge=manager-->name" + + " as reportingHierarchy")); + // Top-level graphLookup with maxDepth + assertEquals( + "graphlookup table start=*** edge=identifier-->identifier maxDepth=*** as identifier", + anonymize( + "graphLookup employees start=\"Jack\" edge=manager-->name" + + " maxDepth=3 as reportingHierarchy")); + } + @Test public void testInSubquery() { assertEquals( @@ -1126,6 +1147,26 @@ public void testConvertCommand() { assertEquals( "source=table | convert (identifier) AS identifier", anonymize("source=t | convert none(empno) AS empno_same")); + 
assertEquals( + "source=table | convert dur2sec(identifier)", + anonymize("source=t | convert dur2sec(duration)")); + assertEquals( + "source=table | convert mstime(identifier)", + anonymize("source=t | convert mstime(elapsed)")); + assertEquals( + "source=table | convert memk(identifier) AS identifier", + anonymize("source=t | convert memk(virt) AS virt_kb")); + } + + @Test + public void testConvertCommandWithTimeformat() { + assertEquals( + "source=table | convert timeformat=\"%Y-%m-%d\" mktime(identifier)", + anonymize("source=t | convert timeformat=\"%Y-%m-%d\" mktime(date_str)")); + assertEquals( + "source=table | convert timeformat=\"%m/%d/%Y %H:%M:%S\" ctime(identifier) AS identifier", + anonymize( + "source=t | convert timeformat=\"%m/%d/%Y %H:%M:%S\" ctime(ts) AS formatted_time")); } @Test @@ -1139,4 +1180,28 @@ public void testMvexpandCommandWithLimit() { "source=table | mvexpand identifier limit=***", anonymize("source=t | mvexpand skills limit=5")); } + + @Test + public void testUnion() { + assertEquals( + "| union [search source=table | where identifier < ***] [search source=table |" + + " where identifier >= ***]", + anonymize( + "| union [search source=accounts | where age < 30] [search source=accounts" + + " | where age >= 30]")); + + assertEquals( + "| union [search source=table | where identifier > ***] [search source=table |" + + " where identifier = ***]", + anonymize( + "| union [search source=accounts | where balance > 20000] [search" + + " source=accounts | where state = 'CA']")); + + assertEquals( + "| union [search source=table | fields + identifier,identifier] [search" + + " source=table | where identifier = ***]", + anonymize( + "| union [search source=accounts | fields firstname, lastname] [search" + + " source=accounts | where age = 25]")); + } } diff --git a/release-notes/opensearch-sql.release-notes-3.6.0.0.md b/release-notes/opensearch-sql.release-notes-3.6.0.0.md new file mode 100644 index 00000000000..92136767c01 --- /dev/null +++ 
b/release-notes/opensearch-sql.release-notes-3.6.0.0.md @@ -0,0 +1,67 @@ +## Version 3.6.0.0 Release Notes + +Compatible with OpenSearch and OpenSearch Dashboards version 3.6.0.0 + +### Features +* Update mend config to allow remediation ([#5287](https://github.com/opensearch-project/sql/pull/5287)) +* Add unified query parser API ([#5274](https://github.com/opensearch-project/sql/pull/5274)) +* Add profiling support to unified query API ([#5268](https://github.com/opensearch-project/sql/pull/5268)) +* Add Calcite native SQL planning in UnifiedQueryPlanner ([#5257](https://github.com/opensearch-project/sql/pull/5257)) +* Add query cancellation support via _tasks/_cancel API for PPL queries ([#5254](https://github.com/opensearch-project/sql/pull/5254)) +* Support graphLookup with literal value as its start ([#5253](https://github.com/opensearch-project/sql/pull/5253)) +* PPL Highlight Support ([#5234](https://github.com/opensearch-project/sql/pull/5234)) +* Support creating/updating prometheus rules ([#5228](https://github.com/opensearch-project/sql/pull/5228)) +* Change the final output result of struct from list to map ([#5227](https://github.com/opensearch-project/sql/pull/5227)) +* added cloudwatch style contains operator ([#5219](https://github.com/opensearch-project/sql/pull/5219)) +* Update graphlookup syntax ([#5209](https://github.com/opensearch-project/sql/pull/5209)) +* Onboard code diff analyzer and reviewer (sql) ([#5183](https://github.com/opensearch-project/sql/pull/5183)) +* Add grammar bundle generation API for PPL language features ([#5162](https://github.com/opensearch-project/sql/pull/5162)) +* Support PPL queries when having trailing pipes and/or empty pipes ([#5161](https://github.com/opensearch-project/sql/pull/5161)) +* Bump ANTLR Version to 4.13.2 ([#5159](https://github.com/opensearch-project/sql/pull/5159)) +* feat: Implement PPL convert command with 5 conversion functions ([#5157](https://github.com/opensearch-project/sql/pull/5157)) +* 
Make sql plugin aware of FIPS build param (-Pcrypto.standard=FIPS-140-3) ([#5155](https://github.com/opensearch-project/sql/pull/5155)) +* PPL Command: MvExpand ([#5144](https://github.com/opensearch-project/sql/pull/5144)) +* Add auto-extract mode for `spath` command ([#5140](https://github.com/opensearch-project/sql/pull/5140)) +* Support bi-directional graph traversal command `graphlookup` ([#5138](https://github.com/opensearch-project/sql/pull/5138)) +* Add nomv command ([#5130](https://github.com/opensearch-project/sql/pull/5130)) +* Improve resource monitor errors ([#5129](https://github.com/opensearch-project/sql/pull/5129)) +* Support fetch_size API for PPL ([#5109](https://github.com/opensearch-project/sql/pull/5109)) +* LAST/FIRST/TAKE aggregation should support TEXT type and Scripts ([#5091](https://github.com/opensearch-project/sql/pull/5091)) +* fieldformat command implementation ([#5080](https://github.com/opensearch-project/sql/pull/5080)) +* Implement `reverse` performance optimization ([#4775](https://github.com/opensearch-project/sql/pull/4775)) + +### Bug Fixes +* Fix flaky TPC-H Q1 test due to bugs in `MatcherUtils.closeTo()` ([#5283](https://github.com/opensearch-project/sql/pull/5283)) +* Fix typo: rename renameClasue to renameClause ([#5252](https://github.com/opensearch-project/sql/pull/5252)) +* Fix `isnotnull()` not being pushed down when combined with multiple `!=` conditions ([#5238](https://github.com/opensearch-project/sql/pull/5238)) +* Fix memory leak: ExecutionEngine recreated per query appending to global function registry ([#5222](https://github.com/opensearch-project/sql/pull/5222)) +* Fix PIT (Point in Time) resource leaks in v2 query engine ([#5221](https://github.com/opensearch-project/sql/pull/5221)) +* Fix MAP path resolution for `top/rare`, `join`, `lookup` and `streamstats` ([#5206](https://github.com/opensearch-project/sql/pull/5206)) +* Fix #5163: Return null for double overflow to Infinity in arithmetic 
([#5202](https://github.com/opensearch-project/sql/pull/5202)) +* Fix MAP path resolution for symbol-based PPL commands ([#5198](https://github.com/opensearch-project/sql/pull/5198)) +* Fix #5176: Return actual null from JSON_EXTRACT for missing/null paths ([#5196](https://github.com/opensearch-project/sql/pull/5196)) +* Fix multisearch UDT type loss through UNION (#5145, #5146, #5147) ([#5154](https://github.com/opensearch-project/sql/pull/5154)) +* Fix path navigation on map columns for `spath` command ([#5149](https://github.com/opensearch-project/sql/pull/5149)) +* Fix pitest dependency resolution with stable runtime version ([#5143](https://github.com/opensearch-project/sql/pull/5143)) +* Fix #5114: preserve head/TopK semantics for sort-expression pushdown ([#5135](https://github.com/opensearch-project/sql/pull/5135)) +* Fix fallback error handling to show original Calcite error ([#5133](https://github.com/opensearch-project/sql/pull/5133)) +* Fix the bug when boolean comparison condition is simplified to field ([#5071](https://github.com/opensearch-project/sql/pull/5071)) +* Fix issue connecting with prometheus by wrapping with AccessController.doPrivilegedChecked ([#5061](https://github.com/opensearch-project/sql/pull/5061)) + +### Infrastructure +* Add gradle.properties file to build sql with -Pcrypto.standard=FIPS-140-3 by default ([#5231](https://github.com/opensearch-project/sql/pull/5231)) +* Fix the flaky yamlRestTest caused by order of sample_logs ([#5119](https://github.com/opensearch-project/sql/pull/5119)) +* Fix the filter of integTestWithSecurity ([#5098](https://github.com/opensearch-project/sql/pull/5098)) + +### Documentation +* Apply docs website feedback to ppl functions ([#5207](https://github.com/opensearch-project/sql/pull/5207)) + +### Maintenance +* Move some maintainers from active to Emeritus ([#5260](https://github.com/opensearch-project/sql/pull/5260)) +* Add CLAUDE.md ([#5259](https://github.com/opensearch-project/sql/pull/5259)) +* 
Add songkant-aws as maintainer ([#5244](https://github.com/opensearch-project/sql/pull/5244)) +* Add ahkcs as maintainer ([#5223](https://github.com/opensearch-project/sql/pull/5223)) +* Fix bc-fips jar hell by marking dependency as compileOnly ([#5158](https://github.com/opensearch-project/sql/pull/5158)) +* Revert dynamic column support ([#5139](https://github.com/opensearch-project/sql/pull/5139)) +* Increment version to 3.6.0-SNAPSHOT ([#5115](https://github.com/opensearch-project/sql/pull/5115)) +* Upgrade assertj-core to 3.27.7 ([#5100](https://github.com/opensearch-project/sql/pull/5100)) diff --git a/scripts/docs_exporter/export_to_docs_website.py b/scripts/docs_exporter/export_to_docs_website.py index 51c708cf3f4..ce150f91c67 100755 --- a/scripts/docs_exporter/export_to_docs_website.py +++ b/scripts/docs_exporter/export_to_docs_website.py @@ -104,6 +104,14 @@ def convert_sql_table_to_markdown(table_text: str) -> str: cells = [c.strip() for c in line.strip().strip('|').split('|')] # Escape angle brackets for Jekyll in converted tables (results tables) cells = [c.replace('<', '\\<').replace('>', '\\>').replace('*', '\\*') for c in cells] + # Wrap cell in backticks if it's a single %-prefixed token (e.g., %a -> `%a`) + def backtick_percent(cell): + if "'" in cell or '"' in cell or '`' in cell: + return cell + if re.match(r'^%\S+$', cell): + return f'`{cell}`' + return cell + cells = [backtick_percent(c) for c in cells] result.append('| ' + ' | '.join(cells) + ' |') if not header_done: result.append('|' + '|'.join([' --- ' for _ in cells]) + '|') @@ -350,6 +358,8 @@ def export_docs( files_by_dir[dir_name].sort(key=lambda f: f.name) if dir_name == "cmd" and any(f.name == "syntax.md" for f in files_by_dir[dir_name]): files_by_dir[dir_name].sort(key=lambda f: (f.name != "syntax.md", f.name)) + if dir_name == "functions" and any(f.name == "index.md" for f in files_by_dir[dir_name]): + files_by_dir[dir_name].sort(key=lambda f: (f.name != "index.md", f.name)) for _, 
files in files_by_dir.items(): for i, md_file in enumerate(files, 1): diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 5f7361160b3..6b34507eacc 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -109,8 +109,18 @@ fromClause ; relation - : tableName (AS? alias)? # tableAsRelation - | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation + : tableName (AS? alias)? # tableAsRelation + | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation + | qualifiedName LR_BRACKET tableFunctionArgs RR_BRACKET (AS? alias)? # tableFunctionRelation + ; + +tableFunctionArgs + : tableFunctionArg (COMMA tableFunctionArg)* + ; + +tableFunctionArg + : ident EQUAL_SYMBOL functionArg + | functionArg ; whereClause diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index bdbc360713c..5250ab7fb0f 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -13,6 +13,7 @@ import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SelectElementContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.SubqueryAsRelationContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableAsRelationContext; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.TableFunctionRelationContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.WhereClauseContext; import static org.opensearch.sql.sql.parser.ParserUtils.getTextInQuery; import static org.opensearch.sql.utils.SystemIndexUtils.TABLE_INFO; @@ -20,12 +21,14 @@ import com.google.common.collect.ImmutableList; import java.util.Collections; +import java.util.Locale; import java.util.Optional; import 
lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.DescribeRelation; import org.opensearch.sql.ast.tree.Filter; @@ -34,10 +37,12 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.SubqueryAlias; +import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.QuerySpecificationContext; @@ -189,6 +194,57 @@ public UnresolvedPlan visitSubqueryAsRelation(SubqueryAsRelationContext ctx) { return new RelationSubquery(visit(ctx.subquery), subqueryAlias); } + @Override + public UnresolvedPlan visitTableFunctionRelation(TableFunctionRelationContext ctx) { + // The grammar accepts both `ident = value` and bare `value` forms for each table function + // argument so that the real positional shape (e.g. `vectorSearch('idx', field='f', ...)`) + // reaches this V2 builder instead of failing to parse and silently falling back to the + // legacy SQL engine. Reject the positional shape here with a SemanticCheckException so the + // user receives a clean 400 rather than an opaque legacy parser error. 
+ ctx.tableFunctionArgs() + .tableFunctionArg() + .forEach( + arg -> { + if (arg.ident() == null) { + String functionName = ctx.qualifiedName().getText(); + throw new SemanticCheckException( + String.format( + Locale.ROOT, + "Table function '%s' requires named arguments (e.g. name='value')," + + " but received a positional argument: %s", + functionName, + arg.functionArg().getText())); + } + }); + ImmutableList.Builder args = ImmutableList.builder(); + ctx.tableFunctionArgs() + .tableFunctionArg() + .forEach( + arg -> { + String argName = + StringUtils.unquoteIdentifier(arg.ident().getText()).toLowerCase(Locale.ROOT); + UnresolvedExpression argValue = visitAstExpression(arg.functionArg()); + args.add(new UnresolvedArgument(argName, argValue)); + }); + TableFunction tableFunction = + new TableFunction(visitAstExpression(ctx.qualifiedName()), args.build()); + if (ctx.alias() == null) { + String functionName = ctx.qualifiedName().getText(); + // Use SemanticCheckException (not SyntaxCheckException) so the request does not fall back + // to the legacy SQL engine, whose opaque parser error would mask this message. + throw new SemanticCheckException( + String.format( + Locale.ROOT, + "Table function '%s' requires a table alias." + + " Add an alias after the closing parenthesis, for example:" + + " FROM %s(...) 
AS v", + functionName, + functionName)); + } + String alias = StringUtils.unquoteIdentifier(ctx.alias().getText()); + return new SubqueryAlias(alias, tableFunction); + } + @Override public UnresolvedPlan visitWhereClause(WhereClauseContext ctx) { return new Filter(visitAstExpression(ctx.expression())); diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java index 1ecaa181e6f..695cf85b144 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstBuilderTest.java @@ -6,6 +6,8 @@ package org.opensearch.sql.sql.parser; import static java.util.Collections.emptyList; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.ast.dsl.AstDSL.agg; @@ -40,7 +42,11 @@ import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.NestedAllTupleFields; +import org.opensearch.sql.ast.expression.UnresolvedArgument; +import org.opensearch.sql.ast.tree.SubqueryAlias; +import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.exception.SemanticCheckException; class AstBuilderTest extends AstBuilderTestBase { @@ -131,6 +137,142 @@ public void can_build_from_index_with_alias_quoted() { buildAST("SELECT `t`.name FROM test `t` WHERE `t`.age = 30")); } + @Test + public void can_build_from_table_function() { + assertEquals( + project( + new SubqueryAlias( + "v", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("field", stringLiteral("embedding")), + 
new UnresolvedArgument("vector", stringLiteral("[0.1,0.2]")), + new UnresolvedArgument("option", stringLiteral("k=10"))))), + AllFields.of()), + buildAST( + "SELECT * FROM vectorSearch(" + + "table='products', field='embedding', " + + "vector='[0.1,0.2]', option='k=10') AS v")); + } + + @Test + public void can_build_from_table_function_with_where_order_limit() { + assertEquals( + project( + limit( + sort( + filter( + new SubqueryAlias( + "s", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("field", stringLiteral("embedding")), + new UnresolvedArgument("vector", stringLiteral("[0.1,0.2]")), + new UnresolvedArgument("option", stringLiteral("k=10"))))), + function("=", qualifiedName("s", "category"), stringLiteral("shoes"))), + field(qualifiedName("s", "_score"), argument("asc", booleanLiteral(false)))), + 5, + 0), + alias("s.title", qualifiedName("s", "title")), + alias("s._score", qualifiedName("s", "_score"))), + buildAST( + "SELECT s.title, s._score FROM vectorSearch(" + + "table='products', field='embedding', " + + "vector='[0.1,0.2]', option='k=10') AS s " + + "WHERE s.category = 'shoes' " + + "ORDER BY s._score DESC " + + "LIMIT 5")); + } + + @Test + public void table_function_args_are_resolved_by_name_not_position() { + assertEquals( + project( + new SubqueryAlias( + "v", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("option", stringLiteral("k=10")), + new UnresolvedArgument("field", stringLiteral("embedding")), + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("vector", stringLiteral("[0.1,0.2]"))))), + AllFields.of()), + buildAST( + "SELECT * FROM vectorSearch(" + + "option='k=10', field='embedding', " + + "table='products', vector='[0.1,0.2]') AS v")); + } + + @Test + public void table_function_arg_names_are_canonicalized() { + assertEquals( + project( + new 
SubqueryAlias( + "v", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("field", stringLiteral("embedding")), + new UnresolvedArgument("vector", stringLiteral("[0.1,0.2]")), + new UnresolvedArgument("option", stringLiteral("k=10"))))), + AllFields.of()), + buildAST( + "SELECT * FROM vectorSearch(" + + "TABLE='products', FIELD='embedding', " + + "VECTOR='[0.1,0.2]', OPTION='k=10') AS v")); + } + + @Test + public void table_function_allows_alias_without_as_keyword() { + assertEquals( + project( + new SubqueryAlias( + "v", + new TableFunction( + qualifiedName("vectorSearch"), + ImmutableList.of( + new UnresolvedArgument("table", stringLiteral("products")), + new UnresolvedArgument("vector", stringLiteral("[0.1]"))))), + AllFields.of()), + buildAST("SELECT * FROM vectorSearch(table='products', vector='[0.1]') v")); + } + + @Test + public void table_function_relation_requires_alias() { + SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + buildAST( + "SELECT * FROM vectorSearch(" + + "table='products', field='embedding', " + + "vector='[0.1,0.2]', option='k=10')")); + assertThat(ex.getMessage(), containsString("requires a table alias")); + assertThat(ex.getMessage(), containsString("vectorSearch")); + } + + @Test + public void table_function_relation_rejects_positional_argument() { + // Grammar accepts both `ident=value` and bare `value` for each table function argument so + // the real positional shape reaches the V2 AstBuilder. The AstBuilder must reject it with a + // SemanticCheckException rather than let the request fall back to the legacy engine. 
+ SemanticCheckException ex = + assertThrows( + SemanticCheckException.class, + () -> + buildAST( + "SELECT * FROM vectorSearch('products', field='embedding', " + + "vector='[0.1,0.2]', option='k=10') AS v")); + assertThat( + ex.getMessage(), containsString("requires named arguments")); + } + @Test + public void can_build_where_clause() { + assertEquals(