Skip to content

Commit 121064f

Browse files
authored
Merge branch 'main' into feat/offset-pushdown
2 parents eee57af + 0bb17bc commit 121064f

52 files changed

Lines changed: 5821 additions & 253 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.asf.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ github:
5858
- "Check Markdown Links"
5959
- "Validate required_status_checks in .asf.yaml"
6060
- "Spell Check with Typos"
61+
- "Circular Dependency Check"
62+
- "Detect Unused Dependencies"
6163
# needs to be updated as part of the release process
6264
# .asf.yaml doesn't support wildcard branch protection rules, only exact branch names
6365
# https://github.com/apache/infrastructure-asfyaml?tab=readme-ov-file#branch-protection

.github/workflows/breaking_changes_detector.yml

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,24 @@
2020
# Only public workspace crates that have file changes are checked.
2121
# Internal crates (benchmarks, test-utils, sqllogictest, doc) are excluded.
2222
#
23-
# If breaking changes are found, a sticky comment is posted on the PR.
24-
# The comment is removed automatically once the issues are resolved.
23+
# This workflow only runs cargo-semver-checks and uploads the result as an
24+
# artifact. The actual PR comment is posted by a companion workflow
25+
# (`breaking_changes_detector_comment.yml`) that picks up the artifact via
26+
# `workflow_run`.
27+
#
28+
# Why split it?
29+
# "The GITHUB_TOKEN has read-only permissions in pull requests from forked
30+
# repositories."
31+
# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request
32+
# A read-only token cannot post comments, so on fork PRs the previous
33+
# single-workflow design failed with HTTP 403. We can't simply broaden the
34+
# trigger here either: cargo-semver-checks compiles PR code (build.rs, proc
35+
# macros), so granting this job a write token would expose it to any code
36+
# in the PR. And ASF infra policy independently forbids `pull_request_target`
37+
# for any workflow that exposes GITHUB_TOKEN
38+
# (https://infra.apache.org/github-actions-policy.html). The companion
39+
# `workflow_run` workflow runs in the base-repo context with write access
40+
# and never executes PR code.
2541

2642
name: "Detect breaking changes"
2743

@@ -37,11 +53,6 @@ jobs:
3753
check-semver:
3854
name: Check semver
3955
runs-on: ubuntu-latest
40-
outputs:
41-
logs: ${{ steps.check_semver.outputs.logs }}
42-
# Default to "success" so the comment job clears any stale comment
43-
# when the check step is skipped (e.g. no published crates changed).
44-
result: ${{ steps.check_semver.outputs.result || 'success' }}
4556
steps:
4657
- name: Checkout
4758
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -66,6 +77,15 @@ jobs:
6677
echo "packages=$PACKAGES" >> "$GITHUB_OUTPUT"
6778
echo "Changed crates: $PACKAGES"
6879
80+
# `datafusion-substrait` (and crates that depend on it via sqllogictest)
81+
# have a build script that calls protoc, which is not preinstalled on
82+
# ubuntu-latest runners.
83+
- name: Install Protobuf Compiler
84+
if: steps.changed_crates.outputs.packages != ''
85+
run: |
86+
sudo apt-get update
87+
sudo apt-get install -y protobuf-compiler
88+
6989
- name: Install cargo-semver-checks
7090
if: steps.changed_crates.outputs.packages != ''
7191
uses: taiki-e/install-action@94cb46f8d6e437890146ffbd78a778b78e623fb2 # v2.74.0
@@ -85,11 +105,6 @@ jobs:
85105
ci/scripts/changed_crates.sh semver-check "origin/${BASE_REF}" $PACKAGES \
86106
2>&1 | tee /tmp/semver-output.txt
87107
EXIT_CODE=${PIPESTATUS[0]}
88-
{
89-
echo "logs<<EOF"
90-
sed 's/\x1b\[[0-9;]*m//g' /tmp/semver-output.txt
91-
echo "EOF"
92-
} >> "$GITHUB_OUTPUT"
93108
# Pass the result through an output instead of failing the job:
94109
# a detected breaking change should surface as a PR comment, not a
95110
# red check, so PR authors aren't confused by an intentional break.
@@ -99,28 +114,29 @@ jobs:
99114
echo "result=failure" >> "$GITHUB_OUTPUT"
100115
fi
101116
102-
# Post or remove a sticky comment on the PR based on the semver check result.
103-
comment-on-pr:
104-
name: Comment on pull request
105-
runs-on: ubuntu-latest
106-
needs: check-semver
107-
if: always()
108-
permissions:
109-
contents: read
110-
pull-requests: write
111-
steps:
112-
- name: Checkout
113-
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
114-
with:
115-
sparse-checkout: ci/scripts
116-
117-
- name: Update PR comment
117+
# Stage the data the companion comment workflow needs into a single
118+
# directory. We default the result to "success" so the comment
119+
# workflow clears any stale comment when the check step is skipped
120+
# (e.g. no published crates changed).
121+
- name: Stage artifact for comment workflow
122+
if: always()
118123
env:
119-
GH_TOKEN: ${{ github.token }}
120-
REPO: ${{ github.repository }}
121124
PR_NUMBER: ${{ github.event.pull_request.number }}
122-
CHECK_RESULT: ${{ needs.check-semver.outputs.result }}
123-
SEMVER_LOGS: ${{ needs.check-semver.outputs.logs }}
125+
CHECK_RESULT: ${{ steps.check_semver.outputs.result || 'success' }}
124126
run: |
125-
ci/scripts/changed_crates.sh comment \
126-
"$REPO" "$PR_NUMBER" "$CHECK_RESULT" "$SEMVER_LOGS"
127+
mkdir -p semver-artifact
128+
echo "$PR_NUMBER" > semver-artifact/pr_number
129+
echo "$CHECK_RESULT" > semver-artifact/result
130+
if [ -f /tmp/semver-output.txt ]; then
131+
sed 's/\x1b\[[0-9;]*m//g' /tmp/semver-output.txt > semver-artifact/logs
132+
else
133+
: > semver-artifact/logs
134+
fi
135+
136+
- name: Upload artifact
137+
if: always()
138+
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
139+
with:
140+
name: semver-check-result
141+
path: semver-artifact/
142+
retention-days: 1
.github/workflows/breaking_changes_detector_comment.yml

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Companion to `breaking_changes_detector.yml`. Posts the sticky PR comment.
19+
#
20+
# Why this workflow exists:
21+
# "The GITHUB_TOKEN has read-only permissions in pull requests from forked
22+
# repositories."
23+
# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request
24+
# That is why the upstream `pull_request` workflow cannot post the comment
25+
# itself when the PR comes from a fork.
26+
#
27+
# Why not `pull_request_target`? ASF infra policy forbids it:
28+
# "You MUST NOT use `pull_request_target` as a trigger on ANY action that
29+
# exports ANY confidential credentials or tokens such as GITHUB_TOKEN or
30+
# NPM_TOKEN."
31+
# https://infra.apache.org/github-actions-policy.html
32+
# `workflow_run` is the supported alternative: it runs in the base
33+
# repository's context regardless of where the upstream run was triggered
34+
# from, so the GITHUB_TOKEN here can be granted `pull-requests: write`. See:
35+
# https://docs.github.com/en/actions/reference/events-that-trigger-workflows#workflow_run
36+
#
37+
# Security note: this workflow MUST NOT check out or execute any code from
38+
# the PR. The artifact's contents originate from a workflow run that may
39+
# have compiled fork-controlled code, so PR_NUMBER and CHECK_RESULT are
40+
# validated against strict patterns before being passed to any action.
41+
42+
name: "Detect breaking changes - Comment"
43+
44+
on:
45+
workflow_run:
46+
workflows: ["Detect breaking changes"]
47+
types:
48+
- completed
49+
50+
permissions:
51+
contents: read
52+
53+
jobs:
54+
comment-on-pr:
55+
name: Comment on pull request
56+
if: github.event.workflow_run.event == 'pull_request'
57+
runs-on: ubuntu-latest
58+
# Scoped to the minimum needed to upsert/delete the sticky comment.
59+
permissions:
60+
actions: read
61+
pull-requests: write
62+
steps:
63+
- name: Download semver-check artifact
64+
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
65+
with:
66+
name: semver-check-result
67+
run-id: ${{ github.event.workflow_run.id }}
68+
github-token: ${{ github.token }}
69+
path: ./semver-artifact
70+
71+
- name: Read and validate artifact
72+
id: read
73+
run: |
74+
set -euo pipefail
75+
# Validate PR_NUMBER and CHECK_RESULT: the artifact comes from a workflow run
76+
# that compiled fork-controlled code, so its contents are untrusted.
77+
PR_NUMBER=$(cat ./semver-artifact/pr_number)
78+
if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
79+
echo "Invalid PR number: $PR_NUMBER" >&2
80+
exit 1
81+
fi
82+
CHECK_RESULT=$(cat ./semver-artifact/result)
83+
if [[ "$CHECK_RESULT" != "success" && "$CHECK_RESULT" != "failure" ]]; then
84+
echo "Invalid check result: $CHECK_RESULT" >&2
85+
exit 1
86+
fi
87+
echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
88+
echo "result=$CHECK_RESULT" >> "$GITHUB_OUTPUT"
89+
90+
# Multi-line output: random delimiter so a malicious log line can't
91+
# close the heredoc and inject extra output keys. See:
92+
# https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#multiline-strings
93+
DELIM="EOF_$(openssl rand -hex 16)"
94+
{
95+
echo "logs<<${DELIM}"
96+
cat ./semver-artifact/logs
97+
echo "${DELIM}"
98+
} >> "$GITHUB_OUTPUT"
99+
100+
# The marker `<!-- semver-check-comment -->` is what makes the comment
101+
# "sticky": maintain-one-comment uses it to find and replace (or
102+
# delete) the existing comment instead of stacking new ones.
103+
- name: Upsert sticky comment
104+
if: steps.read.outputs.result != 'success'
105+
uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0
106+
with:
107+
token: ${{ secrets.GITHUB_TOKEN }}
108+
number: ${{ steps.read.outputs.pr_number }}
109+
body-include: '<!-- semver-check-comment -->'
110+
body: |
111+
<!-- semver-check-comment -->
112+
Thank you for opening this pull request!
113+
114+
Reviewer note: [cargo-semver-checks](https://github.com/obi1kenobi/cargo-semver-checks) reported the current version number is not SemVer-compatible with the changes in this pull request (compared against the base branch).
115+
116+
<details>
117+
<summary>Details</summary>
118+
119+
```
120+
${{ steps.read.outputs.logs }}
121+
```
122+
123+
</details>
124+
125+
- name: Delete sticky comment
126+
if: steps.read.outputs.result == 'success'
127+
uses: actions-cool/maintain-one-comment@909842216bc8e8658364c572ec52100f4c2cc50a # v3.3.0
128+
with:
129+
token: ${{ secrets.GITHUB_TOKEN }}
130+
number: ${{ steps.read.outputs.pr_number }}
131+
body-include: '<!-- semver-check-comment -->'
132+
delete: true

.github/workflows/dependencies.yml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,7 @@ on:
2525
push:
2626
branches-ignore:
2727
- 'gh-readonly-queue/**'
28-
paths:
29-
- "**/Cargo.toml"
30-
- "**/Cargo.lock"
3128
pull_request:
32-
paths:
33-
- "**/Cargo.toml"
34-
- "**/Cargo.lock"
3529
merge_group:
3630
# manual trigger
3731
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
@@ -42,7 +36,7 @@ permissions:
4236

4337
jobs:
4438
depcheck:
45-
name: circular dependency check
39+
name: Circular Dependency Check
4640
runs-on: ubuntu-latest
4741
container:
4842
image: amd64/rust
@@ -61,6 +55,7 @@ jobs:
6155
cargo run
6256
6357
detect-unused-dependencies:
58+
name: Detect Unused Dependencies
6459
runs-on: ubuntu-latest
6560
container:
6661
image: amd64/rust

.github/workflows/docs.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ name: Deploy DataFusion site
2828

2929
jobs:
3030
build-docs:
31+
permissions:
32+
contents: write
3133
name: Build docs
3234
runs-on: ubuntu-latest
3335
steps:

.github/workflows/rust.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ on:
4242
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
4343
workflow_dispatch:
4444

45+
permissions:
46+
contents: read
47+
4548
jobs:
4649
# Check crate compiles and base cargo check passes
4750
linux-build-lib:
@@ -740,7 +743,7 @@ jobs:
740743
with:
741744
submodules: true
742745
fetch-depth: 1
743-
746+
744747
- name: Mark repository as safe for git
745748
# Required for git commands inside container (avoids "dubious ownership" error)
746749
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"

Cargo.lock

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmarks/.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
data
2-
results
2+
data_csv
3+
./results/
34
venv
5+
!sql_benchmarks/**/results/

benchmarks/Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ mimalloc_extended = ["libmimalloc-sys/extended"]
4343
arrow = { workspace = true }
4444
async-trait = "0.1"
4545
bytes = { workspace = true }
46-
clap = { version = "4.5.60", features = ["derive"] }
46+
clap = { version = "4.6.0", features = ["derive", "env"] }
47+
criterion = { workspace = true, features = ["html_reports"] }
4748
datafusion = { workspace = true, default-features = true }
4849
datafusion-common = { workspace = true, default-features = true }
4950
env_logger = { workspace = true }
@@ -63,3 +64,8 @@ tokio-util = { version = "0.7.17" }
6364

6465
[dev-dependencies]
6566
datafusion-proto = { workspace = true }
67+
tempfile = { workspace = true }
68+
69+
[[bench]]
70+
harness = false
71+
name = "sql"

0 commit comments

Comments
 (0)