Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
230 commits
Select commit Hold shift + click to select a range
56b9725
playground: scaffold ClickBench Firecracker microVM service
alexey-milovidov May 12, 2026
d1e144c
playground: mark chroot's /dev /proc /sys as rslave
alexey-milovidov May 12, 2026
41ed4b3
playground: shrink snapshots with restart + drop_caches + zstd
alexey-milovidov May 12, 2026
db4625a
playground: capture stop/start output in provision log
alexey-milovidov May 12, 2026
f9aed82
playground: kick daemon on agent boot, refresh rootfs on re-provision
alexey-milovidov May 12, 2026
b3c05ee
playground: pre-stamp 'ubuntu' in /etc/hosts
alexey-milovidov May 12, 2026
7181240
playground: pre-snapshot sync from host + drop daemon restart
alexey-milovidov May 12, 2026
446b3f7
playground: stop daemon before snapshot for tiny snapshot.bin
alexey-milovidov May 12, 2026
2d3ac3f
playground: init_on_free=1 + ensure-daemon-up on first /query
alexey-milovidov May 12, 2026
fd5d74f
playground: checkpoint — ClickHouse smoke test passes
alexey-milovidov May 12, 2026
f1088ec
playground: shared RO datasets disk + per-restore golden-disk clone
alexey-milovidov May 12, 2026
69f29a4
playground: overlayfs at /opt/clickbench/system, no dataset copy
alexey-milovidov May 12, 2026
ec99983
playground: enforce build/provision semaphores, clone-based rootfs
alexey-milovidov May 12, 2026
ab2fa8c
playground: flatten datasets, drop the symlink layout
alexey-milovidov May 12, 2026
d69f557
playground: cbsystem disk 200 GB sparse, not 2 GB
alexey-milovidov May 13, 2026
c33fd3b
playground: drop per-clone e2fsck, do it once at base build
alexey-milovidov May 13, 2026
06bf791
playground: pre-size base rootfs to 200 GB, drop per-clone resize2fs
alexey-milovidov May 13, 2026
b804e76
playground: drop redundant sync calls in image builds
alexey-milovidov May 13, 2026
5ea3be2
playground: fstrim before snapshot so freed dataset bytes leave the g…
alexey-milovidov May 13, 2026
b0d0c36
ClickBench: replace dataset copy/move with symlinks where safe
alexey-milovidov May 13, 2026
63cb2e5
playground: move agent off port 8080 to 50080
alexey-milovidov May 13, 2026
c824524
playground: ship lib/download-hits-* stubs at /opt/clickbench/lib
alexey-milovidov May 13, 2026
97758e0
playground: switch to Ubuntu's generic kernel + parse ip= from userspace
alexey-milovidov May 13, 2026
d6fe6df
playground: install Ubuntu kernel modules into base rootfs
alexey-milovidov May 13, 2026
ca8bc08
playground: use dpkg-deb -x for kernel modules to keep apt clean
alexey-milovidov May 13, 2026
5ec4e63
playground: parallel-provisioning report
alexey-milovidov May 13, 2026
70fd2a7
playground: fix fc-spawn underscore crash, OOM at 16 GB, $USER unset
alexey-milovidov May 13, 2026
a324d1f
playground: bump VM RAM to 48 GB, raise check timeout to 15 min
alexey-milovidov May 13, 2026
32a8504
playground: disable DataFrame-style engines, revert VM RAM to 16 GB
alexey-milovidov May 13, 2026
e759e96
playground: also disable duckdb-memory
alexey-milovidov May 13, 2026
b3db27e
playground: bump /snapshot/create timeout to 30 min
alexey-milovidov May 13, 2026
0689a33
playground: snapshot-specific semaphore (default 6)
alexey-milovidov May 13, 2026
d7a3f31
playground: restore VM state from disk on server start
alexey-milovidov May 13, 2026
c925886
playground: bump /snapshot/create timeout to 60 min
alexey-milovidov May 13, 2026
27db4c7
playground: snapshot/restore overhaul + UI iteration
alexey-milovidov May 13, 2026
cc76bc2
playground: hover tooltip on system slabs with uptime/status
alexey-milovidov May 13, 2026
3c1014f
playground: CSS-only system slab tooltip
alexey-milovidov May 13, 2026
2316c2a
playground: triangle pin under tooltip
alexey-milovidov May 13, 2026
428787d
playground: enlarge tooltip pin so it spans slab to box
alexey-milovidov May 13, 2026
e953f34
playground: no text selection on slabs; second click = run
alexey-milovidov May 13, 2026
230ad1d
playground: preserve example index across system switches
alexey-milovidov May 13, 2026
b9f7d90
playground: drop ' (script)' suffix from Time
alexey-milovidov May 13, 2026
fd457a4
playground: swap to new system's example only if textarea unedited
alexey-milovidov May 13, 2026
b33b8e1
playground: warm up snapshotted VMs on select
alexey-milovidov May 13, 2026
b502e4b
playground: /api/admin/provision actually triggers initial provision
alexey-milovidov May 13, 2026
e995418
playground: web UI works when opened as file://
alexey-milovidov May 13, 2026
f0c9cff
playground: default to clickhouse when no #hash selection
alexey-milovidov May 13, 2026
06ea5bf
Remove clutter
alexey-milovidov May 13, 2026
fc30338
Merge branch 'playground-wip' of github.com:ClickHouse/ClickBench int…
alexey-milovidov May 13, 2026
a754fbb
ClickBench: datafusion-vortex{,-partitioned} use datafusion-cli for q…
alexey-milovidov May 13, 2026
846d650
playground: hide output pane until a query result exists
alexey-milovidov May 13, 2026
5880597
playground: hide time stat until a query runs
alexey-milovidov May 13, 2026
d22b6b6
playground: re-apply example even when same option is re-picked
alexey-milovidov May 13, 2026
57c96b0
playground: ignore clicks on provisioning slabs
alexey-milovidov May 13, 2026
1d3c6b1
playground: raise output cap default to 256 KB
alexey-milovidov May 13, 2026
168a3c3
ClickBench/arc: launch via noble's loader so it boots on 22.04
alexey-milovidov May 13, 2026
1599219
ClickBench/opteryx: print query results
alexey-milovidov May 13, 2026
c74de82
ClickBench/byconity: ./stop preserves data
alexey-milovidov May 13, 2026
aa6d07f
ClickBench/chdb-parquet-partitioned: use bare "hits_*.parquet" in que…
alexey-milovidov May 13, 2026
b4e15e3
ClickBench/drill: pass --add-opens flags through DRILL_JAVA_OPTS
alexey-milovidov May 13, 2026
dce5893
playground: re-apply example on dropdown blur
alexey-milovidov May 13, 2026
73f5c4a
Minor changes
alexey-milovidov May 13, 2026
94ba946
ClickBench/mongodb: pin to 7.0 to avoid Linux >= 6.19 refusal
alexey-milovidov May 13, 2026
6530243
Merge branch 'playground-wip' of github.com:ClickHouse/ClickBench int…
alexey-milovidov May 13, 2026
eaa1ca8
ClickBench/presto*: print query rows instead of discarding them
alexey-milovidov May 13, 2026
47a1de6
playground: SNI-allowlist proxy for datalake systems
alexey-milovidov May 13, 2026
7cdf03b
ClickBench/doris{,-parquet}: rename Apache Doris -> Doris in results
alexey-milovidov May 13, 2026
deabe31
Merge branch 'playground-wip' of github.com:ClickHouse/ClickBench int…
alexey-milovidov May 13, 2026
e346a39
ClickBench/monetdb: start cleans stale daemon state + runs 'monetdb s…
alexey-milovidov May 13, 2026
8d6a897
playground: post-query health check, restore if daemon died
alexey-milovidov May 13, 2026
354a8fa
ClickBench/arc: fix loader path in glibc-noble wrapper
alexey-milovidov May 13, 2026
59e4998
ClickBench/duckdb-datalake*: skip IMDS probe so playground queries
alexey-milovidov May 13, 2026
3e99a29
ClickBench/clickhouse-datalake*: NOSIGN to skip IMDS probe
alexey-milovidov May 13, 2026
547706a
ClickBench/datafusion-vortex: bin renamed clickbench -> query_bench
alexey-milovidov May 13, 2026
d41743f
ClickBench/starrocks: wait for BE to actually become alive
alexey-milovidov May 13, 2026
ddc5c19
ClickBench/{pandas,polars-dataframe}: rename queries.py -> queries.sql
alexey-milovidov May 13, 2026
c87513d
ClickBench/polars: drop SQL->lambda translation, eval expressions dir…
alexey-milovidov May 13, 2026
4581727
playground: enable_internet strips filtered_internet's DROP first
alexey-milovidov May 13, 2026
fe3f646
ClickBench/druid: sync start/load/data-size to 37.0.0
alexey-milovidov May 13, 2026
fb52a2b
ClickBench/drill: pass --add-opens via the env vars sqlline reads
alexey-milovidov May 13, 2026
f72c4ea
ClickBench/monetdb: try 127.0.0.1 explicitly, log mclient error
alexey-milovidov May 13, 2026
62efd48
ClickBench/duckdb-datalake*: pin S3 region in query, not just at load
alexey-milovidov May 13, 2026
5839b6e
ClickBench: monetdb/siglens/starrocks/tidb fixes
alexey-milovidov May 13, 2026
3194d99
playground: ship hits.json on the readonly dataset disk
alexey-milovidov May 14, 2026
954a0fe
ClickBench: round of provisioning fixes
alexey-milovidov May 14, 2026
b4d2a47
playground: switch base rootfs to Ubuntu 24.04 (noble)
alexey-milovidov May 14, 2026
548d870
ClickBench/mongodb: pull 7.0 from jammy repo on noble too
alexey-milovidov May 14, 2026
c28304d
playground: cache DNS + early-out preamble in SNI proxy
alexey-milovidov May 14, 2026
3e217d7
playground: keep *.json runtime files when building per-system disk
alexey-milovidov May 14, 2026
5dfd7b8
playground: per-VM 256 GB swap disk for dataframe systems
alexey-milovidov May 14, 2026
f13a660
playground: fix pinot, daft-parquet-partitioned, log full /provision …
alexey-milovidov May 14, 2026
013434e
playground: rework monetdb load/query without expect; siglens uses pr…
alexey-milovidov May 14, 2026
0a2907b
playground: fix monetdb auth + daft-parquet-partitioned glob
alexey-milovidov May 14, 2026
a3dc1ee
playground: pinot: wait for broker+server to register before AddTable
alexey-milovidov May 14, 2026
b698d3b
playground: monetdb uses --password=; agent skips pre-snapshot restar…
alexey-milovidov May 14, 2026
7b1c755
playground: mark dataframe systems with .preserve-state
alexey-milovidov May 14, 2026
3abe6a9
playground: monetdb: drop -u from mclient — let ~/.monetdb provide bo…
alexey-milovidov May 14, 2026
680ea5e
playground: agent sets _daemon_started for .preserve-state systems
alexey-milovidov May 14, 2026
5a7ce23
playground: monetdb check: drop -u/-P from mclient like start/load/query
alexey-milovidov May 14, 2026
db5a916
playground: monetdb query: use -t clock instead of '\t clock' meta
alexey-milovidov May 14, 2026
eed1238
playground: parseable: stream NDJSON chunks to /ingest, don't write 4…
alexey-milovidov May 14, 2026
4a1d618
playground: drop sirius from catalog — needs GPU we don't have
alexey-milovidov May 14, 2026
a8f47f7
playground: tidb: default to TIDB_MODE=tikv
alexey-milovidov May 14, 2026
2ddb2c1
playground: pinot: mark .preserve-state
alexey-milovidov May 14, 2026
c08f246
playground: split queries.sql by lines instead of ;\n
alexey-milovidov May 14, 2026
fa534a2
playground: dataframe systems return result + cleaner query.py extrac…
alexey-milovidov May 14, 2026
238dbbd
playground: handle_queries falls back to queries.{logsql,py,txt}
alexey-milovidov May 14, 2026
223f67e
playground: rename queries.{logsql,txt} -> queries.sql
alexey-milovidov May 14, 2026
1d4e0a4
playground: give umbra a 256 GB swap disk
alexey-milovidov May 14, 2026
94ce37d
playground: persist vm.last_error to <system>/last_error.txt
alexey-milovidov May 14, 2026
c2ebdb2
playground: revert blur listener; reset example <select> on textarea …
alexey-milovidov May 14, 2026
937486a
playground: agent: don't surface truncation as an error
alexey-milovidov May 14, 2026
73f5692
playground: cumulative CPU-seconds cap (default 1 hour, ready state o…
alexey-milovidov May 14, 2026
f603ee9
playground: agent: pull dmesg OOM lines when query fails with no outp…
alexey-milovidov May 14, 2026
8ed0669
playground: kick VM on any /query error, not just unhealthy-daemon
alexey-milovidov May 14, 2026
1486731
playground: route presto+trino datalake variants through the SNI proxy
alexey-milovidov May 14, 2026
58103ee
playground: persist slot assignments so existing snapshots keep their…
alexey-milovidov May 14, 2026
2f17f9a
playground: lower HOST_MIN_FREE_DISK_GB default 500->100
alexey-milovidov May 14, 2026
f669cc8
playground: agent: detect snapshot restore via /proc/stat btime + rec…
alexey-milovidov May 14, 2026
3e831e1
playground: load ClickHouse credentials from <state_dir>/clickhouse.conf
alexey-milovidov May 14, 2026
6a9aa7a
Minor edit
alexey-milovidov May 14, 2026
67d00d8
playground: clickhouse bootstrap + shared-query plumbing
alexey-milovidov May 14, 2026
b8a6cf8
playground: web: permalink via X-Query-Id + restore from ?q=<id>
alexey-milovidov May 14, 2026
7f0e8ee
Minor edit
alexey-milovidov May 14, 2026
b4810e2
Merge branch 'playground-wip' of github.com:ClickHouse/ClickBench int…
alexey-milovidov May 14, 2026
c273b35
playground: bootstrap: resolve writer host via ipify, not remote_addr…
alexey-milovidov May 14, 2026
f15f13f
Merge branch 'playground-wip' of github.com:ClickHouse/ClickBench int…
alexey-milovidov May 14, 2026
913871e
playground: reader user has no password (sha256 of empty string)
alexey-milovidov May 14, 2026
d002acd
playground: trino-datalake: switch to fs.native-s3 (the legacy hadoop…
alexey-milovidov May 14, 2026
0eccf8d
playground: trino: --output-format=ALIGNED so /api/query has output
alexey-milovidov May 14, 2026
0f17abf
playground: agent: clear _daemon_started after docker reconcile
alexey-milovidov May 14, 2026
03beb2b
playground: umbra: --memory=128g --memory-swap=-1 so cgroup sees the …
alexey-milovidov May 14, 2026
72fc905
playground: drill: -XX:-UseContainerSupport to dodge the cgroup-v2 NPE
alexey-milovidov May 14, 2026
ca0abe5
playground: web: 'Run all' competition mode
alexey-milovidov May 14, 2026
0635c48
playground: un-gate heavyai + oxla (both ship public docker images)
alexey-milovidov May 14, 2026
02f78ed
playground: web: flash competition row yellow for 1s on state change
alexey-milovidov May 14, 2026
1e97091
playground: web: competition panel becomes a left rail with clickable…
alexey-milovidov May 14, 2026
f622415
playground: web: left rail = max-content, right pane = remaining
alexey-milovidov May 14, 2026
7eb6dd5
playground: web: only switch to 2-col grid when competition is actual…
alexey-milovidov May 14, 2026
28f4372
playground: web: competition rail tweaks
alexey-milovidov May 14, 2026
32078cf
playground: web: 'System status' + 'Last error' move into the right pane
alexey-milovidov May 14, 2026
45ab7ca
playground: gizmosql: .mode box (was .mode trash, discarded all rows)
alexey-milovidov May 14, 2026
0e1adcc
playground: parseable: inline the ingest command — exported bash fn i…
alexey-milovidov May 14, 2026
ae98b6c
playground: web: competition runs 3 rounds, fires in shuffled order
alexey-milovidov May 14, 2026
54f28e8
playground: quickwit: detect 'message' errors + queries.json -> queri…
alexey-milovidov May 14, 2026
d53d960
playground: web: sort by best-so-far time, not only when all 3 rounds…
alexey-milovidov May 14, 2026
487a41b
playground: tidb: --tag clickbench so the data dir persists across re…
alexey-milovidov May 14, 2026
597d7ce
playground: web: 'Run all' also works for custom textarea queries
alexey-milovidov May 14, 2026
370f7e1
elasticsearch: wait for shards to recover before declaring ready
alexey-milovidov May 14, 2026
c060137
playground: web: right-align 'Run all' in the buttons row
alexey-milovidov May 14, 2026
bbae7fc
playground: web: hide competition panel on example pick / Run / edit
alexey-milovidov May 14, 2026
cffa226
gizmosql: disable non-TTY truncation so rows are printed
alexey-milovidov May 14, 2026
b14e2cc
playground: per-system VM RAM override; bump umbra to 96 GiB
alexey-milovidov May 14, 2026
67def65
trino-datalake{,-partitioned}: pin trino:455, restore hadoop-S3 + ano…
alexey-milovidov May 14, 2026
1d1c476
umbra: lean on the swap disk instead of a privileged RAM allotment
alexey-milovidov May 14, 2026
9b14149
trino-datalake{,-partitioned}: widen shim classpath after :455 pin
alexey-milovidov May 14, 2026
6f06afa
playground: web: keep competition rail open while editing the query
alexey-milovidov May 14, 2026
2f05785
playground: URL-encode X-Error so error newlines survive end-to-end
alexey-milovidov May 14, 2026
8487726
playground: web: re-baseline pristineQuery on rail clicks in example-…
alexey-milovidov May 14, 2026
ef25906
playground: druid post-restore recovery + agent btime watcher
alexey-milovidov May 14, 2026
6494e8e
parseable: use a wide time-window in queries, not today
alexey-milovidov May 14, 2026
874f100
heavyai: trailing ';' in check; oxla: drop from catalog
alexey-milovidov May 14, 2026
8ebfabf
playground: orioledb sysdisk bump + slab-hover rail highlight
alexey-milovidov May 14, 2026
c2f986e
drill: accept sqlline's current 'N row(s) selected (X.YYY seconds)' f…
alexey-milovidov May 14, 2026
58a02c5
tidb: mark .preserve-state so the snapshot keeps the loaded table
alexey-milovidov May 14, 2026
1110d9f
mongodb: emit timing from bash, not console.error inside mongosh
alexey-milovidov May 14, 2026
7377cac
umbra: drop docker memory cgroup; raise vm.max_map_count
alexey-milovidov May 14, 2026
a0e1893
trino-datalake: shim classpath via shell glob — trino:455 has no find…
alexey-milovidov May 14, 2026
13b4100
heavyai: allowlist /tmp for COPY FROM via omnisci.conf
alexey-milovidov May 14, 2026
74239fb
turso: enable RUST_BACKTRACE=1 in load and query
alexey-milovidov May 14, 2026
d8db153
playground: web: pretty-print JSON output bodies
alexey-milovidov May 14, 2026
18e73d8
starrocks: backend_alive check column 9 (Alive), not column 10
alexey-milovidov May 14, 2026
ff16bff
Merge remote-tracking branch 'origin/main' into playground-wip
alexey-milovidov May 14, 2026
f8f8840
playground: drop stale 10 KB output-cap references
alexey-milovidov May 14, 2026
c314486
playground: agent: allow concurrent /query in a single VM
alexey-milovidov May 14, 2026
5e3db62
playground: drop stale build-progress.md
alexey-milovidov May 14, 2026
fb9e740
playground: drop stale parallel-provisioning-report.md
alexey-milovidov May 14, 2026
c0d57e8
playground: drop stale writer/reader-password parameters from bootstr…
alexey-milovidov May 14, 2026
57da682
playground: drop reader_password field — it is always empty
alexey-milovidov May 14, 2026
e756e52
playground: add clickhouse-web to the catalog
alexey-milovidov May 14, 2026
397fa67
playground: enable databend, firebolt{,-parquet,-parquet-partitioned}…
alexey-milovidov May 14, 2026
8d3d4bf
playground: security hardening — aiohttp symlink, trusted internet, p…
alexey-milovidov May 14, 2026
ed1bbbb
playground: drop unused PLAYGROUND_MAX_VMS / max_warm_vms
alexey-milovidov May 14, 2026
8147d66
playground: aiohttp pin, systemd hardening, per-IP rate limits
alexey-milovidov May 14, 2026
ac98846
playground: rate limiter keys on TCP peer only, never X-Forwarded-For
alexey-milovidov May 14, 2026
a184c37
playground: real local DNS, rate-limit GC, clickhouse-web cache path
alexey-milovidov May 14, 2026
a8fc4ab
clickhouse-web: cache dir is now a symlink to /dev/shm/clickhouse
alexey-milovidov May 14, 2026
cecea2b
playground: TLS on 443 via Let's Encrypt + certbot
alexey-milovidov May 14, 2026
6b220d6
playground: web: reserve scrollbar gutter on the competition rail
alexey-milovidov May 14, 2026
e80b86f
firebolt: persist /firebolt-core/volume across pre-snapshot stop+start
alexey-milovidov May 14, 2026
2fd44d6
polars: mark .preserve-state so the loaded LazyFrame survives
alexey-milovidov May 14, 2026
71b7f4b
parseable: fail load loudly when logstream create returns non-2xx
alexey-milovidov May 14, 2026
5de540f
kinetica: mv the gzip into persist (revert symlink)
alexey-milovidov May 14, 2026
b458230
polars: include the eval result in /query response
alexey-milovidov May 14, 2026
7e71fe9
playground: compress per-system goldens with zstd to free disk
alexey-milovidov May 14, 2026
1674727
playground: idle-VM reaper — tear down 'ready' VMs after 10 min unused
alexey-milovidov May 15, 2026
ded372c
playground: revert manual zstd compression; rely on btrfs
alexey-milovidov May 15, 2026
879d87f
playground: force iptables-legacy in the base rootfs
alexey-milovidov May 15, 2026
c036a30
playground: in-VM download-hits-parquet-partitioned stub points at hi…
alexey-milovidov May 15, 2026
fee66e5
playground: preload iptable_raw + friends so dockerd networking works
alexey-milovidov May 15, 2026
e549149
firebolt{,-parquet,-parquet-partitioned}: bring up to PR #860 per-ste…
alexey-milovidov May 15, 2026
d3ade32
playground: docker default bridge in nat-unprotected mode
alexey-milovidov May 15, 2026
f8bfb2b
playground: disable dockerd iptables management entirely
alexey-milovidov May 15, 2026
a272401
firebolt{,-parquet,-parquet-partitioned}: dump diagnostics on healthc…
alexey-milovidov May 15, 2026
7f490f1
firebolt{,-parquet,-parquet-partitioned}: chown fb-volume to uid 1111
alexey-milovidov May 15, 2026
edafd37
playground: btrfs migration — drop manual zstd compression, doc btrfs…
alexey-milovidov May 15, 2026
2b84f22
playground: stage partitioned parquet symlinks at cwd before ./load
alexey-milovidov May 15, 2026
a52048e
clickhouse-web: bind-mount tmpfs at caches/web instead of symlinking
alexey-milovidov May 15, 2026
39fc1bf
cedardb: bump start-ready timeout 60s → 300s
alexey-milovidov May 15, 2026
c16154e
clickhouse-web: drop idempotency dance from install (always from scra…
alexey-milovidov May 15, 2026
95125ac
parseable: upgrade v2.5.12 → v2.7.2 (fixes 0-row loads)
alexey-milovidov May 15, 2026
09d4e46
playground: actually isolate disable_internet VMs (FORWARD DROP catch…
alexey-milovidov May 15, 2026
061ccd6
polars: avoid backslash inside f-string expression in query script
alexey-milovidov May 15, 2026
27a1572
playground: rebuild per-system rootfs+sysdisk when base is newer
alexey-milovidov May 15, 2026
9a04fce
playground: add INSTALL.md with end-to-end setup instructions
alexey-milovidov May 15, 2026
fa27fbd
siglens: rename queries.spl → queries.sql
alexey-milovidov May 15, 2026
8a18b66
playground: replace .preserve-state marker with benchmark.sh vars
alexey-milovidov May 16, 2026
8aa9bd6
playground: restore docker0 MASQUERADE inside VMs (presto/cloudberry …
alexey-milovidov May 16, 2026
f993b92
cedardb-parquet: align start-ready timeout with cedardb (60s → 600s)
alexey-milovidov May 16, 2026
c00594b
playground: honor BENCH_CHECK_TIMEOUT in agent's post-start probe
alexey-milovidov May 16, 2026
8220f83
kinetica: lower RAM-tier cap from 27 GB to 9 GB
alexey-milovidov May 16, 2026
66e03f2
umbra: dump memory + swap + container cgroup state on start
alexey-milovidov May 16, 2026
c3a674f
trino-datalake{,-partitioned}: bump BENCH_CHECK_TIMEOUT 1800 → 3600
alexey-milovidov May 16, 2026
b9f4983
playground: per-system VM RAM override; bump umbra to 32 GiB
alexey-milovidov May 16, 2026
3cf6295
Revert "playground: per-system VM RAM override; bump umbra to 32 GiB"
alexey-milovidov May 16, 2026
c19ca4d
umbra: unlimited memlock + dump sysctl/memlock in start diagnostics
alexey-milovidov May 16, 2026
4fddcde
trino-partitioned, presto-partitioned: bind-mount datasets_ro into co…
alexey-milovidov May 16, 2026
94cab89
trino-datalake: dump container state + docker logs after start
alexey-milovidov May 16, 2026
f29d425
firebolt-parquet-partitioned: bind-mount datasets_ro into container
alexey-milovidov May 16, 2026
437f6f4
druid: fail load if hits still empty after polling; extend budget to 4 h
alexey-milovidov May 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions byconity/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,10 @@ export BENCH_DURABLE=yes
# dependency, so the worst-case cold start is several minutes; the
# lib's 300s default has timed out before server is up.
export BENCH_CHECK_TIMEOUT=1200
# After firecracker snapshot+restore the cluster's
# internal connections (brpc/gossip) are stale; ./start's
# shallow health probe doesn't notice and short-circuits.
# Tell the playground agent to ./stop the cluster before
# ./start so the next bring-up is from a clean state.
export PLAYGROUND_RESTART_AFTER_RESTORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
9 changes: 7 additions & 2 deletions cedardb-parquet/start
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,15 @@ if ! sudo docker run -d --rm -p 5432:5432 \
exit 1
fi

for _ in $(seq 1 60); do
# First-boot initdb inside the container takes well over a minute
# (observed ~90-120 s of "Fixing permissions"/"Setting up database
# directory" before postgres actually listens). Give it 10 min —
# pg_isready exits fast once the daemon is up, so this only
# matters in the failure path.
for _ in $(seq 1 600); do
pg_isready -h localhost --dbname postgres -U postgres >/dev/null 2>&1 && exit 0
sleep 1
done
echo "cedardb did not become ready in 60 s; container logs:" >&2
echo "cedardb did not become ready in 600 s; container logs:" >&2
sudo docker logs cedardb 2>&1 | tail -40 >&2 || true
exit 1
9 changes: 7 additions & 2 deletions cedardb/start
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,15 @@ if ! sudo docker run -d --rm -p 5432:5432 \
exit 1
fi

for _ in $(seq 1 60); do
# First-boot initdb inside the container can run for well over a
# minute (observed ~90-120 s of "Fixing permissions"/"Setting up
# database directory" before postgres actually listens). Older
# 60 s budget bailed during that phase. Give it 10 min — pg_isready
# exits fast once the daemon is up so this only matters on failure.
for _ in $(seq 1 600); do
pg_isready -h localhost --dbname postgres -U postgres >/dev/null 2>&1 && exit 0
sleep 1
done
echo "cedardb did not become ready in 60 s; container logs:" >&2
echo "cedardb did not become ready in 600 s; container logs:" >&2
sudo docker logs cedardb 2>&1 | tail -40 >&2 || true
exit 1
5 changes: 5 additions & 0 deletions chdb-dataframe/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single"
export BENCH_DURABLE=no
# Skip the pre-snapshot ./stop+./start cycle: the loaded
# state lives only in the daemon's process memory (in-process
# DataFrame, JVM heap caches) and stopping wipes it. The
# playground agent reads this and snapshots the running daemon.
export PLAYGROUND_SKIP_RESTART_BEFORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
2 changes: 1 addition & 1 deletion clickhouse-web/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,5 @@ ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955'
PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
)
ENGINE = MergeTree
SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size_ratio_to_total_space = 0.9,
SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/caches/web/', max_size_ratio_to_total_space = 0.9,
disk = disk(type = web, endpoint = 'https://clickhouse-public-datasets.s3.amazonaws.com/web/'));
18 changes: 15 additions & 3 deletions clickhouse-web/install
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ if [ ! -x /usr/bin/clickhouse ]; then
sudo ./clickhouse install --noninteractive
fi

# Cache directory used by the web disk.
sudo mkdir -p /dev/shm/clickhouse
sudo chown clickhouse:clickhouse /dev/shm/clickhouse
# Cache directory used by the web disk. ClickHouse rejects any
# filesystem-cache path outside /var/lib/clickhouse/caches/ with
# BAD_ARGUMENTS at CREATE TABLE time, but we still want the actual
# bytes to live in tmpfs (/dev/shm) for the speed: cold queries
# pull ~1 GB on first run and tmpfs avoids touching the host SSD.
#
# Newer ClickHouse versions canonicalise the path before the policy
# check, so the older symlink trick (caches/web → /dev/shm/...) is
# rejected with BAD_ARGUMENTS. Bind-mount tmpfs at the
# policy-acceptable path instead — to CH the cache dir *is*
# /var/lib/clickhouse/caches/web with no symlink to resolve.
sudo mkdir -p /dev/shm/clickhouse /var/lib/clickhouse/caches/web
sudo chown clickhouse:clickhouse /dev/shm/clickhouse /var/lib/clickhouse/caches/web
sudo mount --bind /dev/shm/clickhouse /var/lib/clickhouse/caches/web
sudo chown clickhouse:clickhouse /var/lib/clickhouse/caches/web
5 changes: 5 additions & 0 deletions daft-parquet-partitioned/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned"
export BENCH_DURABLE=no
# Skip the pre-snapshot ./stop+./start cycle: the loaded
# state lives only in the daemon's process memory (in-process
# DataFrame, JVM heap caches) and stopping wipes it. The
# playground agent reads this and snapshots the running daemon.
export PLAYGROUND_SKIP_RESTART_BEFORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
Empty file removed daft-parquet/.preserve-state
Empty file.
5 changes: 5 additions & 0 deletions daft-parquet/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single"
export BENCH_DURABLE=no
# Skip the pre-snapshot ./stop+./start cycle: the loaded
# state lives only in the daemon's process memory (in-process
# DataFrame, JVM heap caches) and stopping wipes it. The
# playground agent reads this and snapshots the running daemon.
export PLAYGROUND_SKIP_RESTART_BEFORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
18 changes: 16 additions & 2 deletions druid/load
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@ DRUID_DIR="apache-druid-${VERSION}"
# datasource).
"./${DRUID_DIR}/bin/post-index-task" --file ingest.json --url http://localhost:8081 || true

# Wait until the hits datasource is queryable.
for _ in $(seq 1 600); do
# Wait until the hits datasource is queryable. Druid's index task can
# legitimately take hours on a 16 GiB VM; budget 4 h here, and fail
# loudly if hits still isn't queryable so the agent doesn't take a
# snapshot of a half-ingested datasource (which would otherwise look
# "snapshotted" but every query returns
# druidException ... Object 'hits' not found
# at runtime).
cnt=""
for _ in $(seq 1 2880); do # 2880 * 5s = 4 h
cnt=$(curl -sf -XPOST -H'Content-Type: application/json' \
http://localhost:8888/druid/v2/sql/ \
-d '{"query": "SELECT COUNT(*) FROM hits"}' 2>/dev/null \
Expand All @@ -22,6 +29,13 @@ for _ in $(seq 1 600); do
fi
sleep 5
done
if [ -z "$cnt" ] || [ "$cnt" -le 0 ]; then
echo "druid: hits datasource still not queryable after 4 h; ingestion" >&2
echo "did not finish. Dumping recent task list for diagnosis:" >&2
curl -sS http://localhost:8081/druid/indexer/v1/tasks 2>&1 | head -c 2000 >&2
exit 1
fi
echo "druid: hits has $cnt rows after ingestion"

rm -f hits.tsv
sync
Empty file removed duckdb-dataframe/.preserve-state
Empty file.
5 changes: 5 additions & 0 deletions duckdb-dataframe/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single"
export BENCH_DURABLE=no
# Skip the pre-snapshot ./stop+./start cycle: the loaded
# state lives only in the daemon's process memory (in-process
# DataFrame, JVM heap caches) and stopping wipes it. The
# playground agent reads this and snapshots the running daemon.
export PLAYGROUND_SKIP_RESTART_BEFORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
53 changes: 5 additions & 48 deletions firebolt-parquet-partitioned/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,49 +1,6 @@
#!/bin/bash

# Download the partitioned hits parquet files
echo "Downloading dataset..."
rm -rf data
../lib/download-hits-parquet-partitioned data

# Start the container
sudo apt-get install -y docker.io jq
sudo docker run -dit --name firebolt-core --rm \
--ulimit memlock=8589934592:8589934592 \
--security-opt seccomp=unconfined \
-p 127.0.0.1:3473:3473 \
-v /firebolt-core/volume \
-v ./data/:/firebolt-core/clickbench \
ghcr.io/firebolt-db/firebolt-core:preview-rc

# See firebolt/benchmark.sh — the old curl-and-break pattern accepted the
# "Cluster not yet healthy" JSON error body as success.
for _ in {1..600}
do
if curl -sS "http://localhost:3473/" \
--data-binary "SELECT 'Firebolt is ready';" 2>/dev/null \
| grep -q "Firebolt is ready"; then
break
fi
sleep 1
done

# Create the database and external table
echo "Creating external table..."
curl -sS "http://localhost:3473/?enable_multi_query_requests=true" --data-binary "DROP DATABASE IF EXISTS clickbench;CREATE DATABASE clickbench;"
curl -sS "http://localhost:3473/?database=clickbench&enable_multi_query_requests=true" --data-binary @create.sql

# Print statistics
DATA_SIZE=$(du -bcs data/hits_*.parquet 2>/dev/null | grep total | awk '{print $1}')
if [ -z "$DATA_SIZE" ]; then
DATA_SIZE=$(du -cs data/hits_*.parquet | grep total | awk '{print $1}')
fi
echo "Load time: 0"
echo "Data size: $DATA_SIZE"

# Run the benchmark
echo "Running the benchmark..."
./run.sh

# Stop the container and remove the data
sudo docker container stop firebolt-core
rm -rf data
# Thin wrapper: the real flow lives in lib/benchmark-common.sh. This file
# only exports the knobs that driver reads and then hands over to it.
# NOTE(review): the exact meaning of BENCH_DURABLE / BENCH_RESTARTABLE is
# defined by lib/benchmark-common.sh, which is not visible here.
BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned"
BENCH_DURABLE=no
BENCH_RESTARTABLE=no
export BENCH_DOWNLOAD_SCRIPT BENCH_DURABLE BENCH_RESTARTABLE

exec ../lib/benchmark-common.sh
7 changes: 7 additions & 0 deletions firebolt-parquet-partitioned/check
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
set -e

# Health probe for firebolt-core.
#
# The HTTP port answers immediately but may return a cluster-not-ready
# JSON error at HTTP 200, so the HTTP status alone proves nothing. Run a
# real `SELECT 1;` and succeed only if some output line starts with "1".
# Exit status: 0 when the probe matched, 1 otherwise.
if curl -sSf --max-time 5 'http://localhost:3473/' \
        --data-binary 'SELECT 1;' 2>/dev/null \
    | grep -q '^1'; then
    exit 0
fi
exit 1
6 changes: 6 additions & 0 deletions firebolt-parquet-partitioned/data-size
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
set -e

# Print the on-disk size, in bytes, of the firebolt-core database state.
#
# Firebolt-core writes its state under /firebolt-core/volume inside the
# container, which is bind-mounted to ./fb-volume on the host. `du` is
# asked for a byte-exact summary with a grand total; awk keeps only the
# number from the "total" line. du's own error messages are discarded.
du --bytes --total --summarize fb-volume 2>/dev/null \
    | awk '/total$/ { print $1 }'
6 changes: 6 additions & 0 deletions firebolt-parquet-partitioned/install
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
set -eu

# One-time host setup: refresh the apt index, install Docker and jq, and
# pre-pull the firebolt-core image so a later `docker run` does not have
# to download it.
readonly firebolt_image='ghcr.io/firebolt-db/firebolt-core:preview-rc'

sudo apt-get update -y
sudo apt-get install -y docker.io jq
sudo docker pull "${firebolt_image}"
20 changes: 20 additions & 0 deletions firebolt-parquet-partitioned/load
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
set -eu

# Load step for the partitioned-parquet variant.
#
#   1. Stage hits_*.parquet under ./data so the container sees them at
#      /firebolt-core/clickbench/*.parquet; create.sql declares an
#      external table with FROM PATTERN that matches the glob.
#   2. (Re)create the `clickbench` database and run create.sql in it.
#
# Exits non-zero if curl fails OR if firebolt-core answers with an error
# body (see run_sql).

mkdir -p data
# nullglob: with no matching files the loop body is skipped instead of
# being run once with the literal pattern.
shopt -s nullglob
for f in hits_*.parquet; do
    mv -f "$f" "data/$f"
done
shopt -u nullglob

#######################################
# POST a request to firebolt-core and fail on engine-level errors.
#
# firebolt-core can answer HTTP 200 with a JSON body carrying an "errors"
# key (for example while the cluster is not yet healthy), so `curl -f`
# alone cannot tell success from failure. The body is checked with the
# same "errors" test that ./query applies. Slurp mode is used so that an
# error in any JSON document of a multi-statement response is caught.
#
# Arguments: $1 - request URL; remaining args are passed to curl verbatim.
# Outputs:   response body on stdout (on stderr when it is an error body).
# Returns:   non-zero when curl fails or the body contains "errors".
#######################################
run_sql() {
    local url=$1
    shift
    local resp
    resp=$(curl -sSf "$url" "$@")
    # A non-JSON body makes jq itself fail; that is treated as "no error
    # key present", so plain-text success responses pass through.
    if printf '%s' "$resp" \
        | jq -e -s 'any(.[]; type == "object" and .errors != null)' \
            >/dev/null 2>&1; then
        printf '%s\n' "$resp" >&2
        return 1
    fi
    if [ -n "$resp" ]; then
        printf '%s\n' "$resp"
    fi
}

run_sql 'http://localhost:3473/?enable_multi_query_requests=true' \
    --data-binary 'DROP DATABASE IF EXISTS clickbench;CREATE DATABASE clickbench;'
run_sql 'http://localhost:3473/?database=clickbench&enable_multi_query_requests=true' \
    --data-binary @create.sql

sync
28 changes: 28 additions & 0 deletions firebolt-parquet-partitioned/query
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Run one SQL statement against the firebolt-core container.
#
#   stdin  : the SQL text.
#   stdout : the raw response (firebolt's JSON_Compact format).
#   stderr : on success, the query runtime in fractional seconds on the
#            last line, taken from the response's `.statistics.elapsed`;
#            on failure, the error response itself.
#   exit   : non-zero on error.
set -e

sql=$(cat)

# Result, sub-result and scan caches are switched off so timings are
# real; output_format matches what firebolt's run.sh uses for the public
# benchmark.
request_params='database=clickbench&enable_result_cache=false&enable_subresult_cache=false&enable_scan_cache=false&output_format=JSON_Compact'

body=$(curl -sS --max-time 600 "http://localhost:3473/?${request_params}" \
    --data-binary "$sql")

# Firebolt returns a JSON object whether or not the query succeeded: a
# failure carries an "errors" key, a success carries "data" and
# "statistics". Treat the presence of "errors" as the failure signal.
if jq -e '.errors' <<<"$body" >/dev/null 2>&1; then
    printf '%s\n' "$body" >&2
    exit 1
fi

printf '%s\n' "$body"
jq -r '.statistics.elapsed' <<<"$body" >&2
18 changes: 0 additions & 18 deletions firebolt-parquet-partitioned/run.sh

This file was deleted.

76 changes: 76 additions & 0 deletions firebolt-parquet-partitioned/start
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
set -eu

# Start (or resume) the firebolt-core container and wait until it can
# actually answer queries.
#
# Exit status: 0 once a query round-trips; 1 (after dumping diagnostics
# to stderr) if the engine never becomes healthy.

# Idempotent: if firebolt-core already answers SELECT 1, do nothing.
if curl -sS --max-time 5 'http://localhost:3473/' \
        --data-binary 'SELECT 1;' 2>/dev/null | grep -q '^1'; then
    exit 0
fi

mkdir -p data fb-volume
# firebolt-core runs as UID/GID 1111 inside the container and refuses
# to start if its data dir is not writeable by that uid (the engine
# self-checks and aborts with "directory ... is not readable or
# writeable by the Firebolt Core process"). Set the host-side
# ownership accordingly so the bind-mounted dir is usable.
sudo chown 1111:1111 fb-volume

# If the container exists (stopped from a prior agent pre-snapshot
# cycle), just start it back — the data lives on the bind-mounted
# fb-volume below, so the previously-created `clickbench` database
# is still there. Otherwise create the container fresh.
if sudo docker ps -a --format '{{.Names}}' | grep -qx firebolt-core; then
    sudo docker start firebolt-core >/dev/null
else
    # `firebolt-core` is the public self-hosted image. Container needs
    # memlock 8 GiB and seccomp unconfined per upstream's run docs.
    #   /firebolt-core/clickbench: parquet source (read at load time).
    #   /firebolt-core/volume:     engine data directory (must persist
    #                              across the agent's pre-snapshot
    #                              stop+start cycle or the snapshot
    #                              ships an empty DB).
    # The agent stages partitioned parquet at $PWD as symlinks pointing
    # at /opt/clickbench/datasets_ro/hits_partitioned/hits_N.parquet
    # (an absolute host-VM path). `./load` then `mv`s those symlinks
    # into data/, but inside the container the absolute target is
    # unreachable and the symlinks dangle — load reports success but
    # the external table sees zero files and every query returns
    # rows_read=0. Bind-mount the dataset disk into the container at
    # the same absolute path so the symlinks resolve.
    sudo docker run -dit --name firebolt-core \
        --ulimit memlock=8589934592:8589934592 \
        --security-opt seccomp=unconfined \
        -p 127.0.0.1:3473:3473 \
        -v "$(pwd)/fb-volume:/firebolt-core/volume" \
        -v "$(pwd)/data:/firebolt-core/clickbench" \
        -v "/opt/clickbench/datasets_ro:/opt/clickbench/datasets_ro:ro" \
        ghcr.io/firebolt-db/firebolt-core:preview-rc >/dev/null
fi

# Wait for the cluster to be "actually" ready. firebolt-core's HTTP
# port comes up immediately but returns
#   {"errors":[{"description":"Cluster not yet healthy: ..."}]}
# at HTTP 200 until the engine threads have warmed; probe for a
# sentinel string instead of the HTTP status to avoid that trap.
# 600 attempts with a 1 s pause between them; each attempt is itself
# capped at 5 s by --max-time.
for _ in $(seq 1 600); do
    if curl -sS --max-time 5 'http://localhost:3473/' \
            --data-binary "SELECT 'firebolt-ready';" 2>/dev/null \
        | grep -q 'firebolt-ready'; then
        exit 0
    fi
    sleep 1
done

# Timed out: dump whatever state we can to stderr. Every probe is
# best-effort (`|| true`). Under `set -e` a single failing probe, such
# as curl getting "connection refused" (the very situation being
# diagnosed), would otherwise abort the script mid-dump, skip the
# remaining sections and exit with that probe's status instead of 1.
{
    echo "firebolt-core did not become healthy in 10 min"
    echo "=== docker ps -a ==="
    sudo docker ps -a 2>&1 || true
    echo "=== docker inspect firebolt-core (state) ==="
    sudo docker inspect firebolt-core --format '{{json .State}}' 2>&1 || true
    echo "=== docker logs firebolt-core --tail 50 ==="
    sudo docker logs firebolt-core --tail 50 2>&1 || true
    echo "=== curl http://localhost:3473/ ==="
    curl -sS --max-time 3 'http://localhost:3473/' --data-binary 'SELECT 1' 2>&1 || true
    echo "=== ss listeners ==="
    sudo ss -lntp 2>&1 | head -20 || true
} >&2
exit 1
8 changes: 8 additions & 0 deletions firebolt-parquet-partitioned/stop
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
set -e

# Stop the firebolt-core container without removing it.
#
# The container is left in place so its bind-mounted fb-volume keeps the
# loaded database for the next ./start; the container is removed and the
# volume re-initialised only on explicit re-provision. All docker output
# is discarded and a failed stop is deliberately ignored, so this script
# always exits 0.
if ! sudo docker container stop firebolt-core >/dev/null 2>&1; then
    : # stop failed; nothing to do
fi
Loading