Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions byconity/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,10 @@ export BENCH_DURABLE=yes
# dependency, so the worst-case cold start is several minutes; the
# lib's 300s default has timed out before server is up.
export BENCH_CHECK_TIMEOUT=1200
# After firecracker snapshot+restore the cluster's
# internal connections (brpc/gossip) are stale; ./start's
# shallow health probe doesn't notice and short-circuits.
# Tell the playground agent to ./stop the cluster before
# ./start so the next bring-up is from a clean state.
export PLAYGROUND_RESTART_AFTER_RESTORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
9 changes: 7 additions & 2 deletions cedardb-parquet/start
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,15 @@ if ! sudo docker run -d --rm -p 5432:5432 \
exit 1
fi

for _ in $(seq 1 60); do
# First-boot initdb inside the container takes well over a minute
# (observed ~90-120 s of "Fixing permissions"/"Setting up database
# directory" before postgres actually listens). Give it 10 min —
# pg_isready exits fast once the daemon is up, so this only
# matters in the failure path.
for _ in $(seq 1 600); do
pg_isready -h localhost --dbname postgres -U postgres >/dev/null 2>&1 && exit 0
sleep 1
done
echo "cedardb did not become ready in 60 s; container logs:" >&2
echo "cedardb did not become ready in 600 s; container logs:" >&2
sudo docker logs cedardb 2>&1 | tail -40 >&2 || true
exit 1
9 changes: 7 additions & 2 deletions cedardb/start
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,15 @@ if ! sudo docker run -d --rm -p 5432:5432 \
exit 1
fi

for _ in $(seq 1 60); do
# First-boot initdb inside the container can run for well over a
# minute (observed ~90-120 s of "Fixing permissions"/"Setting up
# database directory" before postgres actually listens). Older
# 60 s budget bailed during that phase. Give it 10 min — pg_isready
# exits fast once the daemon is up so this only matters on failure.
for _ in $(seq 1 600); do
pg_isready -h localhost --dbname postgres -U postgres >/dev/null 2>&1 && exit 0
sleep 1
done
echo "cedardb did not become ready in 60 s; container logs:" >&2
echo "cedardb did not become ready in 600 s; container logs:" >&2
sudo docker logs cedardb 2>&1 | tail -40 >&2 || true
exit 1
5 changes: 5 additions & 0 deletions chdb-dataframe/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single"
export BENCH_DURABLE=no
# Skip the pre-snapshot ./stop+./start cycle: the loaded
# state lives only in the daemon's process memory (in-process
# DataFrame, JVM heap caches) and stopping wipes it. The
# playground agent reads this and snapshots the running daemon.
export PLAYGROUND_SKIP_RESTART_BEFORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
2 changes: 1 addition & 1 deletion clickhouse-web/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,5 @@ ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955'
PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
)
ENGINE = MergeTree
SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size_ratio_to_total_space = 0.9,
SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/caches/web/', max_size_ratio_to_total_space = 0.9,
disk = disk(type = web, endpoint = 'https://clickhouse-public-datasets.s3.amazonaws.com/web/'));
18 changes: 15 additions & 3 deletions clickhouse-web/install
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ if [ ! -x /usr/bin/clickhouse ]; then
sudo ./clickhouse install --noninteractive
fi

# Cache directory used by the web disk.
sudo mkdir -p /dev/shm/clickhouse
sudo chown clickhouse:clickhouse /dev/shm/clickhouse
# Cache directory used by the web disk. ClickHouse rejects any
# filesystem-cache path outside /var/lib/clickhouse/caches/ with
# BAD_ARGUMENTS at CREATE TABLE time, but we still want the actual
# bytes to live in tmpfs (/dev/shm) for the speed: cold queries
# pull ~1 GB on first run and tmpfs avoids touching the host SSD.
#
# Newer ClickHouse versions canonicalise the path before the policy
# check, so the older symlink trick (caches/web → /dev/shm/...) is
# rejected with BAD_ARGUMENTS. Bind-mount tmpfs at the
# policy-acceptable path instead — to CH the cache dir *is*
# /var/lib/clickhouse/caches/web with no symlink to resolve.
# Create the tmpfs backing directory and the policy-acceptable mount
# point, both owned by the clickhouse user.
sudo mkdir -p /dev/shm/clickhouse /var/lib/clickhouse/caches/web
sudo chown clickhouse:clickhouse /dev/shm/clickhouse /var/lib/clickhouse/caches/web
# Bind-mount only when the target is not already a mount point. An
# unconditional `mount --bind` stacks one more mount on the same path
# every time this script is re-run.
if ! mountpoint -q /var/lib/clickhouse/caches/web; then
    sudo mount --bind /dev/shm/clickhouse /var/lib/clickhouse/caches/web
fi
# Re-apply ownership through the mounted path.
sudo chown clickhouse:clickhouse /var/lib/clickhouse/caches/web
5 changes: 5 additions & 0 deletions daft-parquet-partitioned/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned"
export BENCH_DURABLE=no
# Skip the pre-snapshot ./stop+./start cycle: the loaded
# state lives only in the daemon's process memory (in-process
# DataFrame, JVM heap caches) and stopping wipes it. The
# playground agent reads this and snapshots the running daemon.
export PLAYGROUND_SKIP_RESTART_BEFORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
Empty file removed daft-parquet/.preserve-state
Empty file.
5 changes: 5 additions & 0 deletions daft-parquet/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single"
export BENCH_DURABLE=no
# Skip the pre-snapshot ./stop+./start cycle: the loaded
# state lives only in the daemon's process memory (in-process
# DataFrame, JVM heap caches) and stopping wipes it. The
# playground agent reads this and snapshots the running daemon.
export PLAYGROUND_SKIP_RESTART_BEFORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
18 changes: 16 additions & 2 deletions druid/load
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@ DRUID_DIR="apache-druid-${VERSION}"
# datasource).
"./${DRUID_DIR}/bin/post-index-task" --file ingest.json --url http://localhost:8081 || true

# Wait until the hits datasource is queryable.
for _ in $(seq 1 600); do
# Wait until the hits datasource is queryable. Druid's index task can
# legitimately take hours on a 16 GiB VM; budget 4 h here, and fail
# loudly if hits still isn't queryable so the agent doesn't take a
# snapshot of a half-ingested datasource (which would otherwise look
# "snapshotted" but every query returns
# druidException ... Object 'hits' not found
# at runtime).
cnt=""
for _ in $(seq 1 2880); do # 2880 * 5s = 4 h
cnt=$(curl -sf -XPOST -H'Content-Type: application/json' \
http://localhost:8888/druid/v2/sql/ \
-d '{"query": "SELECT COUNT(*) FROM hits"}' 2>/dev/null \
Expand All @@ -22,6 +29,13 @@ for _ in $(seq 1 600); do
fi
sleep 5
done
if [ -z "$cnt" ] || [ "$cnt" -le 0 ]; then
echo "druid: hits datasource still not queryable after 4 h; ingestion" >&2
echo "did not finish. Dumping recent task list for diagnosis:" >&2
curl -sS http://localhost:8081/druid/indexer/v1/tasks 2>&1 | head -c 2000 >&2
exit 1
fi
echo "druid: hits has $cnt rows after ingestion"

rm -f hits.tsv
sync
Empty file removed duckdb-dataframe/.preserve-state
Empty file.
5 changes: 5 additions & 0 deletions duckdb-dataframe/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single"
export BENCH_DURABLE=no
# Skip the pre-snapshot ./stop+./start cycle: the loaded
# state lives only in the daemon's process memory (in-process
# DataFrame, JVM heap caches) and stopping wipes it. The
# playground agent reads this and snapshots the running daemon.
export PLAYGROUND_SKIP_RESTART_BEFORE_SNAPSHOT=yes
exec ../lib/benchmark-common.sh
53 changes: 5 additions & 48 deletions firebolt-parquet-partitioned/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,49 +1,6 @@
#!/bin/bash

# Download the partitioned hits parquet files
echo "Downloading dataset..."
rm -rf data
../lib/download-hits-parquet-partitioned data

# Start the container
sudo apt-get install -y docker.io jq
sudo docker run -dit --name firebolt-core --rm \
--ulimit memlock=8589934592:8589934592 \
--security-opt seccomp=unconfined \
-p 127.0.0.1:3473:3473 \
-v /firebolt-core/volume \
-v ./data/:/firebolt-core/clickbench \
ghcr.io/firebolt-db/firebolt-core:preview-rc

# See firebolt/benchmark.sh — the old curl-and-break pattern accepted the
# "Cluster not yet healthy" JSON error body as success.
for _ in {1..600}
do
if curl -sS "http://localhost:3473/" \
--data-binary "SELECT 'Firebolt is ready';" 2>/dev/null \
| grep -q "Firebolt is ready"; then
break
fi
sleep 1
done

# Create the database and external table
echo "Creating external table..."
curl -sS "http://localhost:3473/?enable_multi_query_requests=true" --data-binary "DROP DATABASE IF EXISTS clickbench;CREATE DATABASE clickbench;"
curl -sS "http://localhost:3473/?database=clickbench&enable_multi_query_requests=true" --data-binary @create.sql

# Print statistics
DATA_SIZE=$(du -bcs data/hits_*.parquet 2>/dev/null | grep total | awk '{print $1}')
if [ -z "$DATA_SIZE" ]; then
DATA_SIZE=$(du -cs data/hits_*.parquet | grep total | awk '{print $1}')
fi
echo "Load time: 0"
echo "Data size: $DATA_SIZE"

# Run the benchmark
echo "Running the benchmark..."
./run.sh

# Stop the container and remove the data
sudo docker container stop firebolt-core
rm -rf data
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned"
export BENCH_DURABLE=no
export BENCH_RESTARTABLE=no
exec ../lib/benchmark-common.sh
7 changes: 7 additions & 0 deletions firebolt-parquet-partitioned/check
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
set -e

# Health probe for firebolt-core. The HTTP port answers immediately but
# may return a cluster-not-ready JSON error at HTTP 200, so success
# requires an actual result: a response line starting with "1" for
# `SELECT 1;`. The exit status is that of the final grep.
curl --silent --show-error --fail --max-time 5 \
    --data-binary 'SELECT 1;' \
    'http://localhost:3473/' 2>/dev/null \
  | grep --quiet '^1'
6 changes: 6 additions & 0 deletions firebolt-parquet-partitioned/data-size
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
set -e

# Print the size in bytes of ./fb-volume, the host directory that is
# bind-mounted at /firebolt-core/volume inside the container and holds
# firebolt-core's database state. du's own error output is discarded;
# only the grand-total line's first field is printed.
du --bytes --total --summarize fb-volume 2>/dev/null \
  | awk '/total$/ { print $1 }'
6 changes: 6 additions & 0 deletions firebolt-parquet-partitioned/install
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
set -eu

# Host prerequisites: docker to run the engine container and jq for
# parsing its JSON responses. The image is pulled up front.
packages=(docker.io jq)

sudo apt-get update --yes
sudo apt-get install --yes "${packages[@]}"
sudo docker pull ghcr.io/firebolt-db/firebolt-core:preview-rc
20 changes: 20 additions & 0 deletions firebolt-parquet-partitioned/load
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
set -eu

# Partitioned-parquet variant: move every hits_*.parquet from the
# working directory into ./data so the container sees them under
# /firebolt-core/clickbench/. create.sql declares the external table
# whose FROM PATTERN matches those files.
mkdir -p data

# Expand the glob once with nullglob on, so "no files" yields an empty
# list rather than the literal pattern.
shopt -s nullglob
parts=(hits_*.parquet)
shopt -u nullglob

if (( ${#parts[@]} > 0 )); then
    for part in "${parts[@]}"; do
        mv -f "$part" "data/$part"
    done
fi

# Recreate the database, then apply the schema. --fail makes an HTTP
# error status abort the script under `set -e`.
curl --silent --show-error --fail \
    --data-binary 'DROP DATABASE IF EXISTS clickbench;CREATE DATABASE clickbench;' \
    'http://localhost:3473/?enable_multi_query_requests=true'
curl --silent --show-error --fail \
    --data-binary @create.sql \
    'http://localhost:3473/?database=clickbench&enable_multi_query_requests=true'

sync
28 changes: 28 additions & 0 deletions firebolt-parquet-partitioned/query
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Reads a SQL query from stdin, runs it against the firebolt-core
# container via /?database=clickbench.
# Stdout: query result (firebolt's JSON_Compact format).
# Stderr: query runtime in fractional seconds on the last line,
#         pulled from the response's `.statistics.elapsed`.
# Exit non-zero on error, including a response that is not JSON or
# that carries no `.statistics.elapsed`.
set -e

query=$(cat)

# Result + sub-result caches off so timings are real; output_format
# matches what firebolt's run.sh uses for the public benchmark.
PARAMS='database=clickbench&enable_result_cache=false&enable_subresult_cache=false&enable_scan_cache=false&output_format=JSON_Compact'

# A transport failure (non-zero curl exit) aborts here under `set -e`.
resp=$(curl -sS --max-time 600 "http://localhost:3473/?${PARAMS}" \
    --data-binary "$query")

# Firebolt returns a JSON object whether the query succeeded or not.
# A failed query has an "errors" key; a successful one carries
# "data" + "statistics".
if printf '%s' "$resp" | jq -e '.errors' >/dev/null 2>&1; then
    printf '%s\n' "$resp" >&2
    exit 1
fi

# Extract the runtime before emitting anything on stdout. `jq -e` exits
# non-zero when the value is null/missing or the body is not JSON, so
# the literal "null" is never reported as a runtime and a malformed
# body is never printed as a result.
if ! elapsed=$(printf '%s' "$resp" | jq -e -r '.statistics.elapsed' 2>/dev/null); then
    printf '%s\n' "$resp" >&2
    echo "firebolt query: response carries no .statistics.elapsed" >&2
    exit 1
fi

printf '%s\n' "$resp"
printf '%s\n' "$elapsed" >&2
18 changes: 0 additions & 18 deletions firebolt-parquet-partitioned/run.sh

This file was deleted.

76 changes: 76 additions & 0 deletions firebolt-parquet-partitioned/start
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
set -eu

# Bring up the firebolt-core container and wait until it answers real
# queries. Exits 0 once the engine is ready; after 600 failed probes it
# dumps diagnostics to stderr and exits 1.

# Idempotent: if firebolt-core already answers SELECT 1, do nothing.
if curl -sS --max-time 5 'http://localhost:3473/' \
     --data-binary 'SELECT 1;' 2>/dev/null | grep -q '^1'; then
    exit 0
fi

mkdir -p data fb-volume
# firebolt-core runs as UID/GID 1111 inside the container and refuses
# to start if its data dir is not writeable by that uid (the engine
# self-checks and aborts with "directory ... is not readable or
# writeable by the Firebolt Core process"). Set the host-side
# ownership accordingly so the bind-mounted dir is usable.
sudo chown 1111:1111 fb-volume

# If the container exists (stopped from a prior agent pre-snapshot
# cycle), just start it back — the data lives on the bind-mounted
# fb-volume below, so the previously-created `clickbench` database
# is still there. Otherwise create the container fresh.
if sudo docker ps -a --format '{{.Names}}' | grep -qx firebolt-core; then
    sudo docker start firebolt-core >/dev/null
else
    # `firebolt-core` is the public self-hosted image. Container needs
    # memlock 8 GiB and seccomp unconfined per upstream's run docs.
    #   /firebolt-core/clickbench: parquet source (read at load time).
    #   /firebolt-core/volume:     engine data directory (must persist
    #                              across the agent's pre-snapshot
    #                              stop+start cycle or the snapshot
    #                              ships an empty DB).
    # The agent stages partitioned parquet at $PWD as symlinks pointing
    # at /opt/clickbench/datasets_ro/hits_partitioned/hits_N.parquet
    # (an absolute host-VM path). `./load` then `mv`s those symlinks
    # into data/, but inside the container the absolute target is
    # unreachable and the symlinks dangle — load reports success but
    # the external table sees zero files and every query returns
    # rows_read=0. Bind-mount the dataset disk into the container at
    # the same absolute path so the symlinks resolve.
    sudo docker run -dit --name firebolt-core \
        --ulimit memlock=8589934592:8589934592 \
        --security-opt seccomp=unconfined \
        -p 127.0.0.1:3473:3473 \
        -v "$(pwd)/fb-volume:/firebolt-core/volume" \
        -v "$(pwd)/data:/firebolt-core/clickbench" \
        -v "/opt/clickbench/datasets_ro:/opt/clickbench/datasets_ro:ro" \
        ghcr.io/firebolt-db/firebolt-core:preview-rc >/dev/null
fi

# Wait for the cluster to be "actually" ready. firebolt-core's HTTP
# port comes up immediately but returns
#   {"errors":[{"description":"Cluster not yet healthy: ..."}]}
# at HTTP 200 until the engine threads have warmed; look for a
# sentinel string in the response body instead of trusting the HTTP
# status to avoid that trap.
for _ in $(seq 1 600); do
    if curl -sS --max-time 5 'http://localhost:3473/' \
         --data-binary "SELECT 'firebolt-ready';" 2>/dev/null \
         | grep -q 'firebolt-ready'; then
        exit 0
    fi
    sleep 1
done

# Diagnostics are best-effort. With errexit still on, the first failing
# command below (typically the curl probe when nothing is listening)
# would abort the script mid-dump and replace the exit status. Turn
# errexit off so every section prints and the script exits 1.
set +e
{
    echo "firebolt-core did not become healthy in 10 min"
    echo "=== docker ps -a ==="
    sudo docker ps -a 2>&1
    echo "=== docker inspect firebolt-core (state) ==="
    sudo docker inspect firebolt-core --format '{{json .State}}' 2>&1
    echo "=== docker logs firebolt-core --tail 50 ==="
    sudo docker logs firebolt-core --tail 50 2>&1
    echo "=== curl http://localhost:3473/ ==="
    curl -sS --max-time 3 'http://localhost:3473/' --data-binary 'SELECT 1' 2>&1
    echo "=== ss listeners ==="
    sudo ss -lntp 2>&1 | head -20
} >&2
exit 1
8 changes: 8 additions & 0 deletions firebolt-parquet-partitioned/stop
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
set -e

# Stop the container without removing it, so the bind-mounted fb-volume
# keeps the loaded database for the next ./start. Removing the container
# and re-initialising the volume happens only on explicit re-provision.
# Best effort: a failed stop is ignored and all output is discarded.
sudo docker container stop firebolt-core &>/dev/null || :
Loading