From 1b565d896b03b20437664eec840e9cb681213e76 Mon Sep 17 00:00:00 2001 From: konard Date: Fri, 19 Jun 2026 10:31:26 +0000 Subject: [PATCH 1/4] Initial commit with task details Adding .gitkeep for PR creation (default mode). This file will be removed when the task is complete. Issue: https://github.com/link-foundation/box/issues/106 --- .gitkeep | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitkeep diff --git a/.gitkeep b/.gitkeep new file mode 100644 index 0000000..d89aa44 --- /dev/null +++ b/.gitkeep @@ -0,0 +1 @@ +# .gitkeep file auto-generated at 2026-06-19T10:31:26.211Z for PR creation at branch issue-106-81bf2d7bb66f for issue https://github.com/link-foundation/box/issues/106 \ No newline at end of file From 0a681e1770d56f81bcdcba4f18bbb0306141bc8a Mon Sep 17 00:00:00 2001 From: konard Date: Fri, 19 Jun 2026 10:43:51 +0000 Subject: [PATCH 2/4] dind-box: verify host-image passthrough seeded the nested daemon; stop falsely reporting 'complete' (issue #106) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit box-dind kept re-downloading multi-GB host images on first nested 'docker run' even with DIND_HOST_PASSTHROUGH_IMAGES set, while the entrypoint still printed 'image preload/passthrough complete' — so a misconfigured deployment (forgotten host docker socket mount, host missing that exact ref, or the mode=public filter dropping a locally-built/private image) looked healthy right up until the slow re-pull (the recurring symptom behind closed issues #94 and #102). The entrypoint now verifies the copy after passthrough: for every concrete allowlist entry (explicit tag or @sha256: digest — bare repos and globs are skipped to avoid false alarms) it runs 'docker image inspect' against the nested daemon. 
A missing expected image now produces a loud, actionable warning (host socket reachable but lacks the ref / filtered by mode, or no usable socket mounted) and the terminal line becomes 'image preload/passthrough finished WITH WARNINGS' instead of the misleading 'complete'. No silent no-op path can report success anymore. We do not auto-pull as a fallback — that would mask the config error and incur the same multi-GB download; the re-pull still happens naturally. Adds ref_is_concrete() + verify_passthrough_images() helpers, new unit cases in experiments/preload-unit-test.sh (62 passing), verify_ok/verify_miss assertions in the CI-run tests/dind/example-preload-images.sh, and docs in docs/dind/USAGE.md and README.md. --- .changeset/issue-106-passthrough-verify.md | 11 +++ README.md | 2 +- docs/dind/USAGE.md | 31 ++++++- experiments/preload-unit-test.sh | 96 ++++++++++++++++++---- tests/dind/example-preload-images.sh | 61 +++++++++++++- ubuntu/24.04/dind/dind-entrypoint.sh | 77 ++++++++++++++++- 6 files changed, 257 insertions(+), 21 deletions(-) create mode 100644 .changeset/issue-106-passthrough-verify.md diff --git a/.changeset/issue-106-passthrough-verify.md b/.changeset/issue-106-passthrough-verify.md new file mode 100644 index 0000000..59b1313 --- /dev/null +++ b/.changeset/issue-106-passthrough-verify.md @@ -0,0 +1,11 @@ +--- +bump: patch +--- + +dind-box: verify host-image passthrough actually seeded the nested daemon, and stop falsely reporting success when it did not (issue #106). + +box-dind kept re-downloading multi-GB host images (~30 GB, ~1 hour) on first nested `docker run` even with `DIND_HOST_PASSTHROUGH_IMAGES` set, while the entrypoint still printed `image preload/passthrough complete` — so a misconfigured deployment (forgotten `-v /var/run/docker.sock:…:ro` mount, host missing that exact ref, or the `mode=public` filter dropping a locally-built/private image) looked healthy right up until the slow re-pull. 
This is the recurring symptom behind closed issues #94 and #102. + +The entrypoint now verifies the copy after passthrough: for every **concrete** allowlist entry (explicit tag or `@sha256:` digest — bare repos and globs are skipped to avoid false alarms) it runs `docker image inspect` against the nested daemon. If an expected image is absent it emits a loud, actionable warning (whether the host socket is reachable but lacks the ref / was filtered by the mode, or no usable socket is mounted) and the completion line becomes `image preload/passthrough finished WITH WARNINGS` instead of the misleading `complete`. No silent no-op path can report success anymore. Re-pull still happens naturally — we do not auto-pull, which would mask the config error and incur the same multi-GB download. + +Covered by new cases in `experiments/preload-unit-test.sh` and new `verify_ok`/`verify_miss` assertions in the CI-run `tests/dind/example-preload-images.sh`; deployment wiring and verification behavior documented in `docs/dind/USAGE.md` and `README.md`. diff --git a/README.md b/README.md index 2ab111e..959aee3 100644 --- a/README.md +++ b/README.md @@ -218,7 +218,7 @@ Each row below has the same toolchain as its non-dind sibling **plus** a working > - **Recommended secure invocation:** [`docker run --runtime=sysbox-runc konard/box-dind`](https://github.com/nestybox/sysbox) — Sysbox is a drop-in OCI runtime that runs system containers without `--privileged` and without exposing host devices. > - **Do NOT bind-mount `/var/run/docker.sock`.** That gives the container root on the host ([Quarkslab](https://blog.quarkslab.com/why-is-exposing-the-docker-socket-a-really-bad-idea.html), [OWASP](https://cheatsheetseries.owasp.org/cheatsheets/Docker_Security_Cheat_Sheet.html)) and breaks the per-box `docker ps` scoping property. > - **Storage:** the inner daemon writes to `/var/lib/docker` inside the container by default. For persistence, mount a volume: `-v box-dind-data:/var/lib/docker`. 
-> - **Reusing host images:** the nested daemon starts with an empty image store, so a fresh container re-downloads images the host already has. Seed it explicitly at startup with `DIND_PRELOAD_TARBALL` (mount `docker save` tarballs) or `DIND_PRELOAD_IMAGES` (pull from a registry/mirror); see [Reusing Host Images](docs/dind/USAGE.md#reusing-host-images-preload). For automatic seeding, mount the host socket at `-v /var/run/docker.sock:/var/run/host-docker.sock:ro` — host-image passthrough is on by default and copies the host's **public** images (those re-pullable from a public registry, so no local secrets or private credentials leak) into the inner daemon; `DIND_HOST_PASSTHROUGH=all` passes everything and `=off` disables it. To copy only specific images rather than every public one, set `DIND_HOST_PASSTHROUGH_IMAGES` to a space-separated allowlist of names/globs (e.g. `"konard/hive-mind konard/hive-mind-dind"`), composed with the mode filter. The host socket is mounted at a non-default path and read only at startup to seed images, so the inner daemon keeps its own isolated socket. See [Host-Image Passthrough](docs/dind/USAGE.md#host-image-passthrough-dind_host_passthrough). +> - **Reusing host images:** the nested daemon starts with an empty image store, so a fresh container re-downloads images the host already has. Seed it explicitly at startup with `DIND_PRELOAD_TARBALL` (mount `docker save` tarballs) or `DIND_PRELOAD_IMAGES` (pull from a registry/mirror); see [Reusing Host Images](docs/dind/USAGE.md#reusing-host-images-preload). For automatic seeding, mount the host socket at `-v /var/run/docker.sock:/var/run/host-docker.sock:ro` — host-image passthrough is on by default and copies the host's **public** images (those re-pullable from a public registry, so no local secrets or private credentials leak) into the inner daemon; `DIND_HOST_PASSTHROUGH=all` passes everything and `=off` disables it. 
To copy only specific images rather than every public one, set `DIND_HOST_PASSTHROUGH_IMAGES` to a space-separated allowlist of names/globs (e.g. `"konard/hive-mind konard/hive-mind-dind"`), composed with the mode filter. Pin a concrete tag/digest (e.g. `konard/hive-mind-dind:2.0.6`) and the entrypoint verifies the image actually landed in the nested daemon after passthrough — if it did not (forgotten socket mount, host missing that ref, or the mode filter dropped it) it warns loudly instead of falsely reporting "complete", so you are not surprised by a multi-GB re-pull on first run (issue #106). The host socket is mounted at a non-default path and read only at startup to seed images, so the inner daemon keeps its own isolated socket. See [Host-Image Passthrough](docs/dind/USAGE.md#host-image-passthrough-dind_host_passthrough). > - **Usage examples:** see [`docs/dind/USAGE.md`](docs/dind/USAGE.md). Its examples are backed by executable tests under `tests/dind/`. See [docs/case-studies/issue-80/CASE-STUDY.md](docs/case-studies/issue-80/CASE-STUDY.md) for the full design and threat model. diff --git a/docs/dind/USAGE.md b/docs/dind/USAGE.md index 80aaa5b..00b8ab9 100644 --- a/docs/dind/USAGE.md +++ b/docs/dind/USAGE.md @@ -74,7 +74,7 @@ The entrypoint supports these environment variables: | `DIND_HOST_PASSTHROUGH` | `public` | Copy images already present on the host into the nested daemon at startup when a host socket is mounted (see below). `public` only passes images with a RepoDigest from an allowlisted public registry; `all` passes every tagged image; `off` disables it. A quiet no-op when no host socket is mounted. | | `DIND_HOST_DOCKER_SOCK` | `/var/run/host-docker.sock` | Path inside the container to the mounted *host* Docker socket used for passthrough. Deliberately **not** `/var/run/docker.sock`, so the inner daemon keeps its own isolated socket. 
| | `DIND_HOST_PASSTHROUGH_REGISTRIES` | common public registries | Space-separated allowlist of registries treated as "public" in `DIND_HOST_PASSTHROUGH=public` mode (default: `docker.io ghcr.io quay.io gcr.io registry.k8s.io public.ecr.aws mcr.microsoft.com`). | -| `DIND_HOST_PASSTHROUGH_IMAGES` | _(empty)_ | Space-separated allowlist of image references / globs. When non-empty, only host images matching at least one entry are passed through, composed with the mode filter (so `public` still requires a public RepoDigest). Empty keeps the mode + registry filter only. One level finer than `DIND_HOST_PASSTHROUGH_REGISTRIES` — scope to specific repositories / image names. | +| `DIND_HOST_PASSTHROUGH_IMAGES` | _(empty)_ | Space-separated allowlist of image references / globs. When non-empty, only host images matching at least one entry are passed through, composed with the mode filter (so `public` still requires a public RepoDigest). Empty keeps the mode + registry filter only. One level finer than `DIND_HOST_PASSTHROUGH_REGISTRIES` — scope to specific repositories / image names. Each concrete entry (explicit tag/digest) is verified present in the nested daemon after passthrough; a missing one warns loudly instead of falsely reporting "complete" (issue #106). | Use a named volume when the inner Docker state should survive container removal: @@ -271,6 +271,35 @@ the nested daemon will otherwise re-pull from the registry on the first `docker run` with no hint as to why (issue #102). Plain `box-dind` containers that never set an allowlist still see no extra noise when no socket is mounted. +### Verifying the copy actually happened (`issue #106`) + +A warning about a forgotten mount only covers one failure mode. Passthrough can +also quietly seed *nothing* for other reasons — the host does not have the image +under that exact reference, the socket is present but unreachable, or `public` +mode filtered out a locally-built image (no RepoDigest). 
In every case the +entrypoint used to print `image preload/passthrough complete` regardless, and +the first nested `docker run` then silently re-pulled the multi-GB image from the +registry (~30 GB, ~1 h downstream — `link-assistant/hive-mind#1914`/`#1946`). + +So after passthrough runs, each **concrete** `DIND_HOST_PASSTHROUGH_IMAGES` +entry — one with an explicit tag or digest, no glob — is verified to actually be +present in the nested daemon (`docker image inspect <ref>`). When one is +missing, the entrypoint: + +- emits a loud, actionable warning naming the un-seeded image(s) and the likely + cause (missing/unreachable socket, host lacks that exact ref, or the mode + filter dropped it — with the `DIND_HOST_PASSTHROUGH=all` remedy for + locally-built/private images), and +- ends the phase with `image preload/passthrough finished WITH WARNINGS` + instead of the misleading `...complete`, so logs never claim success when + nothing was copied. + +Bare repositories (`konard/hive-mind`) and globs (`konard/hive-mind*`) are not +concrete — the host may hold them under any tag — so they are not individually +verified and never trigger a false alarm. To get this assertion for a specific +image, pin it in the allowlist with an explicit tag or digest, e.g. +`DIND_HOST_PASSTHROUGH_IMAGES=konard/hive-mind-dind:2.0.6`. 
+ ## Commit Cycles `DIND_SKIP_DAEMON=1` is useful for setup containers where you want to install or diff --git a/experiments/preload-unit-test.sh b/experiments/preload-unit-test.sh index b251fc1..adebe83 100755 --- a/experiments/preload-unit-test.sh +++ b/experiments/preload-unit-test.sh @@ -41,11 +41,24 @@ case "$1" in [ "$host" = "1" ] && cat "$HOST_IMAGES" 2>/dev/null exit 0 ;; load) - cat >/dev/null 2>&1 || true # drain the piped tar stream like real `docker load` - echo "loaded" >> "$DOCKER_LOADED"; exit 0 ;; + echo "loaded" >> "$DOCKER_LOADED" + if [ "${2:-}" = "-i" ]; then + # Tarball load (`docker load -i file`): nothing is piped and the mock has + # no way to know which refs the tarball carried, so just record the load. + exit 0 + fi + # Piped load (`docker -H .. save | docker load`): our mock `save` + # encodes the ref as a "REF:" line, so the loaded image becomes present + # in the inner daemon — mirroring real `docker load` so post-load + # verification (issue #106) sees what was actually seeded. + while IFS= read -r line; do + case "$line" in REF:*) printf '%s\n' "${line#REF:}" >> "$DOCKER_PRESENT" ;; esac + done + exit 0 ;; save) - # `docker -H .. save ` streams a tarball; mark it saved. - echo "$2" >> "$DOCKER_SAVED"; echo "fake-tar-stream"; exit 0 ;; + # `docker -H .. save ` streams a tarball; mark it saved and encode the + # ref so the piped `docker load` can mark it present (see above). + echo "$2" >> "$DOCKER_SAVED"; printf 'REF:%s\n' "$2"; exit 0 ;; pull) echo "$2" >> "$DOCKER_PULLED"; echo "$2" >> "$DOCKER_PRESENT"; exit 0 ;; *) exit 0 ;; @@ -61,6 +74,11 @@ export DOCKER_PRESENT="$WORK/present.log" export DOCKER_SAVED="$WORK/saved.log" export HOST_IMAGES="$WORK/host-images.log" export HOST_DIGESTS="$WORK/host-digests.log" +# Captured entrypoint stdout/stderr for the issue #106 verification cases. +# Exported so the `bash -c '! 
grep ...'` negative checks resolve the path inside +# their subshell (an unexported $WORK would silently miss the file). +export OUT_LOG="$WORK/out.log" +export ERR_LOG="$WORK/err.log" # --- Source the real entrypoint for its functions only --- # shellcheck disable=SC1090 @@ -138,17 +156,17 @@ check "no docker calls at all" bash -c '! test -s "$DOCKER_CALLS"' echo "== Case 7: missing tarball path warns, no load ==" reset_state DIND_HOST_PASSTHROUGH=off DIND_PRELOAD_TARBALL="$WORK/does-not-exist.tar" DIND_PRELOAD_IMAGES="" \ - DOCKER_INFO_OK=1 preload_into_daemon 2>"$WORK/err.log" + DOCKER_INFO_OK=1 preload_into_daemon 2>"$ERR_LOG" check "no load for missing path" bash -c '! grep -q "load -i" "$DOCKER_CALLS"' -check "warning emitted for missing path" grep -q "does not exist" "$WORK/err.log" +check "warning emitted for missing path" grep -q "does not exist" "$ERR_LOG" echo "== Case 8: passthrough is a quiet no-op when no host socket is mounted ==" reset_state DIND_HOST_PASSTHROUGH=public DIND_HOST_DOCKER_SOCK="$WORK/absent.sock" \ DIND_PRELOAD_TARBALL="" DIND_PRELOAD_IMAGES="" DOCKER_INFO_OK=1 \ - preload_into_daemon 2>"$WORK/err.log" + preload_into_daemon 2>"$ERR_LOG" check "no host save attempted without a socket" bash -c '! test -s "$DOCKER_SAVED"' -check "no warning emitted when socket simply absent" bash -c '! test -s "$WORK/err.log"' +check "no warning emitted when socket simply absent" bash -c '! test -s "$ERR_LOG"' echo "== Case 8b: explicit allowlist + absent socket warns about the missing mount (issue #102) ==" reset_state @@ -158,10 +176,10 @@ reset_state DIND_HOST_PASSTHROUGH=public DIND_HOST_DOCKER_SOCK="$WORK/absent.sock" \ DIND_HOST_PASSTHROUGH_IMAGES="hello-world" \ DIND_PRELOAD_TARBALL="" DIND_PRELOAD_IMAGES="" DOCKER_INFO_OK=1 \ - preload_into_daemon 2>"$WORK/err.log" + preload_into_daemon 2>"$ERR_LOG" check "no host save attempted without a socket" bash -c '! 
test -s "$DOCKER_SAVED"' -check "warning names DIND_HOST_PASSTHROUGH_IMAGES" grep -q "DIND_HOST_PASSTHROUGH_IMAGES is set" "$WORK/err.log" -check "warning suggests the -v mount remediation" grep -q -- "-v /var/run/docker.sock:" "$WORK/err.log" +check "warning names DIND_HOST_PASSTHROUGH_IMAGES" grep -q "DIND_HOST_PASSTHROUGH_IMAGES is set" "$ERR_LOG" +check "warning suggests the -v mount remediation" grep -q -- "-v /var/run/docker.sock:" "$ERR_LOG" echo "== Case 8c: present-but-unreachable socket still wins over the allowlist warning ==" reset_state @@ -172,9 +190,9 @@ touch "$WORK/dead.sock" DIND_HOST_PASSTHROUGH=public DIND_HOST_DOCKER_SOCK="$WORK/dead.sock" \ DIND_HOST_PASSTHROUGH_IMAGES="hello-world" \ DIND_PRELOAD_TARBALL="" DIND_PRELOAD_IMAGES="" DOCKER_INFO_OK=1 HOST_DOCKER_OK=0 \ - preload_into_daemon 2>"$WORK/err.log" -check "unreachable-socket warning fires" grep -q "is not accessible; skipping passthrough" "$WORK/err.log" -check "missing-mount hint suppressed when a socket file exists" bash -c '! grep -q "DIND_HOST_PASSTHROUGH_IMAGES is set" "$WORK/err.log"' + preload_into_daemon 2>"$ERR_LOG" +check "unreachable-socket warning fires" grep -q "is not accessible; skipping passthrough" "$ERR_LOG" +check "missing-mount hint suppressed when a socket file exists" bash -c '! grep -q "DIND_HOST_PASSTHROUGH_IMAGES is set" "$ERR_LOG"' rm -f "$WORK/dead.sock" echo "== Case 9: public mode copies a Docker Hub image, skips a local one ==" @@ -294,6 +312,56 @@ check "empty allowlist still saves hive-mind" grep -qx "konard/hive-mind:latest" check "empty allowlist still saves alpine" grep -qx "alpine:3.20" "$DOCKER_SAVED" rm -f "$HOST_SOCK" +echo "== Case 19: concrete allowlisted image present after passthrough -> honest 'complete' (issue #106) ==" +reset_state +# Host has the named image with a public RepoDigest; the socket is mounted, so +# passthrough copies it and the mock `load` marks it present in the inner daemon. 
+printf '%s\n' "konard/hive-mind-dind:2.0.6" > "$HOST_IMAGES" +echo "konard/hive-mind-dind:2.0.6|konard/hive-mind-dind@sha256:aaa " > "$HOST_DIGESTS" +make_sock "$HOST_SOCK" +DIND_HOST_PASSTHROUGH=public DIND_HOST_DOCKER_SOCK="$HOST_SOCK" \ + DIND_HOST_PASSTHROUGH_IMAGES="konard/hive-mind-dind:2.0.6" \ + DIND_PRELOAD_TARBALL="" DIND_PRELOAD_IMAGES="" DOCKER_INFO_OK=1 HOST_DOCKER_OK=1 \ + preload_into_daemon >"$OUT_LOG" 2>"$ERR_LOG" +check "seeded concrete image was saved from host" grep -qx "konard/hive-mind-dind:2.0.6" "$DOCKER_SAVED" +check "honest 'complete' marker printed" grep -q "image preload/passthrough complete" "$OUT_LOG" +check "no verification warning when present" bash -c '! grep -q "did NOT seed" "$ERR_LOG"' +check "no 'WITH WARNINGS' marker when present" bash -c '! grep -q "finished WITH WARNINGS" "$OUT_LOG" "$ERR_LOG"' +rm -f "$HOST_SOCK" + +echo "== Case 20: concrete allowlisted image absent -> loud warning, no false 'complete' (issue #106) ==" +reset_state +# No host socket mounted, so nothing can be copied. The named concrete image is +# absent from the inner daemon: verification must catch it and suppress 'complete'. +DIND_HOST_PASSTHROUGH=public DIND_HOST_DOCKER_SOCK="$WORK/absent.sock" \ + DIND_HOST_PASSTHROUGH_IMAGES="konard/hive-mind-dind:2.0.6" \ + DIND_PRELOAD_TARBALL="" DIND_PRELOAD_IMAGES="" DOCKER_INFO_OK=1 \ + preload_into_daemon >"$OUT_LOG" 2>"$ERR_LOG" +check "verification warns it did NOT seed the image" grep -q "did NOT seed expected image(s) into the nested daemon: konard/hive-mind-dind:2.0.6" "$ERR_LOG" +check "warning points at the missing -v mount" grep -q -- "-v /var/run/docker.sock:" "$ERR_LOG" +check "terminal marker is 'finished WITH WARNINGS'" grep -q "image preload/passthrough finished WITH WARNINGS" "$ERR_LOG" +check "misleading 'complete' is NOT printed" bash -c '! 
grep -q "image preload/passthrough complete" "$OUT_LOG" "$ERR_LOG"' + +echo "== Case 21: glob / bare-repo allowlist entries never raise a false verification alarm (issue #106) ==" +reset_state +# Neither a glob nor a bare repository is concrete, so verification must skip +# them (the host could hold any tag) and still report an honest 'complete'. +DIND_HOST_PASSTHROUGH=public DIND_HOST_DOCKER_SOCK="$WORK/absent.sock" \ + DIND_HOST_PASSTHROUGH_IMAGES="konard/hive-mind* konard/other" \ + DIND_PRELOAD_TARBALL="" DIND_PRELOAD_IMAGES="" DOCKER_INFO_OK=1 \ + preload_into_daemon >"$OUT_LOG" 2>"$ERR_LOG" +check "no verification warning for non-concrete entries" bash -c '! grep -q "did NOT seed" "$ERR_LOG"' +check "honest 'complete' still printed" grep -q "image preload/passthrough complete" "$OUT_LOG" + +echo "== Case 22: ref_is_concrete classification (direct calls) ==" +reset_state +check "explicit tag is concrete" eval 'ref_is_concrete "konard/hive-mind-dind:2.0.6"' +check "explicit digest is concrete" eval 'ref_is_concrete "konard/hive-mind-dind@sha256:abc"' +check "bare repo is NOT concrete" eval '! ref_is_concrete "konard/hive-mind"' +check "glob is NOT concrete" eval '! ref_is_concrete "konard/hive-mind*"' +check "registry port w/o tag NOT concrete" eval '! 
ref_is_concrete "registry.example.com:5000/repo"' +check "registry port WITH tag is concrete" eval 'ref_is_concrete "registry.example.com:5000/repo:v1"' + echo "== Case 18: image-matching helper normalization (direct calls) ==" reset_state # Like Case 13, drive the sourced helper in the current shell via `eval` so the diff --git a/tests/dind/example-preload-images.sh b/tests/dind/example-preload-images.sh index db1a9b6..29b883c 100755 --- a/tests/dind/example-preload-images.sh +++ b/tests/dind/example-preload-images.sh @@ -50,8 +50,13 @@ wait_for_preload_complete() { local i=0 while [ "$i" -lt "$limit" ]; do - if logs_contain "$container" "image preload/passthrough complete"; then - log "image preload/passthrough completed in ${container} after ${i}s" + # The entrypoint ends the preload phase with one of two terminal markers: + # "...complete" on success or "...finished WITH WARNINGS" when a named image + # was not seeded (issue #106). Sync on either so the wait never hangs on the + # warning path. + if logs_contain "$container" "image preload/passthrough complete" \ + || logs_contain "$container" "image preload/passthrough finished WITH WARNINGS"; then + log "image preload/passthrough finished in ${container} after ${i}s" return 0 fi i=$((i + 1)) @@ -228,6 +233,58 @@ if ! logs_contain "$images_container" "images=${fixture_repo}"; then fi log "images-allowlist passthrough copied only the named repo and skipped the rest" +# --- Post-passthrough verification of a concrete allowlist entry (issue #106) -- +# A concrete (explicitly tagged) allowlist entry is verified to actually be +# present in the nested daemon after passthrough. When the host has it and the +# socket is mounted, verification passes and the entrypoint logs the honest +# "complete" marker with no warning. 
+verify_ok_container="${DIND_EXAMPLE_ID}-passthrough-verify-ok" +log "starting consumer with a concrete DIND_HOST_PASSTHROUGH_IMAGES=${fixture_image} (socket mounted)" +run_dind_container "$verify_ok_container" \ + -e DIND_HOST_PASSTHROUGH=all \ + -e "DIND_HOST_PASSTHROUGH_IMAGES=$fixture_image" \ + -e DIND_HOST_DOCKER_SOCK=/host-sock/docker.sock \ + -v "$host_sock_dir:/host-sock:ro" +wait_for_inner_docker "$verify_ok_container" +wait_for_preload_complete "$verify_ok_container" +assert_inner_has_image "$verify_ok_container" +if ! logs_contain "$verify_ok_container" "image preload/passthrough complete"; then + docker logs "$verify_ok_container" >&2 || true + fail "expected the honest 'complete' marker when the concrete allowlisted image was seeded" +fi +if logs_contain "$verify_ok_container" "host-image passthrough did NOT seed"; then + docker logs "$verify_ok_container" >&2 || true + fail "verification must not warn when the concrete allowlisted image is present" +fi +log "verification confirmed the seeded image and logged the honest completion marker" + +# --- Verification flags a concrete allowlist entry that was NOT seeded (#106) -- +# Same concrete entry, but NO host socket mounted: the image cannot be copied, so +# verification must catch the absence, warn loudly naming the image, and the +# entrypoint must NOT print a misleading "complete" — it ends with the explicit +# "finished WITH WARNINGS" marker instead. This is the core regression guard for +# the "claims complete yet re-pulls ~30 GB" symptom. +verify_miss_container="${DIND_EXAMPLE_ID}-passthrough-verify-miss" +log "starting consumer with a concrete DIND_HOST_PASSTHROUGH_IMAGES=${fixture_image} but NO socket" +run_dind_container "$verify_miss_container" \ + -e DIND_HOST_PASSTHROUGH=public \ + -e "DIND_HOST_PASSTHROUGH_IMAGES=$fixture_image" +wait_for_inner_docker "$verify_miss_container" +wait_for_preload_complete "$verify_miss_container" +if ! 
logs_contain "$verify_miss_container" "host-image passthrough did NOT seed expected image(s) into the nested daemon: ${fixture_image}"; then + docker logs "$verify_miss_container" >&2 || true + fail "expected verification to warn naming the missing concrete image (${fixture_image})" +fi +if ! logs_contain "$verify_miss_container" "image preload/passthrough finished WITH WARNINGS"; then + docker logs "$verify_miss_container" >&2 || true + fail "expected the 'finished WITH WARNINGS' terminal marker when a named image was not seeded" +fi +if logs_contain "$verify_miss_container" "image preload/passthrough complete"; then + docker logs "$verify_miss_container" >&2 || true + fail "must NOT print 'image preload/passthrough complete' when a named image was not seeded" +fi +log "verification surfaced the un-seeded image and suppressed the misleading 'complete' marker" + # --- Opt-in allowlist but no host socket mounted (issue #102) --------------- # Setting DIND_HOST_PASSTHROUGH_IMAGES is an unambiguous "pass these through" # signal. If the operator forgets the `-v` socket mount, passthrough used to be diff --git a/ubuntu/24.04/dind/dind-entrypoint.sh b/ubuntu/24.04/dind/dind-entrypoint.sh index efb581c..3a0840d 100644 --- a/ubuntu/24.04/dind/dind-entrypoint.sh +++ b/ubuntu/24.04/dind/dind-entrypoint.sh @@ -83,6 +83,14 @@ # (e.g. "docker.io/konard/hive-mind*"). This narrows # passthrough one level finer than the registry allowlist # — to specific repositories / image names. (issue #97) +# After passthrough runs, every *concrete* entry here (an +# explicit tag or digest, no glob) is verified to actually +# be present in the nested daemon; a missing one triggers a +# loud warning instead of a false "complete", because the +# first nested 'docker run' would otherwise silently re-pull +# the multi-GB image from the registry (the lingering +# symptom of issues #94 / #102, still seen downstream in +# link-assistant/hive-mind#1914/#1946). 
(issue #106) set -eu @@ -487,6 +495,62 @@ passthrough_host_images() { done } +# True when a reference is concrete enough to verify by name: it carries an +# explicit tag or digest and contains no glob metacharacters. A bare repository +# ("konard/hive-mind") or a glob ("konard/hive-mind*") is NOT concrete — it has +# no single deterministic ref to inspect in the nested daemon (the host may hold +# it under any tag), so verification skips it rather than risk a false alarm. +# Note: a ':' is only a tag separator in the LAST path segment; "host:5000/repo" +# is a registry port, not a tag, and is correctly treated as non-concrete. +ref_is_concrete() { + case "$1" in + *'*'*|*'?'*|*'['*) return 1 ;; # glob pattern + esac + case "$1" in + *@*) return 0 ;; # explicit digest (…@sha256:…) + esac + case "${1##*/}" in + *:*) return 0 ;; # explicit tag in the final segment + esac + return 1 +} + +# Assert that every concrete DIND_HOST_PASSTHROUGH_IMAGES entry actually landed +# in the nested daemon after passthrough ran. Setting the allowlist is an +# unambiguous "seed these" request; if a named image is still absent, the first +# nested 'docker run' will silently re-pull it (multi-GB, ~1h downstream — the +# exact #94/#102 symptom). Rather than print a misleading "complete", surface a +# loud, actionable warning naming the missing image(s) and the likely cause. +# Returns 0 when everything expected is present (or there is nothing concrete to +# check), non-zero when at least one named image is missing. (issue #106) +verify_passthrough_images() { + host_passthrough_enabled || return 0 + [ -n "$DIND_HOST_PASSTHROUGH_IMAGES" ] || return 0 + + missing="" + for entry in $DIND_HOST_PASSTHROUGH_IMAGES; do + ref_is_concrete "$entry" || continue + if ! 
docker image inspect "$entry" >/dev/null 2>&1; then + missing="${missing:+$missing }$entry" + fi + done + + [ -n "$missing" ] || return 0 + + warn "host-image passthrough did NOT seed expected image(s) into the nested daemon: ${missing}" + warn "the first nested 'docker run' will re-pull each from its registry (multi-GB, slow)." + if host_docker_available; then + warn "the host socket at ${DIND_HOST_DOCKER_SOCK} is reachable, so the host most likely does not" + warn "have the image under that exact reference, or mode=${DIND_HOST_PASSTHROUGH} filtered it out" + warn "(public passes only images with a public RepoDigest; use DIND_HOST_PASSTHROUGH=all for" + warn "locally-built or private images)." + else + warn "no usable host docker socket at ${DIND_HOST_DOCKER_SOCK}; mount it read-only with" + warn "-v /var/run/docker.sock:${DIND_HOST_DOCKER_SOCK}:ro so passthrough can copy the image." + fi + return 1 +} + preload_into_daemon() { # Tarball/registry preload only run when their vars are set; host passthrough # is on by default, so we still proceed to give it a chance to find a socket. @@ -503,10 +567,17 @@ preload_into_daemon() { preload_tarballs passthrough_host_images preload_images - # Emit a completion marker once every preload path has finished so consumers + # Emit a terminal marker once every preload path has finished so consumers # (and tests) can synchronize on "images are seeded" rather than racing the - # asynchronous load against mere dockerd readiness. (issue #94) - log "image preload/passthrough complete" + # asynchronous load against mere dockerd readiness. (issue #94) The wording is + # honest about the outcome: only claim "complete" when every concrete + # allowlisted image is actually present; otherwise say so loudly instead of + # papering over a silent re-pull. 
(issue #106) + if verify_passthrough_images; then + log "image preload/passthrough complete" + else + warn "image preload/passthrough finished WITH WARNINGS: expected host image(s) were not seeded (see above)" + fi } # Allow the unit tests to source this file for the function definitions without From 6c3d582d1680170c66296f194da8ca5be1cdf43a Mon Sep 17 00:00:00 2001 From: konard Date: Fri, 19 Jun 2026 11:44:17 +0000 Subject: [PATCH 3/4] tests/dind: wait for the vfs warning instead of racing dockerd readiness (fix flaky dind-js) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CI dind-js example suite failed in example-storage-driver-vfs.sh: 'expected the vfs warning to mention "'vfs' storage driver", but it was absent from the container logs'. The vfs copy-on-write warning (issue #104) is emitted by the entrypoint (PID 1) *after* the inner dockerd becomes ready, in warn_if_vfs_storage_driver. The test checked the logs once, the instant wait_for_inner_docker returned — and the test's external 'docker exec docker info' readiness probe can win the race against the entrypoint's own readiness loop, grabbing the logs before the warning is flushed. The dumped logs at the failure showed only '[dind-entrypoint] Starting dockerd (storage-driver=vfs)', confirming the warning simply had not been printed yet. Add a bounded wait_for_logs CONTAINER NEEDLE [LIMIT] helper to lib.sh (poll logs_contain until the line appears or the timeout elapses) and use it for the three vfs-warning needles, mirroring the existing wait_for_preload_complete pattern in example-preload-images.sh. No production code change. 
--- tests/dind/example-storage-driver-vfs.sh | 5 ++++- tests/dind/lib.sh | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/dind/example-storage-driver-vfs.sh b/tests/dind/example-storage-driver-vfs.sh index 35e988a..a395e80 100755 --- a/tests/dind/example-storage-driver-vfs.sh +++ b/tests/dind/example-storage-driver-vfs.sh @@ -22,9 +22,12 @@ log "inner dockerd is using the vfs storage driver" # issue #104: landing on vfs must not be silent. The entrypoint (PID 1) emits a # copy-on-write warning to stderr, which docker captures in the container logs. +# The warning is emitted right *after* the inner dockerd becomes ready, so poll +# for it rather than racing wait_for_inner_docker (it can otherwise read a +# present line as absent — see wait_for_logs in lib.sh). log "verifying the vfs copy-on-write warning was emitted (issue #104)" for needle in "'vfs' storage driver" "no space left on device" "DIND_STORAGE_DRIVER=fuse-overlayfs"; do - if ! logs_contain "$container" "$needle"; then + if ! wait_for_logs "$container" "$needle"; then docker logs "$container" >&2 || true fail "expected the vfs warning to mention \"${needle}\", but it was absent from the container logs" fi diff --git a/tests/dind/lib.sh b/tests/dind/lib.sh index d0b2727..1bdd95c 100755 --- a/tests/dind/lib.sh +++ b/tests/dind/lib.sh @@ -84,6 +84,26 @@ logs_contain() { esac } +# wait_for_logs CONTAINER NEEDLE [LIMIT] +# Polls CONTAINER's logs until NEEDLE appears or LIMIT seconds elapse. Returns 0 +# as soon as it is found, non-zero on timeout. Several entrypoint breadcrumbs +# (the vfs storage-driver warning, the preload/passthrough markers) are emitted +# by PID 1 *after* the inner dockerd becomes ready, so a test that grabbed the +# logs the instant wait_for_inner_docker returned could race ahead of them and +# read a present line as absent. Wait for the line itself rather than racing +# mere daemon readiness. 
+wait_for_logs() { + local container="$1" needle="$2" limit="${3:-$DIND_WAIT_SECONDS}" i=0 + while [ "$i" -lt "$limit" ]; do + if logs_contain "$container" "$needle"; then + return 0 + fi + i=$((i + 1)) + sleep 1 + done + return 1 +} + run_container_from_image() { local name="$1" local image="$2" From eaeed07bcfc8b5b9069a0c5bb8d2307f2bc6744b Mon Sep 17 00:00:00 2001 From: konard Date: Fri, 19 Jun 2026 12:36:39 +0000 Subject: [PATCH 4/4] Revert "Initial commit with task details" This reverts commit 1b565d896b03b20437664eec840e9cb681213e76. --- .gitkeep | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .gitkeep diff --git a/.gitkeep b/.gitkeep deleted file mode 100644 index d89aa44..0000000 --- a/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -# .gitkeep file auto-generated at 2026-06-19T10:31:26.211Z for PR creation at branch issue-106-81bf2d7bb66f for issue https://github.com/link-foundation/box/issues/106 \ No newline at end of file