From 23e0de2943a6669e6e1ed34e2c533c3d9cd8ec8c Mon Sep 17 00:00:00 2001
From: Cameron Bergh <15796764+cameronbergh@users.noreply.github.com>
Date: Tue, 5 May 2026 12:29:22 -0700
Subject: [PATCH] Add CUDA Docker image support

---
 .dockerignore        |  48 ++++++++++++++++
 Dockerfile           | 134 +++++++++++++++++++++++++++++++++++++++++++
 docker-compose.yml   |  19 ++++++
 docker/entrypoint.sh |  16 ++++++
 justfile             |   9 +++
 5 files changed, 226 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yml
 create mode 100755 docker/entrypoint.sh

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000..a5a3603b0a
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,48 @@
+target/
+dist/
+*.egg-info/
+__pycache__/
+*.pyc
+*.pyo
+.ruff_cache/
+.mypy_cache/
+
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+.git/
+.gitignore
+.gitattributes
+
+.direnv/
+result
+result-*
+flake.lock
+
+dashboard/build/
+dashboard/node_modules/
+dashboard/.svelte-kit/
+
+.venv/
+venv/
+env/
+
+.env
+.env.*
+*.local
+
+.pytest_cache/
+htmlcov/
+.coverage
+coverage.xml
+
+docs/
+.github/
+
+.DS_Store
+
+*.bak
+*.orig
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000..8216131852
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,134 @@
+FROM nvidia/cuda:13.0.2-devel-ubuntu24.04 AS rust-builder
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    build-essential \
+    pkg-config \
+    libssl-dev \
+    git \
+    software-properties-common \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+    python3.13 \
+    python3.13-dev \
+    python3.13-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    PATH=/usr/local/cargo/bin:$PATH
+
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- \
+    -y --default-toolchain nightly --profile minimal
+
+RUN python3.13 -m ensurepip --upgrade \
+    && python3.13 -m pip install --no-cache-dir maturin
+
+WORKDIR /build
+COPY Cargo.toml Cargo.lock ./
+COPY rust/ ./rust/
+
+RUN maturin build \
+    --release \
+    --manylinux off \
+    --manifest-path rust/exo_pyo3_bindings/Cargo.toml \
+    --features "pyo3/extension-module,pyo3/experimental-async" \
+    --interpreter python3.13 \
+    --out /wheels
+
+FROM node:22-slim AS dashboard-builder
+
+WORKDIR /build/dashboard
+COPY dashboard/package.json dashboard/package-lock.json ./
+RUN npm ci
+
+COPY dashboard/ ./
+RUN npm run build
+
+FROM nvidia/cuda:13.0.2-cudnn-devel-ubuntu24.04 AS runtime
+
+ARG DEBIAN_FRONTEND=noninteractive
+ENV CUDA_HOME=/usr/local/cuda \
+    UV_INSTALL_DIR=/usr/local/bin
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    git \
+    libssl3 \
+    software-properties-common \
+    cmake \
+    build-essential \
+    libblas-dev \
+    liblapack-dev \
+    liblapacke-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+    python3.13 \
+    python3.13-dev \
+    python3.13-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+
+ENV VIRTUAL_ENV=/app/.venv \
+    PATH="/app/.venv/bin:/usr/local/bin:$PATH"
+
+WORKDIR /app
+
+COPY --from=rust-builder /wheels/*.whl /tmp/wheels/
+COPY --from=dashboard-builder /build/dashboard/build ./dashboard/build/
+
+COPY pyproject.toml uv.lock README.md ./
+# uv validates workspace members even when Docker installs prebuilt wheels instead.
+COPY bench/pyproject.toml ./bench/pyproject.toml
+COPY rust/exo_pyo3_bindings/pyproject.toml ./rust/exo_pyo3_bindings/pyproject.toml
+COPY resources/ ./resources/
+COPY src/ ./src/
+
+RUN uv venv --python python3.13 \
+    && uv sync --extra cuda13 --no-install-project --no-install-workspace \
+    && uv pip install /tmp/wheels/*.whl \
+    && uv pip install . --no-deps \
+    && rm -rf /tmp/wheels
+
+# MLX-LM expects this stream helper, while current Linux CUDA MLX wheels expose
+# the equivalent API as mx.new_stream(). Keep the compatibility shim local to
+# the Docker image until upstream Linux CUDA wheels catch up.
+RUN /app/.venv/bin/python - <<'PY'
+from pathlib import Path
+import site
+site_packages = Path(site.getsitepackages()[0])
+(site_packages / "mlx_cuda_compat.py").write_text(
+    "import mlx.core as mx\n"
+    "if not hasattr(mx, 'new_thread_local_stream') and hasattr(mx, 'new_stream'):\n"
+    "    mx.new_thread_local_stream = mx.new_stream\n"
+)
+(site_packages / "mlx_cuda_compat.pth").write_text("import mlx_cuda_compat\n")
+PY
+
+# Pre-download tiktoken vocab file for openai_harmony (-f fails the build on HTTP errors).
+# This prevents runtime download failures in restricted network environments.
+# See: https://github.com/exo-explore/exo/issues/1038
+RUN mkdir -p /app/tiktoken_cache \
+    && curl -fsSL https://openaipublic.blob.core.windows.net/encodings/o200k_base.tiktoken \
+    -o /app/tiktoken_cache/o200k_base.tiktoken
+
+ENV TIKTOKEN_ENCODINGS_BASE=/app/tiktoken_cache
+
+COPY docker/entrypoint.sh /app/docker-entrypoint.sh
+RUN chmod +x /app/docker-entrypoint.sh
+
+EXPOSE 52415
+
+ENTRYPOINT ["/app/docker-entrypoint.sh"]
+CMD ["/app/.venv/bin/exo"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000..2431441c93
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,19 @@
+services:
+  exo:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: exo:cuda13
+    network_mode: host
+    gpus: all
+    volumes:
+      # Exo data directory (includes models subdirectory)
+      - ~/.local/share/exo:/root/.local/share/exo
+      # Exo cache (logs, temporary files)
+      - ~/.cache/exo:/root/.cache/exo
+      # Exo config (keypair, settings)
+      - ~/.config/exo:/root/.config/exo
+      # HuggingFace cache (model downloads)
+      - ~/.cache/huggingface:/root/.cache/huggingface
+    stdin_open: true
+    tty: true
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100755
index 0000000000..07429b4048
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+site_packages="$('/app/.venv/bin/python' - <<'PY'
+import site
+print(site.getsitepackages()[0])
+PY
+)"
+
+for library_dir in "$site_packages"/nvidia/*/lib "$site_packages"/nvidia/cu13/lib; do
+    if [ -d "$library_dir" ]; then
+        export LD_LIBRARY_PATH="$library_dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+    fi
+done
+
+exec "$@"
diff --git a/justfile b/justfile
index 96e26c84b7..75e50c839c 100644
--- a/justfile
+++ b/justfile
@@ -47,3 +47,12 @@ clean:
     rm -rf dashboard/node_modules
     rm -rf dashboard/.svelte-kit
     rm -rf dashboard/build
+
+docker-build:
+    docker build -t exo:cuda13 .
+
+docker-run *ARGS:
+    docker compose run --rm exo {{ARGS}}
+
+docker-shell:
+    docker compose run --rm --entrypoint /bin/bash exo