Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
15d29ee
feat(dynamo): AdminAPI abstraction + dynamo backend selector + RL wor…
biswapanda Jun 6, 2026
a1e0c5f
feat(weight-transfer): NCCL broadcast + FP8/E8M0 conversion for GB200…
biswapanda Jun 6, 2026
685c13a
feat(broadcast): NFS-safe filesystem broadcast + weight_broadcast.kee…
biswapanda Jun 6, 2026
4f5ba0b
feat(orchestrator): dispatch compute_teacher_logprobs by renderer_tra…
biswapanda Jun 6, 2026
7e4bc48
feat(inference): vLLM 0.22 patches (fp32 lm-head, int64 silu_mul_quan…
biswapanda Jun 6, 2026
9f2d880
build(deps): point verifiers/renderers submodules at biswapanda rl-sd…
biswapanda Jun 6, 2026
a03a559
build(image): Dockerfile.cuda.runtime (vLLM 0.22, COPY deps, DeepGEMM…
biswapanda Jun 6, 2026
7643298
feat(deploy): helm chart updates + dynamo k8s manifests + smoke-test …
biswapanda Jun 6, 2026
49a4d1f
fix(rl): rename renderer_transport values to vllm_generate/dynamo_cha…
biswapanda Jun 9, 2026
e75f343
fix(routed_experts): carry dtype for models with over 256 experts
biswapanda Jun 10, 2026
cbc2792
feat(inference): forward moe_backend to vLLM so router-replay capture…
biswapanda Jun 10, 2026
788281c
fix(routed_experts): int32 fallback for >65535 experts, normalize uin…
biswapanda Jun 10, 2026
13e411d
fix(routed_experts): preserve per-model dtype so batch packing stays …
biswapanda Jun 10, 2026
2c61937
fix(k8s): drop trainer nvidia.com/gpu request when trainer DRA resour…
biswapanda Jun 10, 2026
1b5917a
fix(k8s): set backend=dynamo in the dynamo-deploy client example
biswapanda Jun 10, 2026
6f42560
chore(deps): bump verifiers/renderers submodules to rl-sdk-4 routed_e…
biswapanda Jun 11, 2026
f8f42f8
chore(deps): restore submodules and .gitmodules to upstream, dropping…
biswapanda Jun 11, 2026
8c837e6
rm extra files
biswapanda Jun 11, 2026
08bb4ea
rm unnecessary files
biswapanda Jun 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[submodule "verifiers"]
path = deps/verifiers
url = git@github.com:PrimeIntellect-ai/verifiers.git
url = git@github.com:biswapanda/verifiers.git
[submodule "renderers"]
path = deps/renderers
url = git@github.com:PrimeIntellect-ai/renderers.git
url = git@github.com:biswapanda/renderers.git
[submodule "research-environments"]
path = deps/research-environments
url = git@github.com:PrimeIntellect-ai/research-environments.git
Expand Down
189 changes: 189 additions & 0 deletions Dockerfile.cuda.runtime
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# Multi-stage Dockerfile for prime-rl with NVRTC support on GB200 (sm_100a).
#
# WHY THIS EXISTS (separate from Dockerfile.cuda):
# The original Dockerfile.cuda uses `python:3.12-slim` as the runtime base.
# That image has no CUDA toolkit, so tilelang's JIT path (which compiles the
# sparse-MLA kernels at runtime via NVRTC) fails with:
#
# atomic.h(7): catastrophic error: cannot open source file "cuda/atomic"
#
# `cuda/atomic` is a libcudacxx (CCCL) header. It is shipped only by the
# CUDA dev/devel toolkit, not by the `nvidia-cuda-*` pip wheels. Without it
# NVRTC cannot compile any kernel that pulls in tilelang's atomic.h.
#
# This Dockerfile uses NVIDIA's cuda-dl-base (devel) image for both stages,
# so the runtime image carries the libcudacxx / CCCL headers tilelang needs
# and the wheels in /app/.venv keep their CUDA 12.x ABI.
#
# BASE IMAGE:
# nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04
# - matches dynamo/container/context.yaml `prime-rl.cuda12.9` entry
# - ships Python 3.12 by default (Ubuntu 24.04)
# - includes libcudacxx, CCCL, cuDNN, nvcc, and full CUDA dev headers
# - forward-compatible with the CUDA 12.8 wheels pinned in uv.lock
#
# USAGE:
# docker buildx build --platform linux/arm64 \
# --build-arg TARGETARCH=arm64 \
# -f Dockerfile.cuda.runtime -t <tag> .

# Base image repository and tag are separate build args so either one can be
# overridden independently with --build-arg.
ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base
ARG BASE_IMAGE_TAG=25.06-cuda12.9-devel-ubuntu24.04

############################
##### Build stage ##########
############################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS builder
LABEL maintainer="prime intellect" \
      repository="prime-rl"

# Record en_US.UTF-8 as the default locale in /etc/environment.
RUN echo "LC_ALL=en_US.UTF-8" >> /etc/environment

# The base image already points CUDA_HOME / PATH at /usr/local/cuda; they are
# restated explicitly so downstream tooling (tilelang, flash-attn) sees them.
ENV CUDA_HOME=/usr/local/cuda \
    PATH=$PATH:/usr/local/cuda/bin

# Build tooling, installed non-interactively with a fixed UTC timezone.
ARG DEBIAN_FRONTEND=noninteractive
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=Etc/UTC
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        ninja-build \
        sudo \
 && apt-get clean autoclean \
 && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# uv: run the upstream installer into /usr/local/bin without touching shell
# profiles, then point uv's Python-install and cache directories at shared
# locations under /usr/local/share/uv.
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN INSTALLER_NO_MODIFY_PATH=1 UV_INSTALL_DIR="/usr/local/bin" sh /uv-installer.sh \
 && rm /uv-installer.sh
ENV PATH="/usr/local/bin:$PATH" \
    UV_PYTHON_INSTALL_DIR="/usr/local/share/uv/python" \
    UV_CACHE_DIR="/usr/local/share/uv/cache"

# Install Python dependencies (gradual copies help with caching).
WORKDIR /app

# UV_COMPILE_BYTECODE=1: precompile .pyc files at install time.
# UV_LINK_MODE=copy: the uv cache sits on a BuildKit cache mount (a separate
# filesystem from the image layers), so packages are copied into the venv
# instead of being linked from the cache.
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy

COPY pyproject.toml /app/pyproject.toml
COPY uv.lock /app/uv.lock
COPY README.md /app/README.md
COPY src/ /app/src/
COPY packages/ /app/packages/
COPY deps/ /app/deps/
COPY configs /app/configs
COPY examples /app/examples
COPY benchmarks/scripts /app/benchmarks/scripts

# The cache mount must sit on the directory uv actually uses. UV_CACHE_DIR is
# set to /usr/local/share/uv/cache earlier in this stage, so the mount targets
# that path; a mount anywhere else is never read or written by uv and gives no
# reuse between builds.
RUN --mount=type=cache,target=/usr/local/share/uv/cache \
    uv sync --extra flash-attn --extra flash-attn-3 --extra flash-attn-cute --extra envs --extra gpt-oss --group mamba-ssm --locked --no-dev

# arm64 only: flash-attn + DeepGEMM are built from source via the post-install
# script. For any other TARGETARCH the RUN below does nothing and succeeds.
ARG TARGETARCH
COPY scripts/docker-arm64-post-install.sh scripts/install_deep_gemm.sh /app/scripts/
RUN [ "$TARGETARCH" != "arm64" ] || /app/scripts/docker-arm64-post-install.sh

# vLLM PR #39366 (two-phase DP pause) is native in vLLM 0.22 — no patch needed
# (the rl-sdk-4 merge bumped vLLM 0.20.2 -> 0.22 and dropped this patch).

############################
##### Runtime stage ########
############################
# Same image so libcudacxx, CCCL headers, and the system Python 3.12 are all
# present at runtime — the original Dockerfile.cuda switched to python:3.12-slim
# here and lost the CUDA dev headers, which broke tilelang's NVRTC backend.
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}

# CUDA toolchain location; the CUDA bin directory goes at the front of PATH.
ENV CUDA_HOME=/usr/local/cuda \
    PATH=/usr/local/cuda/bin:$PATH

ARG DEBIAN_FRONTEND=noninteractive
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=Etc/UTC

# Runtime packages: compilers, networking/debug tools, the ibverbs userspace
# libraries, and the system Python 3.12 with venv support.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        clang \
        curl \
        git \
        git-lfs \
        gpg \
        ibverbs-providers \
        iperf \
        iputils-ping \
        libibverbs1 \
        net-tools \
        openssh-server \
        python3.12 \
        python3.12-venv \
        sudo \
        tmux \
        vim \
        wget \
 && apt-get clean autoclean \
 && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Make `python`, `python3` and `python3.12` in /usr/local/bin all resolve to the
# system 3.12 interpreter (ubuntu24.04 ships 3.12 already, but these symlinks
# aren't created by default). Any failing link aborts the build.
RUN for name in python python3 python3.12; do \
        ln -sf /usr/bin/python3.12 "/usr/local/bin/$name" || exit 1; \
    done

# uid/gid of the unprivileged runtime user; override with --build-arg.
ARG USER_ID=1000
ARG GROUP_ID=1000
# Ubuntu 24.04 ships a default `ubuntu` user at uid 1000. It is removed first
# (best effort: a missing user is not an error) so the explicit appuser keeps
# uid/gid 1000, matching Dockerfile.cuda + the k8s manifests. appuser is then
# added to the sudo group and granted passwordless sudo.
RUN (userdel -r ubuntu 2>/dev/null || true) \
 && groupadd -g $GROUP_ID appuser \
 && useradd -u $USER_ID -g appuser -m -s /bin/bash appuser \
 && usermod -a -G sudo appuser \
 && printf '%s\n' 'appuser ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers

# Install uv for development use.
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN INSTALLER_NO_MODIFY_PATH=1 UV_INSTALL_DIR="/usr/local/bin" sh /uv-installer.sh && rm /uv-installer.sh

# Everything from here on runs as, and is owned by, the unprivileged appuser.
USER appuser
ENV PATH="/usr/local/bin:$PATH"
WORKDIR /app
# Copy the application + venv from the builder.
COPY --from=builder --chown=appuser:appuser /app /app

# Copy the entrypoint script and mark it executable in the same instruction.
# Setting the mode with --chmod avoids a separate `RUN chmod +x` layer that
# would store a second copy of the file just to flip its mode bits.
COPY --chown=appuser:appuser --chmod=0755 scripts/docker-entrypoint.sh /app/docker-entrypoint.sh

# The venv was created in the builder against a uv-managed Python that does not
# exist in this stage, so each interpreter link inside the venv is removed and
# recreated pointing at the system /usr/bin/python3.12. A missing link or a
# failed `ln` aborts the build.
RUN for link in python python3 python3.12; do \
        rm "/app/.venv/bin/$link" \
        && ln -s /usr/bin/python3.12 "/app/.venv/bin/$link" \
        || exit 1; \
    done

# python3.12-dev provides the Python.h headers that vLLM's Triton CudaUtils
# needs when it JIT-compiles at runtime. vLLM serving (dynamo.vllm / inference)
# requires them; they are absent from the cuda-dl-base runtime, which crashed
# inference with `fatal error: Python.h: No such file or directory`.
# apt needs root, so switch user for this one step and drop back afterwards.
USER root
RUN apt-get update \
 && apt-get install --yes --no-install-recommends python3.12-dev \
 && apt-get clean autoclean \
 && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
USER appuser

# Venv executables take precedence over everything else on PATH.
ENV PATH="/app/.venv/bin:$PATH"

# HuggingFace Hub timeouts, in seconds (the defaults are 10s, which causes
# issues on slow networks).
ENV HF_HUB_ETAG_TIMEOUT=500 \
    HF_HUB_DOWNLOAD_TIMEOUT=300

# Turn on the FP8 grouped-GEMM kernels in vLLM MoE layers. These require
# DeepGEMM, which is built during the arm64 post-install step above.
# NOTE(review): that step only runs when TARGETARCH=arm64, while these flags are
# set for every architecture — confirm the behaviour on non-arm64 builds.
ENV VLLM_USE_DEEP_GEMM=1 \
    VLLM_MOE_USE_DEEP_GEMM=1

# The entrypoint script performs setup (ulimit, etc); the default command just
# sleeps forever so a K8s pod stays up until something is exec'd into it.
ENTRYPOINT ["/app/docker-entrypoint.sh"]
CMD ["sleep", "infinity"]
79 changes: 79 additions & 0 deletions Dockerfile.dynamo
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# syntax=docker/dockerfile:1.4
# Dockerfile.dynamo — layer ai-dynamo onto a prime-rl image WITHOUT reinstalling vLLM.
#
# The prime-rl base already ships vLLM (0.20.2 + the vLLM PR #39366 two-phase pause
# patch on older bases; vLLM 0.22, where that change is native, on newer ones),
# torch, flashinfer, DeepGEMM. We build the dynamo Rust bindings (ai-dynamo-runtime,
# via maturin) at DYNAMO_REF, then install the dynamo Python package — which provides
# BOTH `dynamo.frontend` and `dynamo.vllm` (hatch packages = components/src/dynamo) —
# with `--no-deps` so the base's vLLM / torch / transformers are NEVER touched.
# A curated set of dynamo runtime deps (explicitly excluding vllm/torch/ray) is added
# via `uv pip` (the prime-rl venv is uv-managed and has no `pip` binary).
#
# Build (BuildKit; run in the arm64 dind builder):
# DOCKER_BUILDKIT=1 docker build -f Dockerfile.dynamo \
# --build-arg BASE_IMAGE=nvcr.io/nvidian/dynamo-dev/biswa:prime-rl-97950abd-20260531-arm64 \
# --build-arg DYNAMO_REF=ecae3569926410ef33b4d3d13c7d6a1b89789bb0 \
# -t nvcr.io/nvidian/dynamo-dev/biswa:prime-rl-97950abd-dynamo-ecae3569-arm64 .
#
# DYNAMO_REF may be any commit/branch/tag on https://github.com/ai-dynamo/dynamo
# (e.g. bis/rl-workers-discovery tip ecae3569…, or a release tag v1.2.0).

# Global build args (declared before the first FROM so they can be used in FROM
# lines; stages that need them re-declare them).
#   BASE_IMAGE        prime-rl image the final stage is layered on.
#   DYNAMO_REPO/REF   git repository and commit/branch/tag of dynamo to build.
#   CARGO_BUILD_JOBS  exported as the CARGO_BUILD_JOBS env var in the builder.
ARG BASE_IMAGE=nvcr.io/nvidian/dynamo-dev/biswa:prime-rl-97950abd-20260531-arm64
ARG DYNAMO_REPO=https://github.com/ai-dynamo/dynamo.git
ARG DYNAMO_REF=ecae3569926410ef33b4d3d13c7d6a1b89789bb0
ARG CARGO_BUILD_JOBS=16

# ===== Stage 1: build ai-dynamo-runtime Rust bindings wheel =====
FROM ubuntu:24.04 AS dynamo-builder
ARG DYNAMO_REPO
ARG DYNAMO_REF
ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}
ENV DEBIAN_FRONTEND=noninteractive
# System build deps, then the Rust toolchain via rustup.
# The rustup installer is downloaded to a file and then executed instead of
# being piped into `sh`: with `curl ... | sh` the step's exit status is that of
# `sh`, so a failed download would let the build continue without Rust and
# only fail later at `cargo install`. --proto/--tlsv1.2 restrict the download
# (including redirects) to HTTPS with TLS >= 1.2.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates curl build-essential pkg-config libclang-dev protobuf-compiler git \
        python3 python3-dev python3-venv \
    && rm -rf /var/lib/apt/lists/* \
    && curl --proto '=https' --tlsv1.2 -fsSL https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y --profile minimal --default-toolchain stable \
    && rm -f /tmp/rustup-init.sh
ENV CARGO_HOME=/root/.cargo RUSTUP_HOME=/root/.rustup PATH=/root/.cargo/bin:${PATH}
# maturin (the wheel build tool), installed with cargo. The cargo registry and
# git directories are BuildKit cache mounts so downloads are reused across builds.
RUN --mount=type=cache,target=/root/.cargo/git,sharing=locked \
    --mount=type=cache,target=/root/.cargo/registry,sharing=locked \
    cargo install --locked maturin

# Fetch the dynamo sources and pin the working tree to DYNAMO_REF.
RUN git clone "${DYNAMO_REPO}" /build/dynamo \
 && git -C /build/dynamo checkout "${DYNAMO_REF}"

# Build the Python bindings wheel in release mode into /build/dist. The crate's
# target directory is also a cache mount, so rebuilds can reuse prior artifacts.
RUN --mount=type=cache,target=/build/dynamo/lib/bindings/python/target,sharing=locked \
    --mount=type=cache,target=/root/.cargo/git,sharing=locked \
    --mount=type=cache,target=/root/.cargo/registry,sharing=locked \
    cd /build/dynamo/lib/bindings/python \
 && maturin build --release --out /build/dist

# ===== Stage 2: prime-rl base + dynamo (reuses base vLLM, no reinstall) =====
FROM ${BASE_IMAGE}
# Package installation below needs root; the stage switches back to appuser at
# the end.
USER root
ENV DYNAMO_HOME=/opt/dynamo
# The dynamo source tree stays in the image because the Python package is
# installed from it in editable mode (-e) below.
COPY --from=dynamo-builder /build/dynamo /opt/dynamo

# 1) ai-dynamo-runtime (Rust bindings) + dynamo python pkg (frontend + vllm modules).
#    --no-deps: do NOT pull vllm/torch/transformers (keep the prime-rl base's patched stack).
#    The wheels are bind-mounted straight from the builder stage for the duration
#    of this RUN instead of being COPY'd in and deleted afterwards: a file removed
#    in a later layer still occupies space in the layer that added it.
RUN --mount=type=bind,from=dynamo-builder,source=/build/dist,target=/tmp/dynamo-wheels \
    uv pip install --python /app/.venv/bin/python --no-cache /tmp/dynamo-wheels/*.whl \
    && cd /opt/dynamo \
    && uv pip install --python /app/.venv/bin/python --no-cache --no-deps -e .

# 2) Runtime dependencies of dynamo that the base image may not have. vllm, torch
#    and ray are EXPLICITLY left out of this list. uvloop + nixl are required by
#    the dynamo.vllm worker; everything else serves the frontend/runtime.
RUN uv pip install --python /app/.venv/bin/python --no-cache \
        uvloop \
        "nixl[cu12]<=0.10.1" \
        "fastapi==0.120.1" \
        "uvicorn==0.38.0" \
        httpx \
        "msgspec>=0.19.0" \
        pyzmq \
        "prometheus_client>=0.23.1" \
        "aiohttp>=3.9.0,<4.0" \
        "blake3>=1.0.0,<2.0.0" \
        "kubernetes>=32.0.1,<33.0.0" \
        opentelemetry-api \
        opentelemetry-sdk \
        opentelemetry-exporter-otlp

# NOTE: etcd + nats-server are deliberately left out of this image. In the k8s
# deployment dynamo talks to the external dynamo-platform services
# (e.g. NATS_SERVER=nats://dynamo-platform-nats...:4222 and the platform etcd),
# so bundling the static binaries into the worker image would only add bloat.

# Drop privileges again and return to the application directory.
USER appuser
WORKDIR /app
2 changes: 1 addition & 1 deletion deps/pydantic-config
Submodule pydantic-config updated 0 files
2 changes: 1 addition & 1 deletion deps/renderers
2 changes: 1 addition & 1 deletion deps/research-environments
Submodule research-environments updated 30 files
+9 −3 environments/apex_shortlist/README.md
+45 −57 environments/apex_shortlist/apex_shortlist.py
+2 −2 environments/apex_shortlist/pyproject.toml
+37 −3 environments/frontierscience/README.md
+45 −20 environments/frontierscience/frontierscience.py
+2 −2 environments/frontierscience/pyproject.toml
+4 −2 environments/graphwalks/graphwalks.py
+23 −4 environments/graphwalks_rlm/graphwalks_rlm.py
+33 −0 environments/openthoughts_tblite/README.md
+98 −0 environments/openthoughts_tblite/openthoughts_tblite.py
+28 −0 environments/openthoughts_tblite/pyproject.toml
+123 −0 environments/programbench_env/README.md
+64 −0 environments/programbench_env/_programbench_constants.py
+209 −0 environments/programbench_env/_programbench_harnesses.py
+62 −0 environments/programbench_env/_programbench_rubric.py
+787 −0 environments/programbench_env/_programbench_taskset.py
+296 −0 environments/programbench_env/programbench_env.py
+37 −0 environments/programbench_env/pyproject.toml
+78 −0 environments/rlm_uuid_ctf/README.md
+22 −0 environments/rlm_uuid_ctf/pyproject.toml
+3 −0 environments/rlm_uuid_ctf/rlm_uuid_ctf/__init__.py
+1,091 −0 environments/rlm_uuid_ctf/rlm_uuid_ctf/rlm_uuid_ctf.py
+64 −0 environments/swebench_pro/README.md
+22 −0 environments/swebench_pro/pyproject.toml
+86 −0 environments/swebench_pro/swebench_pro.py
+52 −32 environments/terminal_bench_2/README.md
+3 −3 environments/terminal_bench_2/pyproject.toml
+108 −163 environments/terminal_bench_2/terminal_bench_2.py
+67 −6 tests/test_envs.py
+1,063 −0 tests/test_programbench_pypi_rewrite.py
2 changes: 1 addition & 1 deletion deps/verifiers
Submodule verifiers updated 215 files
73 changes: 73 additions & 0 deletions k8s/dynamo-deploy/admin-stub.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Optional admin-stub Deployment + Service.
# kubectl apply -f admin-stub.yaml -n <your-namespace>
#
# Only needed if your Dynamo build does NOT serve /v1/rl/* natively
# (i.e. older builds without DYN_ENABLE_RL=true). With a recent Dynamo,
# point `admin_base_url` directly at the Dynamo frontend and skip this
# manifest entirely.
# ConfigMap carrying the stub server script; the Deployment mounts it and runs
# admin_stub.py from the mounted directory.
apiVersion: v1
kind: ConfigMap
metadata:
  name: admin-stub-script
  namespace: <your-namespace>
data:
  admin_stub.py: |
    # Minimal HTTP stub: answers every GET and POST on port 8001 with 200 "OK".
    from http.server import HTTPServer, BaseHTTPRequestHandler

    class H(BaseHTTPRequestHandler):
        def do_POST(self):
            # Read exactly Content-Length bytes (nothing when it is absent or 0).
            n = int(self.headers.get("Content-Length", 0))
            body = self.rfile.read(n) if n else b""
            # flush=True: stdout is block-buffered when it is not a terminal (as
            # in a container), so without an explicit flush these lines would
            # not show up promptly in `kubectl logs`.
            print(f"[stub] POST {self.path} body={body[:200]}", flush=True)
            self.send_response(200)
            self.end_headers()
            self.wfile.write(b"OK")

        def do_GET(self):
            self.send_response(200)
            self.end_headers()
            self.wfile.write(b"OK")

    # Listen on all interfaces; the Service forwards port 8001 here.
    HTTPServer(("0.0.0.0", 8001), H).serve_forever()
---
# Single-replica Deployment that runs the stub script from the
# admin-stub-script ConfigMap inside a stock Python image.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: <your-namespace>
  name: admin-stub
spec:
  replicas: 1
  selector:
    matchLabels: {app: admin-stub}
  template:
    metadata:
      labels: {app: admin-stub}
    spec:
      # The ConfigMap is exposed as a volume and mounted at /scripts below.
      volumes:
        - name: script
          configMap:
            name: admin-stub-script
      containers:
        - name: stub
          image: python:3.12-slim
          command:
            - python3
            - /scripts/admin_stub.py
          ports:
            - containerPort: 8001
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
          volumeMounts:
            - name: script
              mountPath: /scripts
---
# Service exposing the stub pods (label app=admin-stub) on port 8001.
apiVersion: v1
kind: Service
metadata:
  namespace: <your-namespace>
  name: admin-stub
spec:
  ports:
    - port: 8001
      targetPort: 8001
  selector:
    app: admin-stub
Loading