PrimeIntellect-ai · biswapanda · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/.gitmodules b/.gitmodules
@@ -1,9 +1,9 @@
 [submodule "verifiers"]
 	path = deps/verifiers
-	url = git@github.com:PrimeIntellect-ai/verifiers.git
+	url = git@github.com:biswapanda/verifiers.git
 [submodule "renderers"]
 	path = deps/renderers
-	url = git@github.com:PrimeIntellect-ai/renderers.git
+	url = git@github.com:biswapanda/renderers.git
 [submodule "research-environments"]
 	path = deps/research-environments
 	url = git@github.com:PrimeIntellect-ai/research-environments.git

diff --git a/Dockerfile.cuda.runtime b/Dockerfile.cuda.runtime
@@ -0,0 +1,189 @@
+# Multi-stage Dockerfile for prime-rl with NVRTC support on GB200 (sm_100a).
+#
+# WHY THIS EXISTS (separate from Dockerfile.cuda):
+#   The original Dockerfile.cuda uses `python:3.12-slim` as the runtime base.
+#   That image has no CUDA toolkit, so tilelang's JIT path (which compiles the
+#   sparse-MLA kernels at runtime via NVRTC) fails with:
+#
+#       atomic.h(7): catastrophic error: cannot open source file "cuda/atomic"
+#
+#   `cuda/atomic` is a libcudacxx (CCCL) header. It is shipped only by the
+#   CUDA dev/devel toolkit, not by the `nvidia-cuda-*` pip wheels. Without it
+#   NVRTC cannot compile any kernel that pulls in tilelang's atomic.h.
+#
+#   This Dockerfile uses NVIDIA's cuda-dl-base (devel) image for both stages,
+#   so the runtime image carries the libcudacxx / CCCL headers tilelang needs
+#   and the wheels in /app/.venv keep their CUDA 12.x ABI.
+#
+# BASE IMAGE:
+#   nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04
+#     - matches dynamo/container/context.yaml `prime-rl.cuda12.9` entry
+#     - ships Python 3.12 by default (Ubuntu 24.04)
+#     - includes libcudacxx, CCCL, cuDNN, nvcc, and full CUDA dev headers
+#     - forward-compatible with the CUDA 12.8 wheels pinned in uv.lock
+#
+# USAGE:
+#   docker buildx build --platform linux/arm64 \
+#       --build-arg TARGETARCH=arm64 \
+#       -f Dockerfile.cuda.runtime -t <tag> .
+
+ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base
+ARG BASE_IMAGE_TAG=25.06-cuda12.9-devel-ubuntu24.04
+
+############################
+##### Build stage ##########
+############################
+FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS builder
+LABEL maintainer="prime intellect"
+LABEL repository="prime-rl"
+
+# Set en_US.UTF-8 locale by default
+RUN echo "LC_ALL=en_US.UTF-8" >> /etc/environment
+
+# CUDA_HOME / PATH from base image are already correct (/usr/local/cuda), but
+# pin them explicitly so downstream tooling (tilelang, flash-attn) sees them.
+ENV CUDA_HOME=/usr/local/cuda
+ENV PATH=$PATH:/usr/local/cuda/bin
+
+# Install build tooling.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=Etc/UTC
+# Build-stage OS packages, listed alphabetically. The apt lists and temp
+# directories are removed in the same layer that created them.
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        git \
+        ninja-build \
+        sudo \
+ && apt-get clean autoclean \
+ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+# Install uv.
+ADD https://astral.sh/uv/install.sh /uv-installer.sh
+RUN INSTALLER_NO_MODIFY_PATH=1 UV_INSTALL_DIR="/usr/local/bin" sh /uv-installer.sh && rm /uv-installer.sh
+ENV PATH="/usr/local/bin:$PATH"
+ENV UV_PYTHON_INSTALL_DIR="/usr/local/share/uv/python"
+ENV UV_CACHE_DIR="/usr/local/share/uv/cache"
+
+# Install Python dependencies (gradual copies help with caching).
+WORKDIR /app
+
+ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
+
+COPY pyproject.toml /app/pyproject.toml
+COPY uv.lock /app/uv.lock
+COPY README.md /app/README.md
+COPY src/ /app/src/
+COPY packages/ /app/packages/
+COPY deps/ /app/deps/
+COPY configs /app/configs
+COPY examples /app/examples
+COPY benchmarks/scripts /app/benchmarks/scripts
+
+# The cache mount must sit at UV_CACHE_DIR (set above to
+# /usr/local/share/uv/cache); a mount at any other path is never used by uv,
+# and the real cache would be written into this image layer instead.
+# UV_LINK_MODE=copy (set above) makes uv copy files out of the cache mount
+# into /app/.venv.
+RUN --mount=type=cache,target=/usr/local/share/uv/cache \
+    uv sync --extra flash-attn --extra flash-attn-3 --extra flash-attn-cute --extra envs --extra gpt-oss --group mamba-ssm --locked --no-dev
+
+# arm64: build flash-attn + DeepGEMM from source.
+ARG TARGETARCH
+COPY scripts/docker-arm64-post-install.sh /app/scripts/docker-arm64-post-install.sh
+COPY scripts/install_deep_gemm.sh /app/scripts/install_deep_gemm.sh
+RUN if [ "$TARGETARCH" = "arm64" ]; then /app/scripts/docker-arm64-post-install.sh; fi
+
+# vLLM PR #39366 (two-phase DP pause) is native in vLLM 0.22 — no patch needed
+# (the rl-sdk-4 merge bumped vLLM 0.20.2 -> 0.22 and dropped this patch).
+
+############################
+##### Runtime stage ########
+############################
+# Same image so libcudacxx, CCCL headers, and the system Python 3.12 are all
+# present at runtime — the original Dockerfile.cuda switched to python:3.12-slim
+# here and lost the CUDA dev headers, which broke tilelang's NVRTC backend.
+FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}
+
+ENV CUDA_HOME=/usr/local/cuda
+ENV PATH=/usr/local/cuda/bin:$PATH
+
+ARG DEBIAN_FRONTEND=noninteractive
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=Etc/UTC
+
+# Runtime-stage OS packages, listed alphabetically. The apt lists and temp
+# directories are removed in the same layer that created them.
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+        build-essential \
+        clang \
+        curl \
+        git \
+        git-lfs \
+        gpg \
+        ibverbs-providers \
+        iperf \
+        iputils-ping \
+        libibverbs1 \
+        net-tools \
+        openssh-server \
+        python3.12 \
+        python3.12-venv \
+        sudo \
+        tmux \
+        vim \
+        wget \
+ && apt-get clean autoclean \
+ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+# Ensure `python` / `python3` point at 3.12 (ubuntu24.04 ships 3.12 already,
+# but the symlinks aren't created by default).
+RUN ln -sf /usr/bin/python3.12 /usr/local/bin/python \
+ && ln -sf /usr/bin/python3.12 /usr/local/bin/python3 \
+ && ln -sf /usr/bin/python3.12 /usr/local/bin/python3.12
+
+ARG USER_ID=1000
+ARG GROUP_ID=1000
+# Ubuntu 24.04 ships a default `ubuntu` user at uid 1000; remove it so the
+# explicit appuser keeps uid/gid 1000 (matches Dockerfile.cuda + k8s manifests).
+# Passwordless sudo is granted through a /etc/sudoers.d drop-in (mode 0440)
+# rather than by appending to /etc/sudoers, so the main sudoers file stays
+# untouched and the rule lives in one replaceable file.
+RUN userdel -r ubuntu 2>/dev/null || true \
+ && groupadd --gid "$GROUP_ID" appuser \
+ && useradd --uid "$USER_ID" --gid appuser --create-home --shell /bin/bash appuser \
+ && usermod -aG sudo appuser \
+ && mkdir -p /etc/sudoers.d \
+ && echo "appuser ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/appuser \
+ && chmod 0440 /etc/sudoers.d/appuser
+
+# Install uv for development use.
+ADD https://astral.sh/uv/install.sh /uv-installer.sh
+RUN INSTALLER_NO_MODIFY_PATH=1 UV_INSTALL_DIR="/usr/local/bin" sh /uv-installer.sh && rm /uv-installer.sh
+
+USER appuser
+ENV PATH="/usr/local/bin:$PATH"
+WORKDIR /app
+# Copy the application + venv from the builder.
+COPY --from=builder --chown=appuser:appuser /app /app
+
+# Copy the entrypoint script with owner and executable mode set in the same
+# layer (a follow-up `RUN chmod` would store a second copy of the file).
+COPY --chown=appuser:appuser --chmod=755 scripts/docker-entrypoint.sh /app/docker-entrypoint.sh
+
+# Repoint venv Python symlinks at the runtime-stage interpreter (the builder
+# used a uv-managed Python that does not exist here).
+# One layer instead of three; `ln -sfn` replaces an existing link in place and
+# also creates it when absent, so a missing link no longer aborts the build.
+RUN for name in python python3 python3.12; do \
+        ln -sfn /usr/bin/python3.12 "/app/.venv/bin/$name" || exit 1; \
+    done
+
+# python3.12-dev: Python.h headers for vLLM's Triton CudaUtils JIT-compile at runtime.
+# Required by vLLM serving (dynamo.vllm / inference); absent from the cuda-dl-base
+# runtime, which crashed inference with `fatal error: Python.h: No such file or directory`.
+USER root
+RUN apt-get update && apt-get install -y --no-install-recommends python3.12-dev \
+ && apt-get clean autoclean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+USER appuser
+
+# Place executables in the environment at the front of the path.
+ENV PATH="/app/.venv/bin:$PATH"
+
+# HuggingFace Hub timeouts (defaults are 10s which causes issues on slow networks).
+ENV HF_HUB_ETAG_TIMEOUT=500
+ENV HF_HUB_DOWNLOAD_TIMEOUT=300
+
+# Enable FP8 grouped-GEMM kernels in vLLM MoE layers (requires DeepGEMM, built
+# during the arm64 post-install step above).
+ENV VLLM_USE_DEEP_GEMM=1
+ENV VLLM_MOE_USE_DEEP_GEMM=1
+
+# Use entrypoint for setup (ulimit, etc) but default to sleep infinity for K8s.
+ENTRYPOINT ["/app/docker-entrypoint.sh"]
+CMD ["sleep", "infinity"]
diff --git a/Dockerfile.dynamo b/Dockerfile.dynamo
@@ -0,0 +1,79 @@
+# syntax=docker/dockerfile:1.4
+# Dockerfile.dynamo — layer ai-dynamo onto a prime-rl image WITHOUT reinstalling vLLM.
+#
+# The prime-rl base already ships vLLM 0.20.2 (+ vLLM PR #39366 two-phase pause),
+# torch, flashinfer, DeepGEMM. We build the dynamo Rust bindings (ai-dynamo-runtime,
+# via maturin) at DYNAMO_REF, then install the dynamo Python package — which provides
+# BOTH `dynamo.frontend` and `dynamo.vllm` (hatch packages = components/src/dynamo) —
+# with `--no-deps` so the base's vLLM / torch / transformers are NEVER touched.
+# A curated set of dynamo runtime deps (explicitly excluding vllm/torch/ray) is added
+# via `uv pip` (the prime-rl venv is uv-managed and has no `pip` binary).
+#
+# Build (BuildKit; run in the arm64 dind builder):
+#   DOCKER_BUILDKIT=1 docker build -f Dockerfile.dynamo \
+#     --build-arg BASE_IMAGE=nvcr.io/nvidian/dynamo-dev/biswa:prime-rl-97950abd-20260531-arm64 \
+#     --build-arg DYNAMO_REF=ecae3569926410ef33b4d3d13c7d6a1b89789bb0 \
+#     -t nvcr.io/nvidian/dynamo-dev/biswa:prime-rl-97950abd-dynamo-ecae3569-arm64 .
+#
+# DYNAMO_REF may be any commit/branch/tag on https://github.com/ai-dynamo/dynamo
+# (e.g. bis/rl-workers-discovery tip ecae3569…, or a release tag v1.2.0).
+
+ARG BASE_IMAGE=nvcr.io/nvidian/dynamo-dev/biswa:prime-rl-97950abd-20260531-arm64
+ARG DYNAMO_REPO=https://github.com/ai-dynamo/dynamo.git
+ARG DYNAMO_REF=ecae3569926410ef33b4d3d13c7d6a1b89789bb0
+ARG CARGO_BUILD_JOBS=16
+
+# ===== Stage 1: build ai-dynamo-runtime Rust bindings wheel =====
+FROM ubuntu:24.04 AS dynamo-builder
+ARG DYNAMO_REPO
+ARG DYNAMO_REF
+ARG CARGO_BUILD_JOBS
+ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}
+ENV DEBIAN_FRONTEND=noninteractive
+# Build prerequisites plus the Rust toolchain.
+# The rustup installer is downloaded to a file and then executed, instead of
+# being piped into `sh`: the default /bin/sh has no pipefail, so a failed
+# download in `curl | sh` would be masked by the exit status of `sh`.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      ca-certificates curl build-essential pkg-config libclang-dev protobuf-compiler git \
+      python3 python3-dev python3-venv \
+ && rm -rf /var/lib/apt/lists/* \
+ && curl --proto '=https' --tlsv1.2 -fsSL https://sh.rustup.rs -o /tmp/rustup-init.sh \
+ && sh /tmp/rustup-init.sh -y --profile minimal --default-toolchain stable \
+ && rm -f /tmp/rustup-init.sh
+ENV CARGO_HOME=/root/.cargo RUSTUP_HOME=/root/.rustup PATH=/root/.cargo/bin:${PATH}
+RUN --mount=type=cache,target=/root/.cargo/registry,sharing=locked \
+    --mount=type=cache,target=/root/.cargo/git,sharing=locked \
+    cargo install maturin --locked
+RUN git clone "${DYNAMO_REPO}" /build/dynamo && cd /build/dynamo && git checkout "${DYNAMO_REF}"
+RUN --mount=type=cache,target=/root/.cargo/registry,sharing=locked \
+    --mount=type=cache,target=/root/.cargo/git,sharing=locked \
+    --mount=type=cache,target=/build/dynamo/lib/bindings/python/target,sharing=locked \
+    cd /build/dynamo/lib/bindings/python \
+ && maturin build --release --out /build/dist
+
+# ===== Stage 2: prime-rl base + dynamo (reuses base vLLM, no reinstall) =====
+FROM ${BASE_IMAGE}
+USER root
+ENV DYNAMO_HOME=/opt/dynamo
+COPY --from=dynamo-builder /build/dynamo /opt/dynamo
+# 1) ai-dynamo-runtime (Rust bindings) + dynamo python pkg (frontend + vllm modules).
+#    --no-deps: do NOT pull vllm/torch/transformers (keep the prime-rl base's patched stack).
+#    The wheels are bind-mounted from the builder stage instead of being COPY'd:
+#    a COPY layer keeps the files in the image even if a later RUN removes them.
+RUN --mount=type=bind,from=dynamo-builder,source=/build/dist,target=/tmp/dynamo-wheels \
+    uv pip install --python /app/.venv/bin/python --no-cache /tmp/dynamo-wheels/*.whl \
+ && cd /opt/dynamo \
+ && uv pip install --python /app/.venv/bin/python --no-cache --no-deps -e .
+
+# 2) dynamo runtime deps the base may lack — EXPLICITLY excluding vllm / torch / ray.
+#    uvloop + nixl are required by the dynamo.vllm worker; the rest are frontend/runtime.
+RUN uv pip install --python /app/.venv/bin/python --no-cache \
+      uvloop "nixl[cu12]<=0.10.1" \
+      "fastapi==0.120.1" "uvicorn==0.38.0" httpx \
+      "msgspec>=0.19.0" pyzmq "prometheus_client>=0.23.1" \
+      "aiohttp>=3.9.0,<4.0" "blake3>=1.0.0,<2.0.0" \
+      "kubernetes>=32.0.1,<33.0.0" \
+      opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp
+
+# NOTE: etcd + nats-server are intentionally NOT installed in this image.
+# In the k8s deployment dynamo uses the external dynamo-platform services
+# (e.g. NATS_SERVER=nats://dynamo-platform-nats...:4222 and the platform etcd),
+# so shipping the static binaries in the worker image is unnecessary bloat.
+
+USER appuser
+WORKDIR /app
diff --git a/deps/pydantic-config b/deps/pydantic-config
diff --git a/deps/renderers b/deps/renderers
diff --git a/deps/research-environments b/deps/research-environments
diff --git a/deps/verifiers b/deps/verifiers
diff --git a/k8s/dynamo-deploy/admin-stub.yaml b/k8s/dynamo-deploy/admin-stub.yaml
@@ -0,0 +1,73 @@
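+# Optional admin-stub ConfigMap + Deployment + Service.
+# Every resource below hard-codes `namespace: <your-namespace>`; replace that
+# placeholder with your real namespace first, then apply:
+#   kubectl apply -f admin-stub.yaml -n <your-namespace>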
+#
+# Only needed if your Dynamo build does NOT serve /v1/rl/* natively
+# (i.e. older builds without DYN_ENABLE_RL=true). With a recent Dynamo,
+# point `admin_base_url` directly at the Dynamo frontend and skip this
+# manifest entirely.
+# ConfigMap holding the stub HTTP server script; the Deployment in this file
+# mounts it at /scripts and runs it with python3.
+# The server listens on 0.0.0.0:8001 and answers every GET and POST with
+# 200 "OK"; for POSTs it also logs the path and the first 200 body bytes.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: admin-stub-script
+  namespace: <your-namespace>
+data:
+  admin_stub.py: |
+    from http.server import HTTPServer, BaseHTTPRequestHandler
+    class H(BaseHTTPRequestHandler):
+        def do_POST(self):
+            n = int(self.headers.get("Content-Length", 0))
+            body = self.rfile.read(n) if n else b""
+            # flush=True: stdout is block-buffered when it is not a TTY, so
+            # without it these lines can be delayed or lost in `kubectl logs`.
+            print(f"[stub] POST {self.path} body={body[:200]}", flush=True)
+            self.send_response(200)
+            self.end_headers()
+            self.wfile.write(b"OK")
+        def do_GET(self):
+            self.send_response(200)
+            self.end_headers()
+            self.wfile.write(b"OK")
+    HTTPServer(("0.0.0.0", 8001), H).serve_forever()
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: admin-stub
+  namespace: <your-namespace>
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: admin-stub
+  template:
+    metadata:
+      labels:
+        app: admin-stub
+    spec:
+      containers:
+        - name: stub
+          image: python:3.12-slim
+          command: ["python3", "/scripts/admin_stub.py"]
+          ports:
+            - containerPort: 8001
+          volumeMounts:
+            - name: script
+              mountPath: /scripts
+          resources:
+            requests:
+              memory: "64Mi"
+              cpu: "50m"
+      volumes:
+        - name: script
+          configMap:
+            name: admin-stub-script
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: admin-stub
+  namespace: <your-namespace>
+spec:
+  selector:
+    app: admin-stub
+  ports:
+    - port: 8001
+      targetPort: 8001
+9 −3		environments/apex_shortlist/README.md
+45 −57		environments/apex_shortlist/apex_shortlist.py
+2 −2		environments/apex_shortlist/pyproject.toml
+37 −3		environments/frontierscience/README.md
+45 −20		environments/frontierscience/frontierscience.py
+2 −2		environments/frontierscience/pyproject.toml
+4 −2		environments/graphwalks/graphwalks.py
+23 −4		environments/graphwalks_rlm/graphwalks_rlm.py
+33 −0		environments/openthoughts_tblite/README.md
+98 −0		environments/openthoughts_tblite/openthoughts_tblite.py
+28 −0		environments/openthoughts_tblite/pyproject.toml
+123 −0		environments/programbench_env/README.md
+64 −0		environments/programbench_env/_programbench_constants.py
+209 −0		environments/programbench_env/_programbench_harnesses.py
+62 −0		environments/programbench_env/_programbench_rubric.py
+787 −0		environments/programbench_env/_programbench_taskset.py
+296 −0		environments/programbench_env/programbench_env.py
+37 −0		environments/programbench_env/pyproject.toml
+78 −0		environments/rlm_uuid_ctf/README.md
+22 −0		environments/rlm_uuid_ctf/pyproject.toml
+3 −0		environments/rlm_uuid_ctf/rlm_uuid_ctf/__init__.py
+1,091 −0		environments/rlm_uuid_ctf/rlm_uuid_ctf/rlm_uuid_ctf.py
+64 −0		environments/swebench_pro/README.md
+22 −0		environments/swebench_pro/pyproject.toml
+86 −0		environments/swebench_pro/swebench_pro.py
+52 −32		environments/terminal_bench_2/README.md
+3 −3		environments/terminal_bench_2/pyproject.toml
+108 −163		environments/terminal_bench_2/terminal_bench_2.py
+67 −6		tests/test_envs.py
+1,063 −0		tests/test_programbench_pypi_rewrite.py