From 209f76eb525722bbb69b1ed12de9d2488e607afe Mon Sep 17 00:00:00 2001
From: Andre Fu <39042250+andre-fu@users.noreply.github.com>
Date: Fri, 5 Jun 2026 20:21:11 -0700
Subject: [PATCH 01/12] fix(install): support aarch64 hosts with any compute
 capability (#2587)

The aarch64 host install path was broken: `uv sync` installs flash-attn
from PyPI source but pyproject sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE,
so the compiled extension never builds. `scripts/docker-arm64-post-install.sh`
fixed it for Docker GB200 builds but hardcoded sm_100 and /app/.venv,
leaving Hopper hosts (H100/H200/GH200) without a recipe.

Changes:
- `scripts/docker-arm64-post-install.sh`: auto-detect compute capability
  via nvidia-smi when available; parameterize venv path. Preserves the
  sm_100 default when no GPU is visible (Docker buildx).
- `scripts/install.sh`: call the post-install for aarch64 hosts after
  `uv sync --all-extras`. Previously the script ran uv sync and exited,
  leaving aarch64 users with a broken venv.
- `README.md`: document the aarch64 post-install step (mirrors the
  existing 3.1 Flash Attention 3 pattern).

Validated on GH200 (sm_90, aarch64):
- forward + backward parity vs torch SDPA (max diff < 0.05 / 0.25)
- 383/384 unit tests pass (the 1 failure is unrelated TileLang/MoE)
- SFT trainer smoke test (5 steps, Qwen3-0.6B) runs with flash_attention_2

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md                            |  9 ++++++
 scripts/docker-arm64-post-install.sh | 47 ++++++++++++++++++++++------
 scripts/install.sh                   | 10 ++++++
 3 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index e6321c9006..d9933608cc 100644
--- a/README.md
+++ b/README.md
@@ -110,6 +110,15 @@ source $HOME/.local/bin/env
 uv sync --all-extras
 ```
 
+4.1. On aarch64 hosts: build flash-attn from source for your GPU
+
+> *NOTE*: aarch64 has no prebuilt flash-attn wheel. This step compiles the CUDA extension for your local GPU (~20-30 minutes). Compute capability is auto-detected from `nvidia-smi`; override with `TORCH_CUDA_ARCH_LIST=9.0` (Hopper) / `10.0` (Blackwell) if needed.
+> *NOTE*: After this step, do not run `uv sync --all-extras` or a plain `uv run`, as either will uninstall the package. Use `uv sync --inexact` or `uv run --no-sync` instead.
+
+```bash
+bash scripts/docker-arm64-post-install.sh
+```
+
 3.1. Optional: Install Flash Attention 3 (on Hopper GPUs only, for flash_attention_3 attention backend)
 
 > *NOTE*: This step will take a while, as it builds the Flash Attention 3 extension from source, as it has no wheels prebuilt.
diff --git a/scripts/docker-arm64-post-install.sh b/scripts/docker-arm64-post-install.sh
index f02b3070b3..55f85a3a03 100755
--- a/scripts/docker-arm64-post-install.sh
+++ b/scripts/docker-arm64-post-install.sh
@@ -1,17 +1,44 @@
 #!/bin/bash
-# arm64 post-install fixups for Docker builds.
-set -e
+# arm64 post-install fixups: rebuild flash-attn from source for the target GPU.
+#
+# Why this exists: pyproject.toml sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE to keep
+# `uv sync` fast; on x86_64 it pins a prebuilt wheel to fill in the binary, but no
+# such wheel exists for aarch64. Without this script, `import flash_attn` fails on
+# aarch64 with `ModuleNotFoundError: No module named 'flash_attn_2_cuda'`.
+#
+# Defaults preserve the existing Docker behavior (sm_100 / GB200). On a host with
+# `nvidia-smi` available, the compute capability is auto-detected from the local
+# GPU. Override via env vars if needed:
+#   TORCH_CUDA_ARCH_LIST   e.g. 9.0 (Hopper), 10.0 (Blackwell)
+#   VENV_PATH              path to the venv (default: $(pwd)/.venv)
+#   MAX_JOBS               parallel nvcc jobs (default: 4)
+set -euo pipefail
 
-echo "=== building flash-attn from source (sm_100 / GB200) ==="
-# Run from /tmp so uv doesn't read pyproject.toml's [tool.uv.extra-build-variables]
-# which sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE and prevents CUDA kernel compilation.
-export TORCH_CUDA_ARCH_LIST="10.0"
-export MAX_JOBS=4
+if [ -z "${TORCH_CUDA_ARCH_LIST:-}" ]; then
+    # Detect from the local GPU; on any failure (binary missing, driver not loaded,
+    # buildx without --gpus, diagnostics on stdout instead of "N.N") fall back to GB200.
+    TORCH_CUDA_ARCH_LIST="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d ' ' || true)"
+    [[ "$TORCH_CUDA_ARCH_LIST" =~ ^[0-9]+\.[0-9]+$ ]] || TORCH_CUDA_ARCH_LIST="10.0"
+fi
+export TORCH_CUDA_ARCH_LIST
+
+VENV_PATH="${VENV_PATH:-$(pwd)/.venv}"
+if [ ! -x "$VENV_PATH/bin/python" ]; then
+    echo "ERROR: no python at $VENV_PATH/bin/python. Run from the project root or set VENV_PATH." >&2
+    exit 1
+fi
+
+export MAX_JOBS="${MAX_JOBS:-4}"
 export FLASH_ATTENTION_FORCE_BUILD=TRUE
 export FLASH_ATTENTION_SKIP_CUDA_BUILD=FALSE
-(cd /tmp && uv pip install --python /app/.venv/bin/python \
-    "flash-attn==2.8.3" --no-build-isolation --no-binary flash-attn --no-cache)
+
+echo "=== building flash-attn from source (TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST, MAX_JOBS=$MAX_JOBS) ==="
+echo "    target venv: $VENV_PATH"
+# Run from /tmp so uv ignores the project's [tool.uv.extra-build-variables],
+# which sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE and would prevent kernel compilation.
+(cd /tmp && uv pip install --python "$VENV_PATH/bin/python" \
+    "flash-attn==2.8.3" --no-build-isolation --no-binary flash-attn --no-cache --reinstall-package flash-attn)
 
 echo "=== reinstalling flash-attn-cute (flash-attn overwrites it with a stub) ==="
-uv pip install --reinstall --no-deps \
+uv pip install --python "$VENV_PATH/bin/python" --reinstall --no-deps \
     "flash-attn-4 @ git+https://github.com/Dao-AILab/flash-attention.git@96bd151#subdirectory=flash_attn/cute"
diff --git a/scripts/install.sh b/scripts/install.sh
index 7e03b5aa71..630bf1d576 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -159,6 +159,16 @@ main() {
   log_info "Installing pre-commit hooks..."
   uv run pre-commit install
 
+  # aarch64 has no prebuilt flash-attn wheel; build it from source for the local GPU.
+  # Without this, `import flash_attn` fails with `ModuleNotFoundError: flash_attn_2_cuda`.
+  # Run last so no subsequent uv operation (which implicitly syncs against the lockfile)
+  # rebuilds flash-attn from PyPI with FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE and undoes this.
+  if [ "$(uname -m)" = "aarch64" ]; then
+    log_info "aarch64 detected: building flash-attn from source (this takes 20-30 minutes)..."
+    log_warn "Future 'uv sync --all-extras' or 'uv run' will remove this build. Use 'uv sync --inexact' or 'uv run --no-sync' to keep it."
+    bash scripts/docker-arm64-post-install.sh
+  fi
+
   log_info "Installation completed!"
 }
 

From 766e36f00092bd8b71f19d7a268ef8e836eba044 Mon Sep 17 00:00:00 2001
From: Erik Schultheis <7938269+ngc92@users.noreply.github.com>
Date: Sun, 7 Jun 2026 17:51:36 +0200
Subject: [PATCH 02/12] Feat/fp8 fused transpose cast (#2724)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(fp8): fuse transpose into the dX block-fp8 weight cast

The dX backward built `weight.transpose(0, 1).contiguous()` and re-cast it to
fp8 every step, materializing a full bf16 transpose buffer plus an extra
read/write pass. Add `per_block_cast_to_fp8_tp_triton`, which produces the
block-fp8 of `weight.T` directly by reusing the existing per-block kernel with
swapped output/scale strides — no intermediate buffer.

128x128 block quantization is transpose-symmetric, so the result is
bit-identical to casting the materialized transpose; DeepGEMM receives an
identical B tensor. Verified byte-for-byte across shapes; ~14x faster on a
4096x4096 weight (373 -> 27 us).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* deslopified

* also add fused implementation for per-token

* Fix: skip tests on <Hopper

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: S1ro1 <matej.sirovatka@gmail.com>
---
 .../trainer/models/kernels/fp8_utils.py       | 68 +++++++++++++++++++
 .../trainer/models/layers/fp8_linear.py       | 11 ++-
 tests/unit/train/models/test_fp8_utils.py     | 49 +++++++++++++
 3 files changed, 122 insertions(+), 6 deletions(-)
 create mode 100644 tests/unit/train/models/test_fp8_utils.py

diff --git a/src/prime_rl/trainer/models/kernels/fp8_utils.py b/src/prime_rl/trainer/models/kernels/fp8_utils.py
index 82c26fb7e4..4595640f28 100644
--- a/src/prime_rl/trainer/models/kernels/fp8_utils.py
+++ b/src/prime_rl/trainer/models/kernels/fp8_utils.py
@@ -550,3 +550,71 @@ def per_block_cast_to_fp8_triton(
         gran_k,
     )
     return out[0], sf[0]
+
+
+def per_block_cast_to_fp8_tp_triton(
+    x: torch.Tensor, use_ue8m0: bool, gran_k: int = GROUP_ALIGNMENT
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """Block-fp8 cast of ``x.T`` without materializing the transpose."""
+    assert x.dim() == 2
+    assert gran_k == GROUP_ALIGNMENT
+    rows, cols = x.shape
+    x3 = x.unsqueeze(0)
+    out = torch.empty((cols, rows), device=x.device, dtype=torch.float8_e4m3fn)
+    sf = torch.empty((ceil_div(cols, gran_k), ceil_div(rows, gran_k)), device=x.device, dtype=torch.float32)
+    grid = (1, ceil_div(rows, gran_k), ceil_div(cols, gran_k))
+    _grouped_per_block_fp8_kernel[grid](
+        x3,
+        out,
+        sf,
+        1,
+        rows,
+        cols,
+        x3.stride(0),
+        x3.stride(1),
+        x3.stride(2),
+        # transposed output: x's element (row, col) lands at out[col, row]
+        cols * rows,
+        1,
+        rows,
+        # transposed scales: x's tile (pid_m, pid_n) lands at sf[pid_n, pid_m]
+        ceil_div(cols, gran_k) * ceil_div(rows, gran_k),
+        1,
+        ceil_div(rows, gran_k),
+        USE_UE8M0=use_ue8m0,
+        BLOCK_M=gran_k,
+        BLOCK_N=gran_k,
+        num_warps=8,
+    )
+    return out, sf
+
+
+def per_token_cast_to_fp8_tp_triton(
+    x: torch.Tensor, use_ue8m0: bool, gran_k: int = GROUP_ALIGNMENT
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """Per-token fp8 cast of ``x.T`` without materializing the transpose."""
+    assert x.dim() == 2
+    assert gran_k == GROUP_ALIGNMENT
+    rows, cols = x.shape
+    out = torch.empty((cols, rows), device=x.device, dtype=torch.float8_e4m3fn)
+    sf = torch.empty((cols, ceil_div(rows, gran_k)), device=x.device, dtype=torch.float32)
+    grid = lambda meta: (ceil_div(cols, meta["BLOCK_M"]), ceil_div(rows, meta["BLOCK_K"]))
+    _per_token_fp8_kernel[grid](
+        x,
+        out,
+        sf,
+        cols,
+        rows,
+        # transposed read: the kernel's per-row amax reduces over x's rows
+        x.stride(1),
+        x.stride(0),
+        out.stride(0),
+        out.stride(1),
+        sf.stride(0),
+        sf.stride(1),
+        USE_UE8M0=use_ue8m0,
+        BLOCK_M=8,
+        BLOCK_K=gran_k,
+        num_warps=4,
+    )
+    return out, sf
diff --git a/src/prime_rl/trainer/models/layers/fp8_linear.py b/src/prime_rl/trainer/models/layers/fp8_linear.py
index 5f7f675dc3..0cd20bf936 100644
--- a/src/prime_rl/trainer/models/layers/fp8_linear.py
+++ b/src/prime_rl/trainer/models/layers/fp8_linear.py
@@ -12,7 +12,9 @@
 from torch import nn
 
 from prime_rl.trainer.models.kernels.fp8_utils import (
+    per_block_cast_to_fp8_tp_triton,
     per_block_cast_to_fp8_triton,
+    per_token_cast_to_fp8_tp_triton,
     per_token_cast_to_fp8_triton,
 )
 from prime_rl.utils.logger import get_logger
@@ -43,8 +45,7 @@ def backward(ctx, grad_output):
         grad_x = grad_weight = None
         if ctx.needs_input_grad[0]:
             grad_output_fp8 = per_token_cast_to_fp8_triton(grad_output_2d, False, block_size)
-            weight_t = weight.transpose(0, 1).contiguous()
-            weight_dx_fp8 = per_block_cast_to_fp8_triton(weight_t, False, block_size)
+            weight_dx_fp8 = per_block_cast_to_fp8_tp_triton(weight, False, block_size)
             grad_x_2d = torch.empty_like(x_2d)
             deep_gemm.fp8_gemm_nt(grad_output_fp8, weight_dx_fp8, grad_x_2d)
             grad_x = grad_x_2d.reshape(ctx.x_shape)
@@ -62,10 +63,8 @@ def backward(ctx, grad_output):
             else:
                 grad_output_2d_padded = grad_output_2d
                 x_2d_padded = x_2d
-            grad_output_t = grad_output_2d_padded.transpose(0, 1).contiguous()
-            x_t = x_2d_padded.transpose(0, 1).contiguous()
-            grad_output_t_fp8 = per_token_cast_to_fp8_triton(grad_output_t, False, block_size)
-            x_t_fp8 = per_token_cast_to_fp8_triton(x_t, False, block_size)
+            grad_output_t_fp8 = per_token_cast_to_fp8_tp_triton(grad_output_2d_padded, False, block_size)
+            x_t_fp8 = per_token_cast_to_fp8_tp_triton(x_2d_padded, False, block_size)
             grad_weight_fp32 = torch.zeros_like(weight, dtype=torch.float32)
             deep_gemm.fp8_gemm_nt(
                 grad_output_t_fp8,
diff --git a/tests/unit/train/models/test_fp8_utils.py b/tests/unit/train/models/test_fp8_utils.py
new file mode 100644
index 0000000000..f885f1076a
--- /dev/null
+++ b/tests/unit/train/models/test_fp8_utils.py
@@ -0,0 +1,49 @@
+import pytest
+import torch
+
+from prime_rl.trainer.models.kernels.fp8_utils import (
+    per_block_cast_to_fp8_tp_triton,
+    per_block_cast_to_fp8_triton,
+    per_token_cast_to_fp8_tp_triton,
+    per_token_cast_to_fp8_triton,
+)
+
+pytestmark = [
+    pytest.mark.gpu,
+    pytest.mark.skipif(
+        not torch.cuda.is_available() or torch.cuda.get_device_capability()[0] < 9,
+        reason="block-fp8 cast kernels use Triton fp8e4nv (e4m3), only supported on Hopper (SM90) and newer",
+    ),
+]
+
+
+@pytest.mark.parametrize("rows,cols", [(256, 256), (256, 512), (512, 256), (1024, 768), (384, 128)])
+def test_block_tp_cast_matches_materialized_transpose(rows, cols):
+    """The fused transpose+cast is *bit-identical* to unfused."""
+    torch.manual_seed(rows + cols)
+    x = torch.randn(rows, cols, device="cuda", dtype=torch.bfloat16) * 0.3
+
+    ref_q, ref_s = per_block_cast_to_fp8_triton(x.transpose(0, 1).contiguous(), False)
+    tp_q, tp_s = per_block_cast_to_fp8_tp_triton(x, False)
+
+    assert tp_q.shape == ref_q.shape == (cols, rows)
+    assert tp_s.shape == ref_s.shape
+    assert tp_q.is_contiguous()
+    assert torch.equal(tp_q.view(torch.uint8), ref_q.view(torch.uint8))
+    assert torch.equal(tp_s, ref_s)
+
+
+@pytest.mark.parametrize("rows,cols", [(256, 512), (512, 256), (128, 1024), (1024, 768), (384, 512)])
+def test_token_tp_cast_matches_materialized_transpose(rows, cols):
+    """The fused transpose+cast is *bit-identical* to unfused."""
+    torch.manual_seed(rows + cols)
+    x = torch.randn(rows, cols, device="cuda", dtype=torch.bfloat16) * 0.3
+
+    ref_q, ref_s = per_token_cast_to_fp8_triton(x.transpose(0, 1).contiguous(), False)
+    tp_q, tp_s = per_token_cast_to_fp8_tp_triton(x, False)
+
+    assert tp_q.shape == ref_q.shape == (cols, rows)
+    assert tp_s.shape == ref_s.shape
+    assert tp_q.is_contiguous()
+    assert torch.equal(tp_q.view(torch.uint8), ref_q.view(torch.uint8))
+    assert torch.equal(tp_s, ref_s)

From 90b074492ea4ef29e31b4d184b099d7878d8c789 Mon Sep 17 00:00:00 2001
From: Matej Sirovatka <54212263+S1ro1@users.noreply.github.com>
Date: Mon, 8 Jun 2026 08:58:23 -0700
Subject: [PATCH 03/12] Feat: fix weight reload to cpu optim (#2729)

---
 src/prime_rl/trainer/ckpt.py | 74 +++++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/src/prime_rl/trainer/ckpt.py b/src/prime_rl/trainer/ckpt.py
index dd23ae345e..d699b5f097 100644
--- a/src/prime_rl/trainer/ckpt.py
+++ b/src/prime_rl/trainer/ckpt.py
@@ -9,7 +9,7 @@
 
 import torch
 from torch import Tensor, nn
-from torch.distributed.checkpoint.state_dict import get_state_dict, set_state_dict
+from torch.distributed.checkpoint.state_dict import get_state_dict, set_model_state_dict, set_state_dict
 from torch.distributed.checkpoint.state_dict_loader import load as dcp_load
 from torch.distributed.checkpoint.state_dict_saver import save as dcp_save
 from torch.distributed.checkpoint.stateful import Stateful
@@ -67,8 +67,13 @@ def _get_base_optimizers(self) -> list[Optimizer]:
         """Extract base optimizers from wrappers like CPUOffloadOptimizer."""
         return [opt.base_optimizer if isinstance(opt, CPUOffloadOptimizer) else opt for opt in self.optimizers]
 
+    def _has_cpu_offload(self) -> bool:
+        return any(isinstance(opt, CPUOffloadOptimizer) for opt in self.optimizers)
+
     def state_dict(self) -> dict[str, Any]:
-        # Move CPU-offloaded states to GPU before checkpointing
+        # get_state_dict requires optimizer states to live on param.device. For an
+        # already-initialized CPU-offload optimizer that means staging back to GPU
+        # before the call; the matching offload happens after the dict is built.
         for opt in self.optimizers:
             if isinstance(opt, CPUOffloadOptimizer) and opt._initialized:
                 opt._move_states("cuda")
@@ -88,26 +93,53 @@ def state_dict(self) -> dict[str, Any]:
             progress_state_dict = asdict(self.progress)
             state_dict["progress"] = progress_state_dict
 
-        # Move states back to CPU
+        # Offload optimizer states to CPU for every CPUOffloadOptimizer, including
+        # ones that were uninitialized on entry. dcp_load calls this method to build
+        # a template, and get_state_dict's internal _init_optim_state populates an
+        # empty optim.state with GPU tensors. Optimizer.state_dict() returns those
+        # values via shallow copy, so optimizer_state_dict["state"][fqn] is the same
+        # dict object as optim.state[param]. Replacing the entries with CPU tensors
+        # in place therefore flips the template too — dcp_load reads bytes from disk
+        # straight into CPU storage and optim.state is loaded by the time the load
+        # returns, without GPU optimizer state ever existing for the duration of the
+        # read.
+        has_cpu_offload = self._has_cpu_offload()
         for opt in self.optimizers:
-            if isinstance(opt, CPUOffloadOptimizer) and opt._initialized:
+            if isinstance(opt, CPUOffloadOptimizer):
                 opt._move_states("cpu")
+        if has_cpu_offload:
+            gc.collect()
+            torch.cuda.empty_cache()
 
         return state_dict
 
     def load_state_dict(self, state_dict: dict[str, Any]):
         base_optimizers = self._get_base_optimizers()
-        set_state_dict(
-            self.model, base_optimizers, model_state_dict=state_dict["model"], optim_state_dict=state_dict["optimizers"]
-        )
+        has_cpu_offload = self._has_cpu_offload()
 
-        # Re-initialize CPU offload wrappers after loading
-        has_cpu_offload = False
-        for opt in self.optimizers:
-            if isinstance(opt, CPUOffloadOptimizer):
-                opt._move_states("cpu")
-                opt._initialized = True
-                has_cpu_offload = True
+        if has_cpu_offload:
+            # When CPU offload is on, the optimizer is already loaded by the time we
+            # get here: state_dict() handed dcp_load a template whose tensors share
+            # storage with optim.state[p][k], and dcp_load wrote the checkpoint bytes
+            # directly into those tensors via target_tensor.copy_(...). Running
+            # set_state_dict on the optimizer would route the loaded CPU values
+            # through Optimizer.load_state_dict, whose _cast hook does
+            # value.to(param.dtype, param.device) and would allocate a fresh GPU
+            # copy of every state tensor — undoing the in-place CPU load and
+            # detaching optim.state from the tensors we just populated. So we only
+            # apply the model side here and flip the wrappers to initialized so
+            # subsequent steps take the steady-state path.
+            set_model_state_dict(self.model, model_state_dict=state_dict["model"])
+            for opt in self.optimizers:
+                if isinstance(opt, CPUOffloadOptimizer):
+                    opt._initialized = True
+        else:
+            set_state_dict(
+                self.model,
+                base_optimizers,
+                model_state_dict=state_dict["model"],
+                optim_state_dict=state_dict["optimizers"],
+            )
 
         if self.scheduler is not None:
             self.scheduler.load_state_dict(state_dict["scheduler"])
@@ -115,15 +147,13 @@ def load_state_dict(self, state_dict: dict[str, Any]):
             for key, value in state_dict["progress"].items():
                 setattr(self.progress, key, value)
 
-        # Reclaim GPU memory freed by moving optimizer states to CPU.
-        # After set_state_dict + _move_states("cpu"), the optimizer states live on CPU,
-        # but the state_dict (owned by dcp_load) still holds references to stale GPU
-        # optimizer tensors. Clearing them and flushing the CUDA cache prevents OOM on
-        # the first training step.
+        # state_dict is the same dict object that dcp_load held internally; clearing
+        # it drops the last references to the loaded tensor wrappers so the cuda
+        # allocator can release whatever blocks it cached during the read.
         if has_cpu_offload:
-            state_dict.clear()  # drop stale GPU tensor references from dcp_load
-            gc.collect()  # break any circular references so tensors are freed
-            torch.cuda.empty_cache()  # return freed GPU memory to CUDA
+            state_dict.clear()
+            gc.collect()
+            torch.cuda.empty_cache()
 
 
 class CheckpointManager:

From c2a5fa466c358d150a65b8b18e2e2197b5231cfa Mon Sep 17 00:00:00 2001
From: minh hoang <13672394+eexwhyzee@users.noreply.github.com>
Date: Mon, 8 Jun 2026 15:16:58 -0700
Subject: [PATCH 04/12] chore(renderers): bump submodule to
 renderers-v0.1.8.dev41 (#2732)

---
 deps/renderers | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deps/renderers b/deps/renderers
index 596c15ffd9..e6dba5ad6c 160000
--- a/deps/renderers
+++ b/deps/renderers
@@ -1 +1 @@
-Subproject commit 596c15ffd9da779290bfd0fdcad520688de14a4e
+Subproject commit e6dba5ad6c50ca83d4ffa462145037082542e52a

From 53b22b55c8bcc6bcdc13ce42c7830f18aee2668b Mon Sep 17 00:00:00 2001
From: samsja <55492238+samsja@users.noreply.github.com>
Date: Mon, 8 Jun 2026 16:34:24 -0700
Subject: [PATCH 05/12] fix: add prime-pydantic-config as direct dep so uv uses
 editable path source (#2733)

The [tool.uv.sources] override for prime-pydantic-config was being ignored
because it was only a transitive dependency (via prime-rl-configs). uv only
applies source overrides for packages that appear in project.dependencies.
Adding it as a direct dependency makes uv resolve from the local editable
path (deps/pydantic-config) instead of PyPI.
---
 pyproject.toml |  1 +
 uv.lock        | 28 ++++++++++++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 625a879d5a..b5f2a2d2dd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,7 @@ readme = "README.md"
 requires-python = "~=3.12.0"
 dependencies = [
     "prime-rl-configs",
+    "prime-pydantic-config",
     "beartype>=0.21.0",
     "datasets>=4.0.0",
     "jaxtyping>=0.3.2",
diff --git a/uv.lock b/uv.lock
index 5c15a1668a..03e1a1ddae 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3476,21 +3476,35 @@ wheels = [
 
 [[package]]
 name = "prime-pydantic-config"
-version = "0.3.0.dev86"
-source = { registry = "https://pypi.org/simple" }
+version = "0.3.0"
+source = { editable = "deps/pydantic-config" }
 dependencies = [
     { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ad/34/006fc720a8fcda84706793582d50a2028bf6950fb7a0eedb59d3f6555261/prime_pydantic_config-0.3.0.dev86.tar.gz", hash = "sha256:1139bb6d21a8cf134e212ee4e529e5150f2db7422b42eae3ca69a5c77b8a69f5", size = 75656, upload-time = "2026-06-02T01:08:19.079Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/b6/a3/ded48c436cd56ddac3b216458ac458eaf069b55e0ca3be506b2508d16fa2/prime_pydantic_config-0.3.0.dev86-py3-none-any.whl", hash = "sha256:51ac33ae1b5de9ba2e44eb9a91242d9dd783784234942f166f6e8974bcdf1577", size = 27437, upload-time = "2026-06-02T01:08:20.23Z" },
-]
 
 [package.optional-dependencies]
 toml = [
     { name = "tomli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
+[package.metadata]
+requires-dist = [
+    { name = "pydantic", specifier = ">=2.0.0" },
+    { name = "pyyaml", marker = "extra == 'all'" },
+    { name = "pyyaml", marker = "extra == 'yaml'" },
+    { name = "tomli", marker = "extra == 'all'" },
+    { name = "tomli", marker = "extra == 'toml'" },
+]
+provides-extras = ["yaml", "toml", "all"]
+
+[package.metadata.requires-dev]
+dev = [
+    { name = "pre-commit", specifier = ">=3.0.0" },
+    { name = "pytest", specifier = ">=9.0.3" },
+    { name = "rich", specifier = ">=15.0.0" },
+    { name = "ruff", specifier = ">=0.12.1" },
+]
+
 [[package]]
 name = "prime-rl"
 version = "0.5.0"
@@ -3509,6 +3523,7 @@ dependencies = [
     { name = "nvidia-ml-py", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "prime-pydantic-config", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-rl-configs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "pyarrow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "pybase64", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -3662,6 +3677,7 @@ requires-dist = [
     { name = "opencode-science", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_science" },
     { name = "opencode-swe", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_swe" },
     { name = "prime", specifier = ">=0.6.4" },
+    { name = "prime-pydantic-config", editable = "deps/pydantic-config" },
     { name = "prime-rl", extras = ["disagg"], marker = "extra == 'all'" },
     { name = "prime-rl", extras = ["flash-attn"], marker = "extra == 'all'" },
     { name = "prime-rl", extras = ["flash-attn-3"], marker = "extra == 'all'" },

From 54012df504f30f66f6d6094da73a222494d7e867 Mon Sep 17 00:00:00 2001
From: faresobeid <111092724+faresobeid@users.noreply.github.com>
Date: Tue, 9 Jun 2026 01:48:12 +0100
Subject: [PATCH 06/12] orch improvements (#2725)

* orch improvements

* fixes
---
 pyproject.toml                            |   1 +
 src/prime_rl/orchestrator/dispatcher.py   |  43 +++---
 src/prime_rl/orchestrator/envs.py         | 151 +---------------------
 src/prime_rl/orchestrator/orchestrator.py |  22 +++-
 src/prime_rl/transport/zmq.py             |   9 +-
 uv.lock                                   |   2 +
 6 files changed, 50 insertions(+), 178 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b5f2a2d2dd..719b1228e6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,7 @@ dependencies = [
     "aiolimiter>=1.2.1",
     "tenacity>=8.2.0",
     "openai>=1.106.1",
+    "orjson>=3.11.0",
     "rich>=14.0.0",
     "setproctitle>=1.3.0",
     "uvloop>=0.21.0",
diff --git a/src/prime_rl/orchestrator/dispatcher.py b/src/prime_rl/orchestrator/dispatcher.py
index 133bc08da0..dcc5455daa 100644
--- a/src/prime_rl/orchestrator/dispatcher.py
+++ b/src/prime_rl/orchestrator/dispatcher.py
@@ -12,7 +12,9 @@
   source's emptiness), so in-flight rollouts of the opposite kind drain
   naturally on either side of an eval boundary.
 - ``on_new_version`` (called by the watcher) bumps ``off_policy_steps`` on
-  every in-flight rollout and drops groups past ``max_off_policy_steps``.
+  in-flight train rollouts and drops groups past ``max_off_policy_steps``.
+  Eval rollouts are measurements for the policy version they started with,
+  so they are allowed to finish even if training advances.
   Cancellations surface as synthetic ``Cancelled`` markers so the sink's
   count-to-``group_size`` finalization still fires.
 """
@@ -262,32 +264,33 @@ async def stop(self) -> None:
     async def on_new_version(self, step: int) -> None:
         """Bump off-policy counters and drop groups past
         ``max_off_policy_steps`` (drop_group emits ``Cancelled`` markers so
-        the sink still finalizes the partial group)."""
-        stale_groups: dict[uuid.UUID, RolloutKind] = {}
-        cancelled_by_kind: dict[RolloutKind, int] = {"train": 0, "eval": 0}
+        the sink still finalizes the partial group). Eval rollouts are not
+        aged because they are tied to their start-time policy version."""
+        stale_groups: set[uuid.UUID] = set()
+        cancelled = 0
         for meta in self.inflight.values():
+            if meta.kind != "train":
+                continue
             meta.off_policy_steps += 1
             if meta.off_policy_steps > self.max_off_policy_steps:
-                stale_groups[meta.group_id] = meta.kind
+                stale_groups.add(meta.group_id)
 
-        for gid, kind in stale_groups.items():
+        for gid in stale_groups:
             removed = await self.drop_group(gid)
-            cancelled_by_kind[kind] += removed
+            cancelled += removed
 
-        for kind in ("train", "eval"):
-            n = cancelled_by_kind[kind]
-            if n:
-                get_logger().warning(
-                    f"Cancelled {n} {kind} rollouts past max_off_policy_steps={self.max_off_policy_steps}. "
-                    "Consider increasing it to avoid this."
-                )
+        if cancelled:
+            get_logger().warning(
+                f"Cancelled {cancelled} train rollouts past max_off_policy_steps={self.max_off_policy_steps}. "
+                "Consider increasing it to avoid this."
+            )
 
     async def fill_inflight(self) -> None:
         """Schedule new rollouts up to ``max_inflight``, honoring
-        ``self.mode``. When ``PREFER_EVAL``'s source exhausts we flip back
-        to ``PREFER_TRAIN`` so the eval tail drains alongside fresh train."""
-        if not self.dispatch_allowed.is_set():
-            return
+        ``self.mode``. Eval scheduling ignores the orchestrator's dispatch
+        gate (evals are version-pinned measurements); only train scheduling
+        respects it. When ``PREFER_EVAL``'s source exhausts we flip back to
+        ``PREFER_TRAIN`` so the eval tail drains alongside fresh train."""
         while True:
             if self.available_permits <= 0:
                 return
@@ -308,7 +311,9 @@ async def fill_inflight(self) -> None:
                 scheduled = await self.try_schedule("eval")
                 if not scheduled:
                     return
-            else:  # PREFER_TRAIN
+            else:  # PREFER_TRAIN — respects the orchestrator's dispatch gate
+                if not self.dispatch_allowed.is_set():
+                    return
                 scheduled = await self.try_schedule("train")
                 if not scheduled:
                     return
diff --git a/src/prime_rl/orchestrator/envs.py b/src/prime_rl/orchestrator/envs.py
index fe02d2e61a..8d3f072720 100644
--- a/src/prime_rl/orchestrator/envs.py
+++ b/src/prime_rl/orchestrator/envs.py
@@ -1,24 +1,18 @@
 from __future__ import annotations
 
-import asyncio
 import atexit
 import multiprocessing as mp
-import time
-from collections.abc import Awaitable, Callable, Iterator, Sequence
+from collections.abc import Iterator, Sequence
 from multiprocessing.process import BaseProcess
 from pathlib import Path
 from typing import Generic, TypeVar
 
-import pandas as pd
 import verifiers as vf
 from verifiers.serve import ZMQEnvClient, ZMQEnvServer
 from verifiers.utils.serve_utils import get_free_port
 
 from prime_rl.configs.orchestrator import EnvConfig, EvalEnvConfig, TrainEnvConfig
-from prime_rl.orchestrator.eval_utils import compute_pass_at_k
-from prime_rl.utils.logger import ProgressTracker, get_logger
-from prime_rl.utils.monitor import get_monitor
-from prime_rl.utils.utils import capitalize
+from prime_rl.utils.logger import get_logger
 
 REQUIRED_STATE_COLUMNS = ["trajectory"]
 
@@ -183,147 +177,6 @@ def __init__(self, config: EvalEnvConfig):
         self.sampling_args = config.sampling.to_sampling_args()
         self.examples = self.env.get_eval_dataset(n=config.num_examples).to_list()
 
-    async def evaluate(
-        self,
-        model_name: str,
-        get_client: Callable[[], Awaitable[vf.ClientConfig]],
-        step: int,
-        cache_salt: str,
-    ) -> list[vf.RolloutOutput]:
-        num_examples = len(self.examples)
-        group_size = self.config.group_size
-        get_logger().info(f"Evaluating {self.name} ({num_examples=}, {group_size=})")
-        total_rollouts = num_examples * group_size
-        pbar = ProgressTracker(total=total_rollouts, desc=f"Evaluating {self.name}")
-        eval_start = time.perf_counter()
-
-        if self.requires_group_scoring:
-
-            async def run_with_progress(example: dict) -> list[vf.RolloutOutput] | None:
-                """Run group_size rollouts as a scored group for one example."""
-                try:
-                    client = await get_client()
-                    outputs = await self.run_group(
-                        client=client,
-                        example=example,
-                        model_name=model_name,
-                        group_size=group_size,
-                        cache_salt=cache_salt,
-                    )
-                    pbar.update(group_size)
-                    return outputs
-                except Exception as e:
-                    get_logger().warning(f"Group failed: {e}")
-                    pbar.update(group_size)
-                    return None
-
-            coros = [run_with_progress(example) for example in self.examples]
-
-        else:
-
-            async def run_with_progress(example: dict) -> list[vf.RolloutOutput] | None:
-                """Run a single rollout for one example."""
-                try:
-                    client = await get_client()
-                    output = await self.run_rollout(
-                        client=client, example=example, model_name=model_name, cache_salt=cache_salt
-                    )
-                    pbar.update(1)
-                    return [output]
-                except Exception as e:
-                    get_logger().warning(f"Rollout failed: {e}")
-                    pbar.update(1)
-                    return None
-
-            coros = [run_with_progress(example) for example in self.examples for _ in range(group_size)]
-
-        try:
-            results = await asyncio.gather(*coros)
-        finally:
-            pbar.close()
-
-        successful_outputs = [o for group in results if group is not None for o in group]
-        failed_count = total_rollouts - len(successful_outputs)
-        eval_time = time.perf_counter() - eval_start
-
-        if failed_count:
-            get_logger().warning(
-                f"{failed_count}/{total_rollouts} ({failed_count / total_rollouts * 100:.1f}%) rollouts failed"
-            )
-
-        if not successful_outputs:
-            get_logger().warning(f"All rollouts failed for {self.name}, skipping logging metrics")
-            get_monitor().log(
-                {
-                    f"eval/{self.name}/failed_rollouts": failed_count / total_rollouts,
-                    "step": step,
-                },
-                step=step,
-            )
-            return []
-
-        # Log metrics
-        monitor = get_monitor()
-
-        rows = [
-            {
-                "example_id": o["example_id"],
-                "reward": o["reward"],
-                "completion_len": o["token_usage"]["final_output_tokens"],
-                "is_truncated": o["is_truncated"],
-                "has_error": o.get("error") is not None,
-                "no_response": not o.get("completion"),
-            }
-            for o in successful_outputs
-        ]
-        results_df = pd.DataFrame(rows)
-
-        unique_rewards = results_df.reward.dropna().unique()
-        could_be_binary = set(unique_rewards).issubset({0.0, 1.0})
-        if could_be_binary:
-            pass_at_k = (
-                results_df.groupby("example_id")
-                .apply(lambda x: compute_pass_at_k(x.reward.dropna()), include_groups=False)
-                .apply(pd.Series)
-            )
-        else:
-            pass_at_k = None
-            get_logger().warning("Skipping computing pass@k rates because the task rewards appear to be non-binary")
-
-        message = f"Evaluated {self.name} in {eval_time:.2f}s (Avg@{group_size}={results_df.reward.mean():.4f}"
-        if could_be_binary:
-            assert pass_at_k is not None
-            for pass_rate, pass_rate_score in pd.Series(pass_at_k.mean()).items():
-                message += f", {capitalize(str(pass_rate))}: {pass_rate_score:.4f}"
-
-        message += (
-            f", No-response: {results_df.no_response.mean() * 100:.1f}%"
-            f", Completion Length: {results_df.completion_len.mean():.2f} (±{results_df.completion_len.std():.2f}, ∈[{results_df.completion_len.min():.2f}, {results_df.completion_len.max():.2f}])"
-            f", Truncated: {results_df.is_truncated.mean() * 100:.1f}%)"
-        )
-        get_logger().success(message)
-
-        eval_metrics = {
-            f"avg@{group_size}": float(results_df.reward.mean()),
-            "no_response/mean": float(results_df.no_response.mean()),
-            "no_response/count": int(results_df.no_response.sum()),
-            "completion_len/mean": results_df.completion_len.mean().item(),
-            "completion_len/max": results_df.completion_len.max().item(),
-            "completion_len/min": results_df.completion_len.min().item(),
-            "is_truncated/mean": results_df.is_truncated.mean().item(),
-            "failed_rollouts": failed_count / total_rollouts,
-            "time": eval_time,
-        }
-        if could_be_binary:
-            assert pass_at_k is not None
-            eval_metrics.update(pd.Series(pass_at_k.mean()).to_dict())
-        eval_metrics = {f"eval/{self.name}/{key}": v for key, v in eval_metrics.items()}
-        eval_metrics["step"] = step
-        monitor.log(eval_metrics, step=step)
-        monitor.log_eval_samples(successful_outputs, env_name=self.name, step=step)
-
-        return successful_outputs
-
 
 EnvT = TypeVar("EnvT", bound=Env)
 
diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py
index 902c8b963b..d685b519b6 100644
--- a/src/prime_rl/orchestrator/orchestrator.py
+++ b/src/prime_rl/orchestrator/orchestrator.py
@@ -499,7 +499,7 @@ async def main_loop(self) -> None:
                 assert self.eval_sink is not None  # eval rollouts only emitted when eval is configured
                 eval_batch = self.eval_sink.add(rollout)
                 if eval_batch is not None:
-                    self.finalize_eval_batch(eval_batch)
+                    await self.finalize_eval_batch(eval_batch)
                 continue
 
             assert isinstance(rollout, TrainRollout)
@@ -761,7 +761,7 @@ def log_train_batch(self, batch: TrainBatch, *, step: int, step_time: float) ->
             )
         get_logger().success("\n\t\t ".join(lines))
 
-    def finalize_eval_batch(self, batch: EvalBatch) -> None:
+    async def finalize_eval_batch(self, batch: EvalBatch) -> None:
         """Persist + log one completed eval epoch (save_rollouts,
         monitor.log_eval_samples, monitor.log)."""
         if not batch.rollouts:
@@ -770,24 +770,32 @@ def finalize_eval_batch(self, batch: EvalBatch) -> None:
 
         rollout_dicts = [r.to_dict() for r in batch.rollouts]
         step_path = get_step_path(get_rollout_dir(self.config.output_dir), batch.step)
-        save_rollouts(
+        await asyncio.to_thread(
+            save_rollouts,
             rollout_dicts,
             step_path / f"eval_rollouts_{batch.env_name}.jsonl",
             exclude_keys={"trajectory"},
         )
         self.monitor.log_eval_samples(rollout_dicts, env_name=batch.env_name, step=batch.step)
-        self.monitor.log(batch.metrics.to_wandb_dict(env_name=batch.env_name, step=batch.step), step=batch.step)
+        policy_versions = {r.policy_version for r in batch.rollouts}
+        policy_version = min(policy_versions)
+        if len(policy_versions) > 1:
+            get_logger().warning(
+                f"Eval {batch.env_name} step {batch.step} had mixed policy versions: {sorted(policy_versions)}"
+            )
+        metrics = batch.metrics.to_wandb_dict(env_name=batch.env_name, step=batch.step)
+        metrics[f"eval/{batch.env_name}/policy_version"] = float(policy_version)
+        self.monitor.log(metrics, step=batch.step)
 
         n_total = batch.metrics.n_rollouts
         error_rate = ((batch.metrics.n_cancelled + batch.metrics.n_errored) / n_total) if n_total else 0.0
-        max_off_policy = max((r.off_policy_steps for r in batch.rollouts), default=0)
         triggered_at = self.eval_triggered_at.pop((batch.env_name, batch.step), None)
         elapsed = (time.perf_counter() - triggered_at) if triggered_at is not None else 0.0
 
         get_logger().success(
             f"Evaluated {batch.env_name} (Step {batch.step}) | "
-            f"{format_time(elapsed):>7} | Reward {batch.metrics.reward_mean:.4f} | "
-            f"Turns {batch.metrics.num_turns_mean:.1f} | Max Off-Policy {max_off_policy} | "
+            f"Policy v{policy_version} | {format_time(elapsed):>7} | Reward {batch.metrics.reward_mean:.4f} | "
+            f"Turns {batch.metrics.num_turns_mean:.1f} | "
             f"Error {error_rate:.1%} | Truncation {batch.metrics.truncation_rate:.1%}"
         )
 
diff --git a/src/prime_rl/transport/zmq.py b/src/prime_rl/transport/zmq.py
index 5577b11e50..964017ddd3 100644
--- a/src/prime_rl/transport/zmq.py
+++ b/src/prime_rl/transport/zmq.py
@@ -2,6 +2,7 @@
 from time import time
 
 import zmq
+import zmq.asyncio
 
 from prime_rl.configs.shared import ZMQTransportConfig
 from prime_rl.trainer.runs import get_multi_run_manager
@@ -20,8 +21,10 @@ class ZMQTrainingBatchSender(TrainingBatchSender):
     def __init__(self, output_dir: Path, transport: ZMQTransportConfig):
         super().__init__(output_dir)
 
-        self.context = zmq.Context.instance()
-        self.socket: zmq.Socket = self.context.socket(zmq.PUSH)
+        # Async context so ``send`` yields instead of blocking the orchestrator
+        # event loop when the trainer is slow and we hit SNDHWM.
+        self.context = zmq.asyncio.Context.instance()
+        self.socket: zmq.asyncio.Socket = self.context.socket(zmq.PUSH)
         self.socket.setsockopt(zmq.SNDHWM, transport.hwm)
         self.socket.connect(f"tcp://{transport.host}:{transport.port}")
 
@@ -35,7 +38,7 @@ def __init__(self, output_dir: Path, transport: ZMQTransportConfig):
     async def send(self, batch: TrainingBatch) -> None:
         payload = self.encoder.encode(batch)
         self.logger.debug(f"Sending batch {batch.step} to {self.sender_id}")
-        self.socket.send_multipart([self.sender_id, payload], copy=False)
+        await self.socket.send_multipart([self.sender_id, payload], copy=False)
 
     def close(self) -> None:
         try:
diff --git a/uv.lock b/uv.lock
index 03e1a1ddae..3922f32b28 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3522,6 +3522,7 @@ dependencies = [
     { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "nvidia-ml-py", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "orjson", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-pydantic-config", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-rl-configs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -3676,6 +3677,7 @@ requires-dist = [
     { name = "opencode-math", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_math" },
     { name = "opencode-science", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_science" },
     { name = "opencode-swe", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_swe" },
+    { name = "orjson", specifier = ">=3.11.0" },
     { name = "prime", specifier = ">=0.6.4" },
     { name = "prime-pydantic-config", editable = "deps/pydantic-config" },
     { name = "prime-rl", extras = ["disagg"], marker = "extra == 'all'" },

From e0f8a35c78423f044100814a2104e850e3c6e489 Mon Sep 17 00:00:00 2001
From: hallerite <git@hallerite.com>
Date: Tue, 9 Jun 2026 05:05:13 +0200
Subject: [PATCH 07/12] feat(orchestrator): per-env advantage strategy (#2721)

---
 docs/algorithms.md                            | 16 ++++
 .../src/prime_rl/configs/orchestrator.py      | 96 ++++++++++---------
 src/prime_rl/orchestrator/envs.py             |  6 ++
 src/prime_rl/orchestrator/orchestrator.py     |  1 -
 src/prime_rl/orchestrator/train_sink.py       | 10 +-
 5 files changed, 78 insertions(+), 51 deletions(-)

diff --git a/docs/algorithms.md b/docs/algorithms.md
index fdd5b6e2da..0ffe69edba 100644
--- a/docs/algorithms.md
+++ b/docs/algorithms.md
@@ -167,6 +167,22 @@ kwargs = { eps = 1e-8 }
 
 `AdvantageInputs.rollouts` is a list of `verifiers.RolloutOutput`, so you have access to the full rollout (turns, tool calls, custom metadata) — not just the reward. Use this for anything reward-shaping-like that needs trajectory context.
 
+### Per-Env Advantage
+
+`advantage` can be set per training environment, so mixed-env runs can give each env its own advantage computation. An env that doesn't set its own inherits the top-level `[orchestrator.advantage]`:
+
+```toml
+[orchestrator.advantage]
+type = "default"  # the default every env inherits unless it overrides
+
+[[orchestrator.train.env]]
+id = "math-env"   # inherits the default above
+
+[[orchestrator.train.env]]
+id = "agent-env"
+advantage = { type = "custom", import_path = "my_module.normalized_advantage" }
+```
+
 ## Filters
 
 Filters drop rollouts between scoring and training. Built-ins (composable):
diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
index be5fe249f3..83f3fea7a6 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
@@ -143,6 +143,49 @@ def _deprecate_max_tokens(cls, data: Any) -> Any:
         return data
 
 
+class TokensLengthPenaltyConfig(BaseConfig):
+    type: Literal["tokens"] = "tokens"
+
+    completion_weight: float = Field(1.0, ge=0, allow_inf_nan=False)
+    """Weight on model completion tokens. Finite and non-negative."""
+
+    tool_response_weight: float = Field(1.0, ge=0, allow_inf_nan=False)
+    """Weight on tool-response tokens (read from the rollout's ``*_total_tool_response_tokens`` harness metric; 0 if absent). Finite and non-negative."""
+
+
+class TurnsLengthPenaltyConfig(BaseConfig):
+    type: Literal["turns"] = "turns"
+
+
+LengthPenaltyConfig: TypeAlias = Annotated[
+    TokensLengthPenaltyConfig | TurnsLengthPenaltyConfig,
+    Field(discriminator="type"),
+]
+
+
+class DefaultAdvantageConfig(BaseConfig):
+    type: Literal["default"] = "default"
+
+    length_penalty: LengthPenaltyConfig | None = None
+    """Correctness-gated length penalty. ``tokens`` shapes by weighted token cost; ``turns`` shapes by trajectory turn count; None disables shaping. In mixed groups, lower-cost correct rollouts get amplified advantage (up to 2x), higher-cost correct rollouts are unchanged, incorrect untouched. In all-correct groups, below-average-cost rollouts get advantage in [0, 1], others get 0."""
+
+
+class CustomAdvantageConfig(BaseConfig):
+    type: Literal["custom"] = "custom"
+
+    import_path: str
+    """Import path to the advantage function (e.g. ``my_module.my_advantage``)."""
+
+    kwargs: dict[str, Any] = Field(default_factory=dict)
+    """Kwargs forwarded to the advantage function."""
+
+
+AdvantageConfig: TypeAlias = Annotated[
+    DefaultAdvantageConfig | CustomAdvantageConfig,
+    Field(discriminator="type"),
+]
+
+
 class EnvConfig(BaseConfig):
     id: str = "reverse-text"
     """Registered verifiers environment ID (e.g. ``math-env``, ``primeintellect/math-env``). May include an ``@version`` suffix for installation."""
@@ -214,6 +257,11 @@ class TrainEnvConfig(EnvConfig):
     """Rollouts generated per example for GRPO group-relative advantages.
     Inherits from ``orchestrator.group_size`` when unset."""
 
+    advantage: AdvantageConfig | None = None
+    """Advantage strategy for this env's GRPO groups. Inherits from the top-level
+    ``orchestrator.advantage`` when unset; set a different ``default``/``custom``
+    config to give this env its own advantage computation."""
+
 
 class EvalEnvConfig(EnvConfig):
     sampling: EvalSamplingConfig = EvalSamplingConfig()
@@ -374,49 +422,6 @@ class CheckpointConfig(BaseConfig):
     """Skip loading the progress from checkpoint."""
 
 
-class TokensLengthPenaltyConfig(BaseConfig):
-    type: Literal["tokens"] = "tokens"
-
-    completion_weight: float = Field(1.0, ge=0, allow_inf_nan=False)
-    """Weight on model completion tokens. Finite and non-negative."""
-
-    tool_response_weight: float = Field(1.0, ge=0, allow_inf_nan=False)
-    """Weight on tool-response tokens (read from the rollout's ``*_total_tool_response_tokens`` harness metric; 0 if absent). Finite and non-negative."""
-
-
-class TurnsLengthPenaltyConfig(BaseConfig):
-    type: Literal["turns"] = "turns"
-
-
-LengthPenaltyConfig: TypeAlias = Annotated[
-    TokensLengthPenaltyConfig | TurnsLengthPenaltyConfig,
-    Field(discriminator="type"),
-]
-
-
-class DefaultAdvantageConfig(BaseConfig):
-    type: Literal["default"] = "default"
-
-    length_penalty: LengthPenaltyConfig | None = None
-    """Correctness-gated length penalty. ``tokens`` shapes by weighted token cost; ``turns`` shapes by trajectory turn count; None disables shaping. In mixed groups, lower-cost correct rollouts get amplified advantage (up to 2x), higher-cost correct rollouts are unchanged, incorrect untouched. In all-correct groups, below-average-cost rollouts get advantage in [0, 1], others get 0."""
-
-
-class CustomAdvantageConfig(BaseConfig):
-    type: Literal["custom"] = "custom"
-
-    import_path: str
-    """Import path to the advantage function (e.g. ``my_module.my_advantage``)."""
-
-    kwargs: dict[str, Any] = Field(default_factory=dict)
-    """Kwargs forwarded to the advantage function."""
-
-
-AdvantageConfig: TypeAlias = Annotated[
-    DefaultAdvantageConfig | CustomAdvantageConfig,
-    Field(discriminator="type"),
-]
-
-
 # Flags rare tokens generated at high entropy (Section 5.2, https://arxiv.org/abs/2510.02387).
 class GibberishFilterConfig(BaseConfig):
     type: Literal["gibberish"] = "gibberish"
@@ -876,6 +881,11 @@ def resolve_batching(self):
             if "group_size" not in env_cfg.model_fields_set:
                 env_cfg.group_size = self.group_size
 
+        # Propagate the top-level ``advantage`` into each train env that didn't set its own.
+        for env_cfg in self.train.env:
+            if "advantage" not in env_cfg.model_fields_set:
+                env_cfg.advantage = self.advantage
+
         # Resolve train env num_workers from max_inflight_rollouts
         for env_cfg in self.train.env:
             if env_cfg.num_workers == "auto":
diff --git a/src/prime_rl/orchestrator/envs.py b/src/prime_rl/orchestrator/envs.py
index 8d3f072720..34e12aa63f 100644
--- a/src/prime_rl/orchestrator/envs.py
+++ b/src/prime_rl/orchestrator/envs.py
@@ -12,6 +12,7 @@
 from verifiers.utils.serve_utils import get_free_port
 
 from prime_rl.configs.orchestrator import EnvConfig, EvalEnvConfig, TrainEnvConfig
+from prime_rl.orchestrator.advantage import AdvantageFn, setup_advantage_fn
 from prime_rl.utils.logger import get_logger
 
 REQUIRED_STATE_COLUMNS = ["trajectory"]
@@ -164,6 +165,11 @@ class TrainEnv(Env):
     def __init__(self, config: TrainEnvConfig):
         super().__init__(config)
         self.sampling_args = config.sampling.to_sampling_args()
+        # Built once — custom advantage funcs do an ``import_object`` that we
+        # don't want to pay for per group. ``None`` = reward-only path.
+        self.advantage_fn: AdvantageFn | None = (
+            setup_advantage_fn(config.advantage) if config.advantage is not None else None
+        )
 
     def get_dataset(self, seed: int | None = None):
         return self.env.get_dataset(seed=seed)
diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py
index d685b519b6..347c712b66 100644
--- a/src/prime_rl/orchestrator/orchestrator.py
+++ b/src/prime_rl/orchestrator/orchestrator.py
@@ -389,7 +389,6 @@ async def setup(self) -> None:
             mm_token_type_ids_mapping=self.mm_token_type_ids_mapping,
             batch_size=config.batch_size,
             token_batch_size=config.token_batch_size,
-            advantage_config=config.advantage,
             pre_filters=pre_filters,
             post_filters=post_filters,
         )
diff --git a/src/prime_rl/orchestrator/train_sink.py b/src/prime_rl/orchestrator/train_sink.py
index 26e7b915b0..f79a0d5eff 100644
--- a/src/prime_rl/orchestrator/train_sink.py
+++ b/src/prime_rl/orchestrator/train_sink.py
@@ -17,8 +17,8 @@
 import uuid
 from collections import defaultdict
 
-from prime_rl.configs.orchestrator import AdvantageConfig, OrchestratorConfig
-from prime_rl.orchestrator.advantage import assign_advantages, setup_advantage_fn
+from prime_rl.configs.orchestrator import OrchestratorConfig
+from prime_rl.orchestrator.advantage import assign_advantages
 from prime_rl.orchestrator.envs import TrainEnvs
 from prime_rl.orchestrator.filters import RolloutFilter, apply_filters
 from prime_rl.orchestrator.trajectories import (
@@ -44,7 +44,6 @@ def __init__(
         mm_token_type_ids_mapping: dict[int, int] | None,
         batch_size: int | None,
         token_batch_size: int | None,
-        advantage_config: AdvantageConfig | None,
         pre_filters: list[RolloutFilter],
         post_filters: list[RolloutFilter],
     ) -> None:
@@ -58,9 +57,6 @@ def __init__(
         self.mm_token_type_ids_mapping = mm_token_type_ids_mapping
         self.batch_size = batch_size
         self.token_batch_size = token_batch_size
-        # Built once — custom advantage funcs do an ``import_object`` and
-        # we don't want to pay that per group. ``None`` = reward-only path
-        self.advantage_fn = setup_advantage_fn(advantage_config) if advantage_config is not None else None
         self.pre_filters = pre_filters
         self.post_filters = post_filters
 
@@ -200,7 +196,7 @@ def process_group(self, group_id: uuid.UUID) -> None:
             )
             return
 
-        assign_advantages(survivors, self.advantage_fn)
+        assign_advantages(survivors, self.train_envs.get(env_name).advantage_fn)
 
         # Propagate to the pre-tokenized samples so the orchestrator can
         # collect samples at ship time without re-walking rollouts. The env

From 0695f9caa50a439331d064dfce0411be71383924 Mon Sep 17 00:00:00 2001
From: Tim Kostolansky <39891386+tim0120@users.noreply.github.com>
Date: Tue, 9 Jun 2026 12:34:20 -0700
Subject: [PATCH 08/12] fix: allow sft without teacher (#2720)

---
 .../src/prime_rl/configs/orchestrator.py          | 15 +++++++--------
 src/prime_rl/orchestrator/dispatcher.py           | 11 +++++------
 src/prime_rl/orchestrator/orchestrator.py         | 10 ++++++----
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
index 83f3fea7a6..55a2210abe 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
@@ -522,7 +522,7 @@ class OrchestratorConfig(BaseConfig):
     """Typed renderer config (``renderers.RendererConfig`` discriminated
     union). Defaults to ``"auto"``, which resolves from
     ``tokenizer.name_or_path`` via ``MODEL_RENDERER_MAP``. ``None``
-    opts into MITO (``openai_chat_completions``); SFT mode forces this."""
+    opts into MITO (``openai_chat_completions``)."""
 
     pool_size: int | None = Field(None, ge=1)
     """Number of renderer slots shared across concurrent rollouts. Bump
@@ -759,11 +759,10 @@ def validate_unique_filter_types(self):
 
     @model_validator(mode="after")
     def _force_no_renderer_for_sft(self):
-        """SFT rolls out via the teacher's plain chat-completions endpoint; the
-        renderer client doesn't apply. Force ``renderer=None`` so the user
-        doesn't have to remember to set it. Declared before the renderer
-        validators below so they see the corrected value."""
-        if self.training_mode == "sft":
+        """Teacher-backed SFT rolls out via the teacher's plain chat-completions
+        endpoint; the renderer client doesn't apply. When no teacher is
+        configured, SFT uses the student rollout path and keeps the renderer."""
+        if self.training_mode == "sft" and self.teacher is not None:
             self.renderer = None
         return self
 
@@ -773,8 +772,8 @@ def validate_training_mode(self):
         has_teacher = self.teacher is not None
         if self.training_mode == "rl" and has_teacher:
             raise ValueError("orchestrator.teacher must not be set when training_mode = 'rl'.")
-        if self.training_mode in ("opd", "sft") and not has_teacher:
-            raise ValueError(f"orchestrator.teacher must be configured when training_mode = '{self.training_mode}'.")
+        if self.training_mode == "opd" and not has_teacher:
+            raise ValueError("orchestrator.teacher must be configured when training_mode = 'opd'.")
         return self
 
     @model_validator(mode="after")
diff --git a/src/prime_rl/orchestrator/dispatcher.py b/src/prime_rl/orchestrator/dispatcher.py
index dcc5455daa..7ebb0149d2 100644
--- a/src/prime_rl/orchestrator/dispatcher.py
+++ b/src/prime_rl/orchestrator/dispatcher.py
@@ -133,6 +133,7 @@ def __init__(
         tasks_per_minute: float | None,
         max_off_policy_steps: int,
         training_mode: Literal["rl", "opd", "sft"],
+        use_cache_salt: bool = True,
     ) -> None:
         self.policy = policy
         self.train_envs = train_envs
@@ -144,6 +145,7 @@ def __init__(
         self.train_source = train_source
         self.eval_source = eval_source
         self.training_mode = training_mode
+        self.use_cache_salt = use_cache_salt
         self.max_off_policy_steps = max_off_policy_steps
 
         self.max_inflight = max_inflight_rollouts
@@ -413,13 +415,10 @@ async def schedule_group_rollout(self, group_id: uuid.UUID, group: GroupState) -
         if env_collection is None:
             return False
         env = env_collection.get(group.env_name)
-        # SFT-mode train rollouts hit the frozen teacher pool; salting per
-        # policy version would invalidate the teacher's prefix cache every
-        # weight update for no reason.
-        if self.training_mode == "sft" and group.kind == "train":
-            cache_salt = None
-        else:
+        if group.kind == "eval" or self.use_cache_salt:
             cache_salt = str(group.policy_version_at_start)
+        else:
+            cache_salt = None
 
         if env.requires_group_scoring:
             permits = group.rollouts_to_schedule
diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py
index 347c712b66..9823dd73d0 100644
--- a/src/prime_rl/orchestrator/orchestrator.py
+++ b/src/prime_rl/orchestrator/orchestrator.py
@@ -345,13 +345,14 @@ async def setup(self) -> None:
         else:
             get_logger().info("Training from scratch")
 
-        # SFT generates rollouts via the teacher (the student is trained on
-        # the teacher's outputs); RL / OPD generate via the student
-        if config.training_mode == "sft":
-            assert self.teacher_inference is not None, "sft mode requires teacher inference"
+        # SFT train rollouts come from the teacher when configured; otherwise
+        # they use the existing student rollout pool.
+        if config.training_mode == "sft" and self.teacher_inference is not None:
             rollout_inference = self.teacher_inference
+            use_cache_salt = False
         else:
             rollout_inference = self.student_inference
+            use_cache_salt = True
 
         self.train_source = TrainSource(self.train_envs, seed=42)
         self.eval_source: EvalSource | None = (
@@ -379,6 +380,7 @@ async def setup(self) -> None:
             tasks_per_minute=config.tasks_per_minute,
             max_off_policy_steps=config.max_off_policy_steps,
             training_mode=config.training_mode,
+            use_cache_salt=use_cache_salt,
         )
         self.metrics = MetricsBuilder(config)
         self.train_sink = TrainSink(

From c8759c370a8ce047574cbc02b670304dd6876fd1 Mon Sep 17 00:00:00 2001
From: faresobeid <111092724+faresobeid@users.noreply.github.com>
Date: Wed, 10 Jun 2026 00:01:11 +0100
Subject: [PATCH 09/12] add router replay to latentMoE models (#2738)

---
 src/prime_rl/trainer/models/layers/moe.py     | 63 ++++++++++++-------
 .../models/nemotron_h/modeling_nemotron_h.py  | 18 +++++-
 2 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/src/prime_rl/trainer/models/layers/moe.py b/src/prime_rl/trainer/models/layers/moe.py
index 14d46b2f89..52013c8753 100644
--- a/src/prime_rl/trainer/models/layers/moe.py
+++ b/src/prime_rl/trainer/models/layers/moe.py
@@ -955,32 +955,44 @@ def __init__(
         self.norm_topk_prob = norm_topk_prob
 
     def forward(
-        self, x: torch.Tensor, expert_bias: torch.Tensor | None = None
+        self,
+        x: torch.Tensor,
+        expert_bias: torch.Tensor | None = None,
+        routed_experts: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         scores = F.linear(x.float(), self.gate.float()).sigmoid()
-        scores_for_choice = scores + self.e_score_correction_bias
 
-        if expert_bias is not None:
-            scores_for_choice = scores_for_choice + expert_bias
-
-        # Group-based routing
-        if self.n_group > 1:
-            group_scores = (
-                scores_for_choice.view(-1, self.n_group, self.num_experts // self.n_group)
-                .topk(2, dim=-1)[0]
-                .sum(dim=-1)
-            )
-            group_idx = torch.topk(group_scores, k=self.topk_group, dim=-1, sorted=False)[1]
-            group_mask = torch.zeros_like(group_scores)
-            group_mask.scatter_(1, group_idx, 1)
-            score_mask = (
-                group_mask.unsqueeze(-1)
-                .expand(-1, self.n_group, self.num_experts // self.n_group)
-                .reshape(-1, self.num_experts)
-            )
-            scores_for_choice = scores_for_choice.masked_fill(~score_mask.bool(), 0.0)
+        if routed_experts is not None:
+            # Router replay: reuse the inference engine's expert selection and
+            # only recompute the gating weights from the trainer's scores. The
+            # correction/load-balancing biases only affect selection, so they
+            # are intentionally skipped here.
+            selected_experts_indices = routed_experts
+        else:
+            scores_for_choice = scores + self.e_score_correction_bias
+
+            if expert_bias is not None:
+                scores_for_choice = scores_for_choice + expert_bias
+
+            # Group-based routing
+            if self.n_group > 1:
+                group_scores = (
+                    scores_for_choice.view(-1, self.n_group, self.num_experts // self.n_group)
+                    .topk(2, dim=-1)[0]
+                    .sum(dim=-1)
+                )
+                group_idx = torch.topk(group_scores, k=self.topk_group, dim=-1, sorted=False)[1]
+                group_mask = torch.zeros_like(group_scores)
+                group_mask.scatter_(1, group_idx, 1)
+                score_mask = (
+                    group_mask.unsqueeze(-1)
+                    .expand(-1, self.n_group, self.num_experts // self.n_group)
+                    .reshape(-1, self.num_experts)
+                )
+                scores_for_choice = scores_for_choice.masked_fill(~score_mask.bool(), 0.0)
+
+            selected_experts_indices = torch.topk(scores_for_choice, k=self.top_k, dim=-1, sorted=False)[1]
 
-        selected_experts_indices = torch.topk(scores_for_choice, k=self.top_k, dim=-1, sorted=False)[1]
         top_scores = scores.gather(1, selected_experts_indices)
         routing_confidence_sum = _selected_probability_mass_sum(scores, top_scores, "sigmoid")
 
@@ -1181,8 +1193,13 @@ def forward(self, x: torch.Tensor, routed_experts: torch.Tensor | None = None) -
         bs, slen, dim = x.shape
         x_flat = x.view(-1, dim)
 
+        if routed_experts is not None:
+            # Flatten to (bs * slen, top_k); reshape (not view) since a per-layer slice is non-contiguous.
+            _, _, top_k = routed_experts.shape
+            routed_experts = routed_experts.reshape(-1, top_k)
+
         top_scores, selected_experts_indices, num_tokens_per_expert, routing_confidence_sum = self.router(
-            x_flat, self.expert_bias
+            x_flat, self.expert_bias, routed_experts=routed_experts
         )
 
         with torch.no_grad():
diff --git a/src/prime_rl/trainer/models/nemotron_h/modeling_nemotron_h.py b/src/prime_rl/trainer/models/nemotron_h/modeling_nemotron_h.py
index d9c5ec1ede..f64d8d43f2 100644
--- a/src/prime_rl/trainer/models/nemotron_h/modeling_nemotron_h.py
+++ b/src/prime_rl/trainer/models/nemotron_h/modeling_nemotron_h.py
@@ -223,6 +223,7 @@ def forward(
         position_embeddings: tuple[torch.Tensor, torch.Tensor] | None = None,
         cu_seqlens: torch.LongTensor | None = None,
         max_seqlen: int | None = None,
+        routed_experts: torch.Tensor | None = None,
     ) -> torch.Tensor:
         residual = hidden_states
         hidden_states = self.norm(hidden_states)
@@ -266,10 +267,11 @@ def forward(
         position_embeddings: tuple[torch.Tensor, torch.Tensor] | None = None,
         cu_seqlens: torch.LongTensor | None = None,
         max_seqlen: int | None = None,
+        routed_experts: torch.Tensor | None = None,
     ) -> torch.Tensor:
         residual = hidden_states
         hidden_states = self.norm(hidden_states)
-        hidden_states = self.mlp(hidden_states)
+        hidden_states = self.mlp(hidden_states, routed_experts=routed_experts)
         return residual + hidden_states
 
 
@@ -298,6 +300,7 @@ def forward(
         position_embeddings: tuple[torch.Tensor, torch.Tensor] | None = None,
         cu_seqlens: torch.LongTensor | None = None,
         max_seqlen: int | None = None,
+        routed_experts: torch.Tensor | None = None,
     ) -> torch.Tensor:
         residual = hidden_states
         hidden_states = self.norm(hidden_states)
@@ -498,7 +501,13 @@ def forward(
         input_ids: Optional[torch.LongTensor] = None,
         position_ids: Optional[torch.LongTensor] = None,
         inputs_embeds: Optional[torch.FloatTensor] = None,
+        routed_experts: Optional[torch.LongTensor] = None,
     ) -> BaseModelOutputWithPast:
+        """
+        routed_experts (`torch.LongTensor` of shape `(batch_size, sequence_length, num_hidden_layers, num_experts_per_tok)`, *optional*):
+            Routed experts for each token, indexed by global layer index. Only used for router replay; slots
+            for non-MoE (Mamba/attention) layers are ignored.
+        """
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
 
@@ -516,12 +525,15 @@ def forward(
         hidden_states = inputs_embeds
         position_embeddings = self.rotary_emb(hidden_states, position_ids) if self.rotary_emb is not None else None
 
-        for decoder_layer in self.layers:
+        for layer_idx, decoder_layer in enumerate(self.layers):
+            # routed_experts is indexed by global layer index; non-MoE layers ignore it.
+            routed_experts_layer = routed_experts[:, :, layer_idx, :] if routed_experts is not None else None
             hidden_states = decoder_layer(
                 hidden_states,
                 position_embeddings=position_embeddings,
                 cu_seqlens=cu_seqlens,
                 max_seqlen=max_seqlen,
+                routed_experts=routed_experts_layer,
             )
 
         hidden_states = self.norm(hidden_states)
@@ -550,6 +562,7 @@ def forward(
         labels: Optional[torch.LongTensor] = None,
         logits_to_keep: int = 0,
         temperature: Optional[torch.Tensor] = None,
+        routed_experts: Optional[torch.LongTensor] = None,
         **kwargs,
     ) -> PrimeLmOutput:
         if position_ids is None:
@@ -562,6 +575,7 @@ def forward(
             input_ids=input_ids,
             position_ids=position_ids,
             inputs_embeds=inputs_embeds,
+            routed_experts=routed_experts,
         )
 
         hidden_states = outputs.last_hidden_state

From 632ef2b31820408a874332dbfa7d6a416ed77583 Mon Sep 17 00:00:00 2001
From: samsja <55492238+samsja@users.noreply.github.com>
Date: Tue, 9 Jun 2026 19:31:08 -0700
Subject: [PATCH 10/12] chore: remove private configs submodule (#2741)

Remove the configs/private submodule (research-configs) and all
references to it throughout the codebase:

- Remove submodule from .gitmodules and git tracking
- Simplify install.sh: use plain git submodule update --init --recursive
  now that no private submodule can fail for users without access
- Update skills/install/SKILL.md to reflect simplified submodule init
- Remove configs/private/ entry from skills/configs/SKILL.md key files
- Simplify test_configs.py: no longer need to filter out private/ path
---
 .gitmodules                |  3 ---
 configs/private            |  1 -
 scripts/install.sh         | 18 +-----------------
 skills/configs/SKILL.md    |  1 -
 skills/install/SKILL.md    |  4 +---
 tests/unit/test_configs.py |  5 ++---
 6 files changed, 4 insertions(+), 28 deletions(-)
 delete mode 160000 configs/private

diff --git a/.gitmodules b/.gitmodules
index 2041f460ee..b378f0ebbf 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,9 +7,6 @@
 [submodule "research-environments"]
 	path = deps/research-environments
 	url = git@github.com:PrimeIntellect-ai/research-environments.git
-[submodule "configs/private"]
-	path = configs/private
-	url = git@github.com:PrimeIntellect-ai/research-configs.git
 [submodule "pydantic-config"]
 	path = deps/pydantic-config
 	url = https://github.com/PrimeIntellect-ai/pydantic-config
diff --git a/configs/private b/configs/private
deleted file mode 160000
index 70c3503e1d..0000000000
--- a/configs/private
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 70c3503e1dc4ea499b09f0eee206b509169b79bd
diff --git a/scripts/install.sh b/scripts/install.sh
index 630bf1d576..833726d3d9 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -74,27 +74,11 @@ ensure_known_hosts() {
   fi
 }
 
-# Initialize each submodule independently so that a missing private repo
-# (e.g. configs/private when the user lacks access) does not abort the install.
 init_submodules() {
   if [ ! -f .gitmodules ]; then
     return 0
   fi
-  local paths failures
-  paths=$(git config -f .gitmodules --get-regexp '^submodule\..*\.path$' | awk '{print $2}')
-  failures=()
-  for path in $paths; do
-    log_info "Initializing submodule: ${path}"
-    if git submodule update --init --recursive -- "$path"; then
-      :
-    else
-      log_warn "Could not initialize submodule '${path}' (likely no access). Continuing without it."
-      failures+=("$path")
-    fi
-  done
-  if [ "${#failures[@]}" -gt 0 ]; then
-    log_warn "Skipped submodules: ${failures[*]}"
-  fi
+  git submodule update --init --recursive
 }
 
 main() {
diff --git a/skills/configs/SKILL.md b/skills/configs/SKILL.md
index 83f7dd8d47..129f89d551 100644
--- a/skills/configs/SKILL.md
+++ b/skills/configs/SKILL.md
@@ -74,5 +74,4 @@ Leave it unset for normal training. When enabled, it exports every sequence from
 
 - `packages/prime-rl-configs/src/prime_rl/` — config classes under `configs/`; `utils/config.py` re-exports `BaseConfig` and `cli`
 - `configs/debug/` — minimal debug configs
-- `configs/private/` — private configs submodule (internal)
 - `examples/` — full example configs
diff --git a/skills/install/SKILL.md b/skills/install/SKILL.md
index 3ad7e164b8..aa3dad0661 100644
--- a/skills/install/SKILL.md
+++ b/skills/install/SKILL.md
@@ -16,11 +16,9 @@ bash scripts/install.sh   # clones, inits submodules, installs uv, runs `uv sync
 For an existing clone, init submodules explicitly:
 
 ```bash
-git submodule update --init -- deps/verifiers deps/renderers deps/research-environments deps/pydantic-config
+git submodule update --init --recursive
 ```
 
-Do **not** run `git submodule update --init --recursive` without paths — it tries to clone the private `configs/private` submodule and aborts for users without access. `scripts/install.sh` walks submodules one at a time and skips failures, so it works for everyone.
-
 ## Sync
 
 ```bash
diff --git a/tests/unit/test_configs.py b/tests/unit/test_configs.py
index fcdee7a843..9ad10fef69 100644
--- a/tests/unit/test_configs.py
+++ b/tests/unit/test_configs.py
@@ -26,9 +26,8 @@
 
 
 def get_config_files() -> list[Path]:
-    """Any TOML file inside `configs/` or `examples/` (skips the configs/private/ submodule)."""
-    private = Path("configs/private")
-    config_files = [p for p in Path("configs").rglob("*.toml") if private not in p.parents]
+    """Any TOML file inside `configs/` or `examples/`."""
+    config_files = list(Path("configs").rglob("*.toml"))
     example_files = list(Path("examples").rglob("*.toml"))
 
     return config_files + example_files

From 04d067145f5f2698112c93f61c78f457f2f19fda Mon Sep 17 00:00:00 2001
From: samsja <55492238+samsja@users.noreply.github.com>
Date: Tue, 9 Jun 2026 19:31:19 -0700
Subject: [PATCH 11/12] update deps (#2736)

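* bump the deps/research-environments and deps/verifiers submodules

* drop mini-swe-agent-plus-rlm from the `envs` list and uv sources

* add a `verifiers[packages]` override and editable `harnesses` /
  `tasksets` sources from deps/verifiers/packages, and update uv.lock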
---
 deps/research-environments |   2 +-
 deps/verifiers             |   2 +-
 pyproject.toml             |   5 +-
 uv.lock                    | 804 ++++++++++++++++++++++++++++++++-----
 4 files changed, 713 insertions(+), 100 deletions(-)

diff --git a/deps/research-environments b/deps/research-environments
index c752781984..4c08260f07 160000
--- a/deps/research-environments
+++ b/deps/research-environments
@@ -1 +1 @@
-Subproject commit c752781984c1b4fbb0a3d7f4aac1e7ed67cc749e
+Subproject commit 4c08260f07d1f907d3adee93bf55b94e177865c9
diff --git a/deps/verifiers b/deps/verifiers
index 05c66c2358..0ad8b4d523 160000
--- a/deps/verifiers
+++ b/deps/verifiers
@@ -1 +1 @@
-Subproject commit 05c66c235875d785754f2b7078db0e7deeddbeae
+Subproject commit 0ad8b4d523caf7d5eeceb013eee1b63b737925ea
diff --git a/pyproject.toml b/pyproject.toml
index 719b1228e6..4f02554486 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -79,7 +79,6 @@ envs = [
     "math-python",
     "math500",
     "mini-swe-agent-plus",
-    "mini-swe-agent-plus-rlm",
     "mmlu-pro",
     "opencode-cp",
     "opencode-deepdive",
@@ -152,6 +151,7 @@ override-dependencies = [
     "transformers==5.6.2",
     "torch>=2.9.0",
     "openenv-core",
+    "verifiers[packages]>=0.1.15.dev150",
 ]
 
 # ModelExpress 0.3.0 publishes protobuf<6 metadata, but its generated proto is
@@ -209,6 +209,7 @@ color-codeword = { path = "deps/research-environments/environments/color_codewor
 deepdive = { path = "deps/research-environments/environments/deepdive", editable = true }
 general-agent = { path = "deps/research-environments/environments/general_agent", editable = true }
 gpqa = { path = "deps/research-environments/environments/gpqa", editable = true }
+harnesses = { path = "deps/verifiers/packages/harnesses", editable = true }
 hle = { path = "deps/research-environments/environments/hle", editable = true }
 ifeval = { path = "deps/research-environments/environments/ifeval", editable = true }
 livecodebench = { path = "deps/research-environments/environments/livecodebench", editable = true }
@@ -217,7 +218,6 @@ math-env = { path = "deps/research-environments/environments/math_env", editable
 math-python = { path = "deps/verifiers/environments/math_python", editable = true }
 math500 = { path = "deps/research-environments/environments/math500", editable = true }
 mini-swe-agent-plus = { path = "deps/research-environments/environments/mini_swe_agent_plus", editable = true }
-mini-swe-agent-plus-rlm = { path = "deps/research-environments/environments/mini_swe_agent_plus_rlm", editable = true }
 mmlu-pro = { path = "deps/research-environments/environments/mmlu_pro", editable = true }
 opencode-cp = { path = "deps/research-environments/environments/opencode_cp", editable = true }
 opencode-deepdive = { path = "deps/research-environments/environments/opencode_deepdive", editable = true }
@@ -229,6 +229,7 @@ rlm-swe = { path = "deps/research-environments/environments/rlm_swe", editable =
 science-env = { path = "deps/research-environments/environments/science_env", editable = true }
 simpleqa-verified = { path = "deps/research-environments/environments/simpleqa_verified", editable = true }
 tau2-bench = { path = "deps/research-environments/environments/tau2_bench", editable = true }
+tasksets = { path = "deps/verifiers/packages/tasksets", editable = true }
 wiki-search = { path = "deps/verifiers/environments/wiki_search", editable = true }
 wordle = { path = "deps/verifiers/environments/wordle", editable = true }
 torch = { index = "pytorch-cu128" }
diff --git a/uv.lock b/uv.lock
index 3922f32b28..af14aed2c2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -11,7 +11,7 @@ supported-markers = [
 ]
 
 [options]
-exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values.
+exclude-newer = "2026-06-01T23:58:54.614773201Z"
 exclude-newer-span = "P7D"
 
 [options.exclude-newer-package]
@@ -41,6 +41,7 @@ overrides = [
     { name = "openenv-core" },
     { name = "torch", specifier = ">=2.9.0", index = "https://download.pytorch.org/whl/cu128" },
     { name = "transformers", specifier = "==5.6.2" },
+    { name = "verifiers", extras = ["packages"], editable = "deps/verifiers" },
 ]
 
 [[manifest.dependency-metadata]]
@@ -68,32 +69,44 @@ wheels = [
 
 [[package]]
 name = "aime2024"
-version = "0.1.20"
+version = "0.2.0"
 source = { editable = "deps/research-environments/environments/aime2024" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev152" },
 ]
 
 [[package]]
 name = "aime2025"
-version = "0.1.20"
+version = "0.2.0"
 source = { editable = "deps/research-environments/environments/aime2025" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev152" },
+]
+
+[[package]]
+name = "aiofile"
+version = "3.11.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "caio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/48/41/2fea7e193e061ce54eacc3b7bc0e6a99e4fcff43c78cf0a76dd781ed8334/aiofile-3.11.1.tar.gz", hash = "sha256:1f91912c6643d2a4e49ca4ae3514f0bf3867ce948a36d99a6411b8f4755f4cf9", size = 19342, upload-time = "2026-05-16T08:18:33.538Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/67/cd/0d76dfc5de72bde52f55f53e925c7d152d9c7906634ec1e0cbc7e8d4ad93/aiofile-3.11.1-py3-none-any.whl", hash = "sha256:ce77d14ac07f77bc2b757834a5c129321f3f705c474593deed5ab209079a52c9", size = 20446, upload-time = "2026-05-16T08:18:32.051Z" },
 ]
 
 [[package]]
@@ -162,7 +175,7 @@ name = "alphabet-sort"
 version = "0.1.12"
 source = { editable = "deps/verifiers/environments/alphabet_sort" }
 dependencies = [
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -278,6 +291,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" },
 ]
 
+[[package]]
+name = "authlib"
+version = "1.7.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "joserfc", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/36/98/7d93f30d029643c0275dbc0bd6d5a6f670661ee6c9a94d93af7ab4887600/authlib-1.7.2.tar.gz", hash = "sha256:2cea25fefcd4e7173bdf1372c0afc265c8034b23a8cd5dcb6a9164b826c64231", size = 176511, upload-time = "2026-05-06T08:10:23.116Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fb/95/adcb68e20c34162e9135f370d6e31737719c2b6f94bc953fe7ed1f10fe21/authlib-1.7.2-py2.py3-none-any.whl", hash = "sha256:3e1faedc9d87e7d56a164eca3ccb6ace0d61b94abe83e92242f8dc8bba9b4a9f", size = 259548, upload-time = "2026-05-06T08:10:21.436Z" },
+]
+
 [[package]]
 name = "backoff"
 version = "2.2.1"
@@ -358,6 +384,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0e/c6/d1fe8bdea4a6088bd54b5a58bc40aed89a4e784cd796af7722a06f74bae7/blake3-1.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a25db3d36b55f5ed6a86470155cc749fc9c5b91c949b8d14f48658f9d960d9ec", size = 554211, upload-time = "2025-10-14T06:46:00.269Z" },
 ]
 
+[[package]]
+name = "brotli"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f7/16/c92ca344d646e71a43b8bb353f0a6490d7f6e06210f8554c8f874e454285/brotli-1.2.0.tar.gz", hash = "sha256:e310f77e41941c13340a95976fe66a8a95b01e783d430eeaf7a2f87e0a57dd0a", size = 7388632, upload-time = "2025-11-05T18:39:42.86Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3d/6f/feba03130d5fceadfa3a1bb102cb14650798c848b1df2a808356f939bb16/brotli-1.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:acec55bb7c90f1dfc476126f9711a8e81c9af7fb617409a9ee2953115343f08d", size = 1528071, upload-time = "2025-11-05T18:38:26.081Z" },
+    { url = "https://files.pythonhosted.org/packages/03/a7/03aa61fbc3c5cbf99b44d158665f9b0dd3d8059be16c460208d9e385c837/brotli-1.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:072e7624b1fc4d601036ab3f4f27942ef772887e876beff0301d261210bca97f", size = 1419762, upload-time = "2025-11-05T18:38:28.295Z" },
+    { url = "https://files.pythonhosted.org/packages/21/1b/0374a89ee27d152a5069c356c96b93afd1b94eae83f1e004b57eb6ce2f10/brotli-1.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adedc4a67e15327dfdd04884873c6d5a01d3e3b6f61406f99b1ed4865a2f6d28", size = 1484494, upload-time = "2025-11-05T18:38:29.29Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/3b/39e13ce78a8e9a621c5df3aeb5fd181fcc8caba8c48a194cd629771f6828/brotli-1.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:af43b8711a8264bb4e7d6d9a6d004c3a2019c04c01127a868709ec29962b6036", size = 1487913, upload-time = "2025-11-05T18:38:31.618Z" },
+]
+
 [[package]]
 name = "build"
 version = "1.5.0"
@@ -392,6 +430,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8c/7b/1fc1c09cc0756cf25861a3be10565915953876da48bb228fb9a672b20a42/cachetools-7.1.4-py3-none-any.whl", hash = "sha256:323dc4127934744db5b54eb4924482d7edafbf9554e820d1531c2e08c0e4ef54", size = 16761, upload-time = "2026-05-21T22:40:41.845Z" },
 ]
 
+[[package]]
+name = "caio"
+version = "0.9.25"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/92/88/b8527e1b00c1811db339a1df8bd1ae49d146fcea9d6a5c40e3a80aaeb38d/caio-0.9.25.tar.gz", hash = "sha256:16498e7f81d1d0f5a4c0ad3f2540e65fe25691376e0a5bd367f558067113ed10", size = 26781, upload-time = "2025-12-26T15:21:36.501Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a3/2b/21288691f16d479945968a0a4f2856818c1c5be56881d51d4dac9b255d26/caio-0.9.25-cp312-cp312-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:97084e4e30dfa598449d874c4d8e0c8d5ea17d2f752ef5e48e150ff9d240cd64", size = 82012, upload-time = "2025-12-26T15:22:20.983Z" },
+    { url = "https://files.pythonhosted.org/packages/03/c4/8a1b580875303500a9c12b9e0af58cb82e47f5bcf888c2457742a138273c/caio-0.9.25-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:4fa69eba47e0f041b9d4f336e2ad40740681c43e686b18b191b6c5f4c5544bfb", size = 81502, upload-time = "2026-03-04T22:08:22.381Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/1c/0fe770b8ffc8362c48134d1592d653a81a3d8748d764bec33864db36319d/caio-0.9.25-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:6bebf6f079f1341d19f7386db9b8b1f07e8cc15ae13bfdaff573371ba0575d69", size = 80200, upload-time = "2026-03-04T22:08:23.382Z" },
+    { url = "https://files.pythonhosted.org/packages/86/93/1f76c8d1bafe3b0614e06b2195784a3765bbf7b0a067661af9e2dd47fc33/caio-0.9.25-py3-none-any.whl", hash = "sha256:06c0bb02d6b929119b1cfbe1ca403c768b2013a369e2db46bfa2a5761cf82e40", size = 19087, upload-time = "2025-12-26T15:22:00.221Z" },
+]
+
 [[package]]
 name = "cbor2"
 version = "6.1.1"
@@ -526,11 +576,11 @@ wheels = [
 
 [[package]]
 name = "code-env"
-version = "0.3.1"
+version = "0.3.2"
 source = { editable = "deps/research-environments/environments/code_env" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -538,7 +588,7 @@ requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
     { name = "pytest", marker = "extra == 'dev'" },
     { name = "pytest-asyncio", marker = "extra == 'dev'" },
-    { name = "verifiers", specifier = ">=0.1.13.dev8" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 provides-extras = ["dev"]
 
@@ -548,13 +598,13 @@ version = "0.1.0"
 source = { editable = "deps/research-environments/environments/color_codeword" }
 dependencies = [
     { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "pillow", specifier = ">=10.0.0" },
-    { name = "verifiers", specifier = ">=0.1.10" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -731,6 +781,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" },
 ]
 
+[[package]]
+name = "cyclopts"
+version = "4.16.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "docstring-parser", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich-rst", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/34/07/bf61d13de86d96a4c46aff00c9ca0eced44bcc8c3e16280605c1253e5720/cyclopts-4.16.1.tar.gz", hash = "sha256:8aa47bf92a5fb33abca5af05e576eecdb0d2f79893ad29238046df78370fc4a8", size = 181196, upload-time = "2026-05-25T15:29:08.518Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/8d/7f362c2fb8ef4decd2160bc24d4292c6ca658cc6d9a161b89ca5122bbdbf/cyclopts-4.16.1-py3-none-any.whl", hash = "sha256:617795392c4113a2c2cc7af716f20244900e87f23daa05442d1268d81472a592", size = 219020, upload-time = "2026-05-25T15:29:09.646Z" },
+]
+
 [[package]]
 name = "dataclasses-json"
 version = "0.6.7"
@@ -826,7 +891,7 @@ dependencies = [
     { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "diskcache", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "pdfminer-six", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -835,7 +900,7 @@ requires-dist = [
     { name = "datasets", specifier = ">=4.0.0" },
     { name = "diskcache", specifier = ">=5.6.0" },
     { name = "pdfminer-six", specifier = ">=20251107" },
-    { name = "verifiers", specifier = ">=0.1.11.dev0" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -968,6 +1033,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" },
 ]
 
+[[package]]
+name = "exceptiongroup"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
+]
+
 [[package]]
 name = "executing"
 version = "2.2.1"
@@ -1076,6 +1153,66 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/41/2c368f804bb9bd918da3b61324207fc4b410d0f32352c372c0680fc1f670/fastcore-1.13.2-py3-none-any.whl", hash = "sha256:2103c9e9e613311c0b36eab17299a221e778fd214ec526e8df1d32908928277c", size = 105060, upload-time = "2026-05-17T06:02:22.28Z" },
 ]
 
+[[package]]
+name = "fastmcp"
+version = "3.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "fastmcp-slim", extra = ["client", "server"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3b/a9/5c5a01b6abd5346bf60b97cfd29e4a86661940c27dd562bfcda07fd03519/fastmcp-3.3.1.tar.gz", hash = "sha256:979362ea557de42a5f40342563c7e4b236bcc8e7cd192715f50030695d1a71cd", size = 28681699, upload-time = "2026-05-15T15:50:39.673Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9f/11/6b1bdada6ccfe647d615ae63f9106f8136aec17971e9361546af01c7d38e/fastmcp-3.3.1-py3-none-any.whl", hash = "sha256:862440c5c4d281363a5995eee59d77f0f7cac1f18869038729cecf03b02fc522", size = 7903, upload-time = "2026-05-15T15:50:36.424Z" },
+]
+
+[[package]]
+name = "fastmcp-slim"
+version = "3.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "platformdirs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", extra = ["email"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic-settings", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "python-dotenv", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d1/a0/627103e517e1d0d6f1eec633d5662d13e776f01b45ad188e4f5f7478b438/fastmcp_slim-3.3.1.tar.gz", hash = "sha256:0957835fc59452e143ab2f4b7836d2d2df9b2d9958408edc79ba8b56232b2a88", size = 567007, upload-time = "2026-05-15T15:50:10.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7a/ee/97047f4cc2d7b1d46670d08d8ad01a96e7a748cc01c0b4b351ad8eddbc7a/fastmcp_slim-3.3.1-py3-none-any.whl", hash = "sha256:6cf1c2d77e3adb0d409d6825ed6b0b2a999062973e00b8eea03bd48bf9b4c043", size = 738644, upload-time = "2026-05-15T15:50:08.336Z" },
+]
+
+[package.optional-dependencies]
+client = [
+    { name = "authlib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "exceptiongroup", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "opentelemetry-api", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+server = [
+    { name = "authlib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "cyclopts", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "exceptiongroup", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "griffelib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jsonref", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jsonschema-path", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "openapi-pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "opentelemetry-api", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "packaging", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyperclip", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "python-multipart", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uncalled-for", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "watchfiles", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
 [[package]]
 name = "fastokens"
 version = "0.2.0"
@@ -1281,7 +1418,7 @@ source = { editable = "deps/research-environments/environments/general_agent" }
 dependencies = [
     { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "tyro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -1292,7 +1429,7 @@ requires-dist = [
     { name = "ruff", marker = "extra == 'dev'" },
     { name = "ty", marker = "extra == 'dev'" },
     { name = "tyro", specifier = ">=0.9" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" },
 ]
 provides-extras = ["dev", "test"]
 
@@ -1362,6 +1499,45 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/20/7a/1c6e3562dfd8950adbb11ffbc65d21e7c89d01a6e4f137fa981056de25c5/gitpython-3.1.50-py3-none-any.whl", hash = "sha256:d352abe2908d07355014abdd21ddf798c2a961469239afec4962e9da884858f9", size = 212507, upload-time = "2026-05-06T04:01:23.799Z" },
 ]
 
+[[package]]
+name = "google-auth"
+version = "2.53.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyasn1-modules", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/ad/ff781329bbbdc0974a098d996e89c9e1f7024262f9e3eec442fbb9ad1ac6/google_auth-2.53.0.tar.gz", hash = "sha256:e7e6aa16f6bee7b2b264830fd04f08087a1d5a836df516251a5d15327b246c9c", size = 335844, upload-time = "2026-05-15T20:53:07.928Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4a/c9/db44165ba7c581268c6d46017ef63339110378305062830104fc7fa144cb/google_auth-2.53.0-py3-none-any.whl", hash = "sha256:6e7449917c599b35126a99ec268ec6880301f2fea41dce198fe8fd83ff642b68", size = 246071, upload-time = "2026-05-15T20:53:05.609Z" },
+]
+
+[package.optional-dependencies]
+requests = [
+    { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[[package]]
+name = "google-genai"
+version = "2.7.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "distro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "google-auth", extra = ["requests"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "sniffio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a7/7b/6eb3b3d545b6bb4c374acba1ccf91b0f33b605e551536a6243cfcef2f07f/google_genai-2.7.0.tar.gz", hash = "sha256:3c6f32f5ced9877ededd1b384b5e5b7f09c20046ec3390b662b16d8cd1882ac5", size = 555853, upload-time = "2026-05-28T15:39:24.58Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3c/dd/7a8be39e9d698e80e9db796514efbc6083dbd787bdb9a101e8ba47248e5e/google_genai-2.7.0-py3-none-any.whl", hash = "sha256:21cac381e09a869151706aba797b6a4f96cfe92c484e13204d092caee7ff11cb", size = 822545, upload-time = "2026-05-28T15:39:22.907Z" },
+]
+
 [[package]]
 name = "googleapis-common-protos"
 version = "1.75.0"
@@ -1380,13 +1556,67 @@ version = "0.1.5"
 source = { editable = "deps/research-environments/environments/gpqa" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
+]
+
+[[package]]
+name = "gradio"
+version = "6.15.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "brotli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "gradio-client", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "groovy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "hf-gradio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jinja2", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "markupsafe", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "orjson", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "packaging", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pandas", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "python-multipart", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pytz", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "safehttpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "semantic-version", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tomlkit", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b9/7a/edc719d67beea0963721a7f552604faa0feaf218d4a51c6e0dacfb51ba6a/gradio-6.15.1.tar.gz", hash = "sha256:58be31be7b3aab53bbe61f21c20666b4a8a25a6737c399e02b7463d669625851", size = 36429761, upload-time = "2026-05-27T13:20:35.232Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/87/b6d08b4b0a5bf7bb219c4c44d1d267ce6dfbc1ff062ca05ce251a50d18a5/gradio-6.15.1-py3-none-any.whl", hash = "sha256:f4f50488f8da1137b8e0d65fe656348b11bac3cd2f5ccab60636eb45e5a6f39f", size = 20093799, upload-time = "2026-05-27T13:20:31.18Z" },
+]
+
+[[package]]
+name = "gradio-client"
+version = "2.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "fsspec", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "packaging", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e8/e6/6b6029f5fe2ad7f1211105d530e34d991014c2cae463f9223033031cfc4f/gradio_client-2.5.0.tar.gz", hash = "sha256:4cde99bad62149595c30c90876ca2e405e3a13687ecf895474f3412cb476673d", size = 59013, upload-time = "2026-04-20T23:16:21.518Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/78/81/0a861b8e1ff42960139c6cd4c7dd591292fa09ea1ae2d87677441cba4c00/gradio_client-2.5.0-py3-none-any.whl", hash = "sha256:d43e2179c29076292a76485ad7ed2e6eaa19d14ac58283bd7f5beabfe4ca958c", size = 59952, upload-time = "2026-04-20T23:16:20.186Z" },
 ]
 
 [[package]]
@@ -1398,6 +1628,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/11/8c/c9138d881c79aa0ea9ed83cbd58d5ca75624378b38cee225dcf5c42cc91f/griffelib-2.0.2-py3-none-any.whl", hash = "sha256:925c857658fb1ba40c0772c37acbc2ab650bd794d9c1b9726922e36ea4117ea1", size = 142357, upload-time = "2026-03-27T11:34:46.275Z" },
 ]
 
+[[package]]
+name = "groovy"
+version = "0.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/36/bbdede67400277bef33d3ec0e6a31750da972c469f75966b4930c753218f/groovy-0.1.2.tar.gz", hash = "sha256:25c1dc09b3f9d7e292458aa762c6beb96ea037071bf5e917fc81fb78d2231083", size = 17325, upload-time = "2025-02-28T20:24:56.068Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/28/27/3d6dcadc8a3214d8522c1e7f6a19554e33659be44546d44a2f7572ac7d2a/groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64", size = 14090, upload-time = "2025-02-28T20:24:55.152Z" },
+]
+
 [[package]]
 name = "grpcio"
 version = "1.80.0"
@@ -1463,6 +1702,34 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" },
 ]
 
+[[package]]
+name = "harnesses"
+source = { editable = "deps/verifiers/packages/harnesses" }
+dependencies = [
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "aiohttp", marker = "extra == 'nemogym'", specifier = ">=3.9.0" },
+    { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" },
+    { name = "verifiers", specifier = ">=0.1.15.dev158" },
+]
+provides-extras = ["nemogym"]
+
+[[package]]
+name = "hf-gradio"
+version = "0.4.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "gradio-client", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ce/86/c9694b7cfada5780e75769e60dc161a161f4dd7fc91b61db5e3a3338bef9/hf_gradio-0.4.1.tar.gz", hash = "sha256:a017d942618f0d495a58ee4563047fa04bef614c00e0cb789a9a6d0633cffa7b", size = 6560, upload-time = "2026-04-22T14:01:32.334Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/30/2d/afff2ee87e75d8eb85c92bb8cf0e15b05c23c2ebd8fd8dec781d8601ed7f/hf_gradio-0.4.1-py3-none-any.whl", hash = "sha256:76b8cb8be6abe62d74c1ad2d35b42f0629db89aa9e1a8d033cecfe7c856eeab3", size = 4482, upload-time = "2026-04-17T19:53:31.827Z" },
+]
+
 [[package]]
 name = "hf-xet"
 version = "1.5.0"
@@ -1481,13 +1748,13 @@ version = "0.2.1"
 source = { editable = "deps/research-environments/environments/hle" }
 dependencies = [
     { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "pillow", specifier = ">=12.0.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -1632,7 +1899,7 @@ dependencies = [
     { name = "immutabledict", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "langdetect", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -1640,7 +1907,7 @@ requires-dist = [
     { name = "immutabledict" },
     { name = "langdetect" },
     { name = "nltk" },
-    { name = "verifiers", specifier = ">=0.1.10" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -1775,6 +2042,39 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/56/6d/0d9848617b9f753b87f214f1c682592f7ca42de085f564352f10f0843026/ipywidgets-8.1.8-py3-none-any.whl", hash = "sha256:ecaca67aed704a338f88f67b1181b58f821ab5dc89c1f0f5ef99db43c1c2921e", size = 139808, upload-time = "2025-11-01T21:18:10.956Z" },
 ]
 
+[[package]]
+name = "jaraco-classes"
+version = "3.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "more-itertools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" },
+]
+
+[[package]]
+name = "jaraco-context"
+version = "6.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/50/4763cd07e722bb6285316d390a164bc7e479db9d90daa769f22578f698b4/jaraco_context-6.1.2.tar.gz", hash = "sha256:f1a6c9d391e661cc5b8d39861ff077a7dc24dc23833ccee564b234b81c82dfe3", size = 16801, upload-time = "2026-03-20T22:13:33.922Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f2/58/bc8954bda5fcda97bd7c19be11b85f91973d67a706ed4a3aec33e7de22db/jaraco_context-6.1.2-py3-none-any.whl", hash = "sha256:bf8150b79a2d5d91ae48629d8b427a8f7ba0e1097dd6202a9059f29a36379535", size = 7871, upload-time = "2026-03-20T22:13:32.808Z" },
+]
+
+[[package]]
+name = "jaraco-functools"
+version = "4.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "more-itertools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/36/cf/ea4ef2920830dea3f5ab2ea4da6fb67724e6dca80ee2553788c3607243d0/jaraco_functools-4.5.0.tar.gz", hash = "sha256:3bb5665ea4a020cf78a7040e89154c77edadb3ca74f366479669c5999aa70b03", size = 20272, upload-time = "2026-05-15T21:34:10.025Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/96/9a/982e48afcffcd727a9144506720ffd4224b6b7e355c98641866f38b7c043/jaraco_functools-4.5.0-py3-none-any.whl", hash = "sha256:79ce39246eddbde4b3a03b77ea5f0f7878dc669b166a66cf3fa8e266aa3fa2f4", size = 10594, upload-time = "2026-05-15T21:34:08.595Z" },
+]
+
 [[package]]
 name = "jaxtyping"
 version = "0.3.10"
@@ -1799,6 +2099,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9a/93/242e2eab5fe682ffcb8b0084bde703a41d51e17ee0f3a31ff0d9d813620a/jedi-0.20.0-py2.py3-none-any.whl", hash = "sha256:7bdd9c2634f56713299976f4cbd59cb3fa92165cc5e05ea811fb253480728b67", size = 4884812, upload-time = "2026-05-01T23:38:43.919Z" },
 ]
 
+[[package]]
+name = "jeepney"
+version = "0.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -1843,6 +2152,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" },
 ]
 
+[[package]]
+name = "joserfc"
+version = "1.6.8"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5d/ac/d4fd5b30f82900eac60d765f179f0ba005825ac462cc8ced6e13ec685ab3/joserfc-1.6.8.tar.gz", hash = "sha256:878620c553a6ebdd76ccdc356782fee3f735f21a356d079a546b42a4670ace5f", size = 232930, upload-time = "2026-05-27T03:22:37.819Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/98/8c/5cdce2cf3ce8155849baf9a5e2ce77e89dc87ec3bdb38259e5d85fbc45bd/joserfc-1.6.8-py3-none-any.whl", hash = "sha256:22fb31a69094a5e6f44632002a9df2c30c941fc6c8ce1b037e92c03de954cf9f", size = 70927, upload-time = "2026-05-27T03:22:35.796Z" },
+]
+
+[[package]]
+name = "jsonref"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload-time = "2023-01-16T16:10:04.455Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload-time = "2023-01-16T16:10:02.255Z" },
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.26.0"
@@ -1858,6 +2188,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" },
 ]
 
+[[package]]
+name = "jsonschema-path"
+version = "0.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pathable", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "referencing", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/39/79/cd02a4df6d9270efdc7d3feefe6edd730b0820c39eeaa107a2faee8322d5/jsonschema_path-0.5.0.tar.gz", hash = "sha256:493b156ba895c97602655b620a8456caa2ce08c1aa389f5a7addec065e6e855c", size = 19597, upload-time = "2026-05-19T20:45:00.971Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/2c/9e69d73c4297508be9e3b64a970ea3971b3eb8db64ffc5802d40bd25981f/jsonschema_path-0.5.0-py3-none-any.whl", hash = "sha256:2790a070bc7abb08ea3dbe4d340ece4efadf639223001f020c7503229ba068e2", size = 24077, upload-time = "2026-05-19T20:44:59.225Z" },
+]
+
 [[package]]
 name = "jsonschema-specifications"
 version = "2025.9.1"
@@ -1936,6 +2281,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/87/ea/fe955e157094d93fe20919441dc709ba7afbcf933de5896b546d5c217938/kernels_data-0.14.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ec694ae26ed8569f613e5cd4269222210cb57b1c16cf8b0c5acbf0082324804f", size = 1414055, upload-time = "2026-05-14T06:41:14.424Z" },
 ]
 
+[[package]]
+name = "keyring"
+version = "25.7.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "jaraco-classes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jaraco-context", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jaraco-functools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jeepney", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "secretstorage", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" },
+]
+
 [[package]]
 name = "kiwisolver"
 version = "1.5.0"
@@ -2070,17 +2431,17 @@ wheels = [
 
 [[package]]
 name = "livecodebench"
-version = "0.2.6"
+version = "0.2.7"
 source = { editable = "deps/research-environments/environments/livecodebench" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
-    { name = "verifiers", specifier = ">=0.1.15.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2126,7 +2487,7 @@ dependencies = [
     { name = "markdown", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "sympy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -2134,7 +2495,7 @@ requires-dist = [
     { name = "markdown", specifier = ">=3.5.1" },
     { name = "math-verify", specifier = ">=0.8.0" },
     { name = "sympy", specifier = ">=1.12.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2213,19 +2574,19 @@ wheels = [
 
 [[package]]
 name = "math-env"
-version = "0.1.5"
+version = "0.1.6"
 source = { editable = "deps/research-environments/environments/math_env" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2235,7 +2596,7 @@ source = { editable = "deps/verifiers/environments/math_python" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -2263,13 +2624,13 @@ version = "0.1.17"
 source = { editable = "deps/research-environments/environments/math500" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2353,13 +2714,13 @@ wheels = [
 
 [[package]]
 name = "mini-swe-agent-plus"
-version = "0.2.24"
+version = "0.2.25"
 source = { editable = "deps/research-environments/environments/mini_swe_agent_plus" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -2367,26 +2728,7 @@ requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
     { name = "swebench", specifier = "==4.1.0" },
     { name = "tenacity" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
-]
-
-[[package]]
-name = "mini-swe-agent-plus-rlm"
-version = "0.1.6"
-source = { editable = "deps/research-environments/environments/mini_swe_agent_plus_rlm" }
-dependencies = [
-    { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-]
-
-[package.metadata]
-requires-dist = [
-    { name = "prime-sandboxes", specifier = ">=0.2.19" },
-    { name = "swebench", specifier = "==4.1.0" },
-    { name = "tenacity" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2444,13 +2786,13 @@ version = "0.1.3"
 source = { editable = "deps/research-environments/environments/mmlu_pro" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2525,6 +2867,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3a/a4/a187adcd485ff27bdbdb5c2b4d9cf210427bc74bcaacfc8226409db17535/mooncake_transfer_engine-0.3.11.post1-cp312-cp312-manylinux_2_39_aarch64.whl", hash = "sha256:1ccad9f44cf1a67f4e0494bd02f505503139ab606ecbe76cd6050d7a069247d5", size = 18089789, upload-time = "2026-05-24T16:19:01.828Z" },
 ]
 
+[[package]]
+name = "more-itertools"
+version = "11.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/de/1d/f4da6f02cdffe04d6362210b807146a26044c88d839208aec273bb0d9184/more_itertools-11.1.0.tar.gz", hash = "sha256:48e8f4d9e7e5878571ecf6f2b4e57634f93cd474cc8cfbd2376f2d11b396e30d", size = 145772, upload-time = "2026-05-22T14:14:29.909Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e8/3d/1087453384dbde46a8c7f9356eead2c58be8a7bf156bca40243377c85715/more_itertools-11.1.0-py3-none-any.whl", hash = "sha256:4b65538ae22f6fed0ce4874efd317463a7489796a0939fa66824dd542125a192", size = 72226, upload-time = "2026-05-22T14:14:28.824Z" },
+]
+
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@@ -3046,83 +3397,95 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" },
 ]
 
+[[package]]
+name = "openapi-pydantic"
+version = "0.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/02/2e/58d83848dd1a79cb92ed8e63f6ba901ca282c5f09d04af9423ec26c56fd7/openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d", size = 60892, upload-time = "2025-01-08T19:29:27.083Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" },
+]
+
 [[package]]
 name = "opencode-cp"
-version = "0.3.10"
+version = "0.3.12"
 source = { editable = "deps/research-environments/environments/opencode_cp" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17,<0.1.15.dev150" },
 ]
 
 [[package]]
 name = "opencode-deepdive"
-version = "0.1.16"
+version = "0.1.17"
 source = { editable = "deps/research-environments/environments/opencode_deepdive" }
 dependencies = [
     { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "datasets", specifier = ">=4.0.0" },
     { name = "prime-sandboxes", specifier = ">=0.2.25" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17,<0.1.15.dev150" },
 ]
 
 [[package]]
 name = "opencode-math"
-version = "0.4.11"
+version = "0.4.13"
 source = { editable = "deps/research-environments/environments/opencode_math" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17,<0.1.15.dev150" },
 ]
 
 [[package]]
 name = "opencode-science"
-version = "0.3.11"
+version = "0.3.13"
 source = { editable = "deps/research-environments/environments/opencode_science" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17,<0.1.15.dev150" },
 ]
 
 [[package]]
 name = "opencode-swe"
-version = "0.4.7"
+version = "0.4.9"
 source = { editable = "deps/research-environments/environments/opencode_swe" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.23" },
     { name = "swebench", specifier = "==4.1.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17,<0.1.15.dev150" },
 ]
 
 [[package]]
@@ -3139,6 +3502,56 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4b/33/b5db29a6c00eb8f50708110d8d453747ca125c8b805bc437b289dbdcc057/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0bd48544f77c68b2941392fcdf9bcd2b9cdf00e98cb8c29b2455d194763cf99e", size = 60391106, upload-time = "2026-02-05T10:30:14.236Z" },
 ]
 
+[[package]]
+name = "openenv-core"
+version = "0.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "fastmcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "gradio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tomli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tomli-w", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ce/d6/3bebe8afb55fcc3ea9251c4c2dfbab2879e31089bc91a8fe9696e5ce019b/openenv_core-0.3.0.tar.gz", hash = "sha256:c7fee2035badab5be497eb6f4afb2cb417de000f82cc19afd72fb5ec332c431d", size = 164720, upload-time = "2026-05-11T11:37:57.274Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f8/f5/aafa43138589bfd5d369a8d02ea365aae9d6fe55ac0b3894368d6d69bd03/openenv_core-0.3.0-py3-none-any.whl", hash = "sha256:859e875c9d5211b157c30fb9abc681606fcf0bf1b6ffcdf404678992823a1df0", size = 194313, upload-time = "2026-05-11T11:37:55.537Z" },
+]
+
+[[package]]
+name = "openreward"
+version = "0.1.125"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "anthropic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "click", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "google-genai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "sse-starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "structlog", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9d/dd/b69264c77fd9720f69443c5f6420a7ae9934b4e2799e276d8655f6dc0721/openreward-0.1.125.tar.gz", hash = "sha256:519687307f960ab3a395bf844d6c2fc018d8a0faad0fc367ad3b24331366d390", size = 138936, upload-time = "2026-05-21T10:24:10.075Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/72/11/51c5473d5b3d209ecaacee6ac5418eef15114d0817a98f9cee0a2d6364ee/openreward-0.1.125-py3-none-any.whl", hash = "sha256:784faeeef6aba2ce8f175bd9af5dff29b3bb0c07a4c3642eb56dc431c9af2924", size = 135521, upload-time = "2026-05-21T10:24:11.369Z" },
+]
+
 [[package]]
 name = "opentelemetry-api"
 version = "1.42.1"
@@ -3347,6 +3760,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/42/32/658973117bf0fd82a24abbfb94fe73a5e86216e49342985e10acce54775a/partial_json_parser-0.2.1.1.post7-py3-none-any.whl", hash = "sha256:145119e5eabcf80cbb13844a6b50a85c68bf99d376f8ed771e2a3c3b03e653ae", size = 10877, upload-time = "2025-11-17T07:27:40.457Z" },
 ]
 
+[[package]]
+name = "pathable"
+version = "0.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/f3/5a20387de9bcd0607871bfc2198ee0e15836da7baa4592ccd7f24c27c986/pathable-0.6.0.tar.gz", hash = "sha256:6404b8b82aef5ff0fd478934137128b99b12212ba35afdde5525ca4f8388ea58", size = 18970, upload-time = "2026-05-19T18:15:11.911Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a2/e8/6d75ffd9784bce2e93d1ae4415649427e39a53bb172d4672b2b59c6f0a7b/pathable-0.6.0-py3-none-any.whl", hash = "sha256:82c4ca6c98c502ad12e0d4e9779b6210afee93c38990988c8c5d1b49bdcdf566", size = 18983, upload-time = "2026-05-19T18:15:10.728Z" },
+]
+
 [[package]]
 name = "pdfminer-six"
 version = "20260107"
@@ -3453,7 +3875,7 @@ dependencies = [
     { name = "toml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "tomli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/99/62/3361ae244cfb65009a1a089c4101c02e953fe3b126b2337c44ac50aac51e/prime-0.6.10.tar.gz", hash = "sha256:eee8341905e5daed4f751a2bf0f03e5d52f420450e5f45c4d5aa1856b806faea", size = 629514, upload-time = "2026-05-27T20:53:45.841Z" }
 wheels = [
@@ -3542,7 +3964,7 @@ dependencies = [
     { name = "torchvision", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "transformers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "uvloop", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "vllm", version = "0.22.0+cu129", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.22.0/vllm-0.22.0+cu129-cp38-abi3-manylinux_2_28_aarch64.whl" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
     { name = "vllm", version = "0.22.0+cu129", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.22.0/vllm-0.22.0+cu129-cp38-abi3-manylinux_2_28_x86_64.whl" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
     { name = "wandb", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -3584,7 +4006,6 @@ envs = [
     { name = "math-python", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "math500", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "mini-swe-agent-plus", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "mini-swe-agent-plus-rlm", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "mmlu-pro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "opencode-cp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "opencode-deepdive", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -3663,7 +4084,6 @@ requires-dist = [
     { name = "math-python", marker = "extra == 'envs'", editable = "deps/verifiers/environments/math_python" },
     { name = "math500", marker = "extra == 'envs'", editable = "deps/research-environments/environments/math500" },
     { name = "mini-swe-agent-plus", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mini_swe_agent_plus" },
-    { name = "mini-swe-agent-plus-rlm", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mini_swe_agent_plus_rlm" },
     { name = "mmlu-pro", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mmlu_pro" },
     { name = "modelexpress", marker = "extra == 'modelexpress'", specifier = "==0.3.0" },
     { name = "mooncake-transfer-engine", specifier = ">=0.3.10.post2" },
@@ -3879,6 +4299,31 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" },
 ]
 
+[[package]]
+name = "py-key-value-aio"
+version = "0.4.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "beartype", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fb/e2/d689d922894a7ecde73b6daeaf9b13dab5aae06fe6aaaf7514722644d382/py_key_value_aio-0.4.5.tar.gz", hash = "sha256:c6563a2c6abe5da5e20f4f9e875c2a9b425a2244a54fadbf46cf140a9eea45d7", size = 107547, upload-time = "2026-05-27T16:37:08.107Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f6/95/b8ba862968712caa12a19666175334fa979e1f198b896a430adb3bacfe87/py_key_value_aio-0.4.5-py3-none-any.whl", hash = "sha256:ab862adbcb8c72547d1c57821f22cbbb71ab86509039c96f36e914e0336c8dd7", size = 170005, upload-time = "2026-05-27T16:37:06.629Z" },
+]
+
+[package.optional-dependencies]
+filetree = [
+    { name = "aiofile", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+keyring = [
+    { name = "keyring", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+memory = [
+    { name = "cachetools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
 [[package]]
 name = "pyarrow"
 version = "24.0.0"
@@ -3891,6 +4336,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490, upload-time = "2026-04-21T10:47:55.981Z" },
 ]
 
+[[package]]
+name = "pyasn1"
+version = "0.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" },
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyasn1", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
+]
+
 [[package]]
 name = "pybase64"
 version = "1.4.3"
@@ -4004,6 +4470,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ae/8d/f1af3832f5e6eb13ba94ee809e72b8ecb5eef226d27ee0bef7d963d943c7/pydantic_settings-2.14.1-py3-none-any.whl", hash = "sha256:6e3c7edfd8277687cdc598f56e5cff0e9bfff0910a3749deaa8d4401c3a2b9de", size = 60964, upload-time = "2026-05-08T13:40:04.958Z" },
 ]
 
+[[package]]
+name = "pydub"
+version = "0.25.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" },
+]
+
 [[package]]
 name = "pyelftools"
 version = "0.32"
@@ -4052,6 +4527,17 @@ crypto = [
     { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
+[[package]]
+name = "pymupdf"
+version = "1.27.2.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/22/32/708bedc9dde7b328d45abbc076091769d44f2f24ad151ad92d56a6ec142b/pymupdf-1.27.2.3.tar.gz", hash = "sha256:7a92faa25129e8bbec5e50eeb9214f187665428c31b05c4ef6e36c58c0b1c6d2", size = 85759618, upload-time = "2026-04-24T14:13:14.42Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c3/26/b7e5a70eb83bd189f8b5df87ec442746b992f2f632662839b288170d357d/pymupdf-1.27.2.3-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1dd460a3ae4597a755f00a3bd9771f5ebf1531dc111f6a36bf05dd00a6b84425", size = 24333923, upload-time = "2026-04-24T14:09:47.341Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/a0/aa1ee2240f29481a04a827c313333b4ecd8a14d6ac3e15d3f41a30574781/pymupdf-1.27.2.3-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:857842b4888827bd6155a1131341b2822a7ebe9a8c15a975fd7d490d7a64a30c", size = 24963198, upload-time = "2026-04-24T14:10:07.408Z" },
+    { url = "https://files.pythonhosted.org/packages/69/49/4f742451f980840829fc00ba158bebb25d389c846d8f4f8c65936ee55de8/pymupdf-1.27.2.3-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:580983849c64a08d08344ca3d1580e87c01f046a8392421797bc850efd72a5b6", size = 25184609, upload-time = "2026-04-24T14:10:22.911Z" },
+]
+
 [[package]]
 name = "pynacl"
 version = "1.6.2"
@@ -4080,6 +4566,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
 ]
 
+[[package]]
+name = "pyperclip"
+version = "1.11.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/52/d87eba7cb129b81563019d1679026e7a112ef76855d6159d24754dbd2a51/pyperclip-1.11.0.tar.gz", hash = "sha256:244035963e4428530d9e3a6101a1ef97209c6825edab1567beac148ccc1db1b6", size = 12185, upload-time = "2025-09-26T14:40:37.245Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" },
+]
+
 [[package]]
 name = "pypika"
 version = "0.51.1"
@@ -4197,6 +4692,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8f/cb/769cfc37177252872a45a71f3fbdde9d51b471a3f3c14bfe95dde3407386/python_multipart-0.0.29-py3-none-any.whl", hash = "sha256:2ddcc971cef266225f54f552d8fa10bcfbb1f14446caec199060daac59ff2d69", size = 29640, upload-time = "2026-05-17T17:29:45.69Z" },
 ]
 
+[[package]]
+name = "pytz"
+version = "2026.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ff/46/dd499ec9038423421951e4fad73051febaa13d2df82b4064f87af8b8c0c3/pytz-2026.2.tar.gz", hash = "sha256:0e60b47b29f21574376f218fe21abc009894a2321ea16c6754f3cad6eb7cdd6a", size = 320861, upload-time = "2026-05-04T01:35:29.667Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ec/dd/96da98f892250475bdf2328112d7468abdd4acc7b902b6af23f4ed958ea0/pytz-2026.2-py2.py3-none-any.whl", hash = "sha256:04156e608bee23d3792fd45c94ae47fae1036688e75032eea2e3bf0323d1f126", size = 510141, upload-time = "2026-05-04T01:35:27.408Z" },
+]
+
 [[package]]
 name = "pyyaml"
 version = "6.0.3"
@@ -4347,7 +4851,7 @@ version = "0.1.4"
 source = { editable = "deps/verifiers/environments/reverse_text" }
 dependencies = [
     { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -4369,6 +4873,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" },
 ]
 
+[[package]]
+name = "rich-rst"
+version = "2.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pygments", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/56/3191bae66b08ccc637ea8120426068bcb361cc323c96404c310886937067/rich_rst-2.0.1.tar.gz", hash = "sha256:cbe236ed0901d1ec8427cc6a50bf0a34353ba28ad014dc24def68bfe7f3b9e68", size = 300570, upload-time = "2026-05-16T00:47:57.362Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/3d/55c17d3ebdf3cd81356002afe5bef9bb8af631db2819785b6eac845b925b/rich_rst-2.0.1-py3-none-any.whl", hash = "sha256:7ee15f345ce25fa02b582c272a6cdbaf0c21243e38061cea273cff659bf3ef61", size = 272922, upload-time = "2026-05-16T00:47:55.508Z" },
+]
+
 [[package]]
 name = "rich-toolkit"
 version = "0.19.10"
@@ -4406,13 +4923,13 @@ wheels = [
 
 [[package]]
 name = "rlm-swe"
-version = "0.4.2"
+version = "0.4.4"
 source = { editable = "deps/research-environments/environments/rlm_swe" }
 dependencies = [
     { name = "multi-swe-bench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -4420,7 +4937,7 @@ requires-dist = [
     { name = "multi-swe-bench", specifier = ">=1.1.2" },
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
     { name = "swebench", specifier = "==4.1.0" },
-    { name = "verifiers", specifier = ">=0.1.13.dev8" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -4447,6 +4964,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ed/c5/8864e4e7925b836ea354b31d57641ec03830564e281a8b6f061f8c3e0ec1/ruff-0.15.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:bb01d645694e3ec0102105d07ef2d53703970407d59c04e59d3ba0b7a1d53553", size = 11560214, upload-time = "2026-05-21T14:34:50.975Z" },
 ]
 
+[[package]]
+name = "safehttpx"
+version = "0.1.7"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/89/d1/4282284d9cf1ee873607a46442da977fc3c985059315ab23610be31d5885/safehttpx-0.1.7.tar.gz", hash = "sha256:db201c0978c41eddb8bb480f3eee59dd67304fdd91646035e9d9a720049a9d23", size = 10385, upload-time = "2025-10-24T18:30:09.783Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2e/a3/0f0b7d78e2f1eb9e8e1afbff1d2bff8d60144aee17aca51c065b516743dd/safehttpx-0.1.7-py3-none-any.whl", hash = "sha256:c4f4a162db6993464d7ca3d7cc4af0ffc6515a606dfd220b9f82c6945d869cde", size = 8959, upload-time = "2025-10-24T18:30:08.733Z" },
+]
+
 [[package]]
 name = "safetensors"
 version = "0.7.0"
@@ -4465,13 +4994,13 @@ version = "0.1.4"
 source = { editable = "deps/research-environments/environments/science_env" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -4519,6 +5048,28 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" },
 ]
 
+[[package]]
+name = "secretstorage"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jeepney", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" },
+]
+
+[[package]]
+name = "semantic-version"
+version = "2.10.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7d/31/f2289ce78b9b473d582568c234e104d2a342fd658cc288a7553d83bb8595/semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c", size = 52289, upload-time = "2022-05-26T13:35:23.454Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552, upload-time = "2022-05-26T13:35:21.206Z" },
+]
+
 [[package]]
 name = "sentencepiece"
 version = "0.2.1"
@@ -4577,11 +5128,11 @@ name = "simpleqa-verified"
 version = "0.1.2"
 source = { editable = "deps/research-environments/environments/simpleqa_verified" }
 dependencies = [
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
-requires-dist = [{ name = "verifiers", specifier = ">=0.1.15.dev1" }]
+requires-dist = [{ name = "verifiers", specifier = ">=0.1.15.dev17" }]
 
 [[package]]
 name = "six"
@@ -4621,15 +5172,14 @@ wheels = [
 
 [[package]]
 name = "sse-starlette"
-version = "3.4.4"
+version = "2.3.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f7/2b/58abc2d1fd397e7dde08e947e05c884d8ef2f78d5e2588c17a12d42d6994/sse_starlette-3.4.4.tar.gz", hash = "sha256:07e0fa0460138baf25cdd5fb28683472c3995dc1642225191b3832d62526bcb0", size = 31819, upload-time = "2026-05-12T17:37:17.019Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/8c/f4/989bc70cb8091eda43a9034ef969b25145291f3601703b82766e5172dfed/sse_starlette-2.3.6.tar.gz", hash = "sha256:0382336f7d4ec30160cf9ca0518962905e1b69b72d6c1c995131e0a703b436e3", size = 18284, upload-time = "2025-05-30T13:34:12.914Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/dc/67/805710444ea8cc75fbf70b920ed431a560c4bf9c57f7d5a3117213189399/sse_starlette-3.4.4-py3-none-any.whl", hash = "sha256:3f4dd50d8aed2771a091f3a83000323fc3844541c16b4fe585ae2420cc6df973", size = 16514, upload-time = "2026-05-12T17:37:15.601Z" },
+    { url = "https://files.pythonhosted.org/packages/81/05/78850ac6e79af5b9508f8841b0f26aa9fd329a1ba00bf65453c2d312bcc8/sse_starlette-2.3.6-py3-none-any.whl", hash = "sha256:d49a8285b182f6e2228e2609c350398b2ca2c36216c2675d875f81e93548f760", size = 10606, upload-time = "2025-05-30T13:34:11.703Z" },
 ]
 
 [[package]]
@@ -4659,6 +5209,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
 ]
 
+[[package]]
+name = "structlog"
+version = "25.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" },
+]
+
 [[package]]
 name = "supervisor"
 version = "4.3.0"
@@ -4745,6 +5304,36 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" },
 ]
 
+[[package]]
+name = "tasksets"
+source = { editable = "deps/verifiers/packages/tasksets" }
+dependencies = [
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.optional-dependencies]
+openenv = [
+    { name = "openenv-core", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+openreward = [
+    { name = "openreward", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+ta = [
+    { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "textarena", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" },
+    { name = "nltk", marker = "extra == 'ta'" },
+    { name = "openenv-core", marker = "extra == 'openenv'", specifier = ">=0.3.0" },
+    { name = "openreward", marker = "python_full_version >= '3.11' and extra == 'openreward'", specifier = ">=0.1.123" },
+    { name = "textarena", marker = "extra == 'ta'" },
+    { name = "verifiers", specifier = ">=0.1.15.dev11" },
+]
+provides-extras = ["nemogym", "openenv", "openreward", "ta"]
+
 [[package]]
 name = "tau2"
 version = "0.2.1.dev0"
@@ -4784,13 +5373,13 @@ version = "0.2.3"
 source = { editable = "deps/research-environments/environments/tau2_bench" }
 dependencies = [
     { name = "tau2", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "tau2", git = "https://github.com/sierra-research/tau2-bench.git?rev=337326e" },
-    { name = "verifiers", specifier = ">=0.1.15.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -5012,11 +5601,11 @@ wheels = [
 
 [[package]]
 name = "tomlkit"
-version = "0.15.0"
+version = "0.14.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/51/db/03eaf4331631ef6b27d6e3c9b68c54dc6f0d63d87201fed600cc409307fd/tomlkit-0.15.0.tar.gz", hash = "sha256:7d1a9ecba3086638211b13814ea79c90dd54dd11993564376f3aa92271f5c7a3", size = 161875, upload-time = "2026-05-10T07:38:22.245Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/af/14b24e41977adb296d6bd1fb59402cf7d60ce364f90c890bd2ec65c43b5a/tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064", size = 187167, upload-time = "2026-01-13T01:14:53.304Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/6a/43/8bd850ee71a191bf072e31302c73a66be413fecdd98fdcd111ecbcce13ca/tomlkit-0.15.0-py3-none-any.whl", hash = "sha256:4dbc8f0fc024412b57ced8757ac7461305126a648ff8c2c807fcb8e133a78738", size = 41328, upload-time = "2026-05-10T07:38:23.517Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310, upload-time = "2026-01-13T01:14:51.965Z" },
 ]
 
 [[package]]
@@ -5280,6 +5869,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/61/73/d21edf5b204d1467e06500080a50f79d49ef2b997c79123a536d4a17d97c/uc_micro_py-2.0.0-py3-none-any.whl", hash = "sha256:3603a3859af53e5a39bc7677713c78ea6589ff188d70f4fee165db88e22b242c", size = 6383, upload-time = "2026-03-01T06:31:26.257Z" },
 ]
 
+[[package]]
+name = "uncalled-for"
+version = "0.3.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/82/345cc927f7fbdae6065e7768759932fcc827fc20b29b45dfbafa2f1f7da4/uncalled_for-0.3.2.tar.gz", hash = "sha256:89f5dbcd71e2b8f47c030b1fa302e6cce2ec795d1ac565eeb6525c5fe55cb8a2", size = 50032, upload-time = "2026-05-06T13:38:25.204Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/25/2c87754f3a9e692315f7b811244090e68f362979fc8886b3fbd2985a1d8c/uncalled_for-0.3.2-py3-none-any.whl", hash = "sha256:0ff60b142c7d1f8070bde9d42afaa70aedc77dcc10998c227687e9c15713418e", size = 11444, upload-time = "2026-05-06T13:38:24.025Z" },
+]
+
 [[package]]
 name = "unidiff"
 version = "0.7.5"
@@ -5337,8 +5935,10 @@ wheels = [
 name = "verifiers"
 source = { editable = "deps/verifiers" }
 dependencies = [
+    { name = "aiohttp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "aiolimiter", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "anthropic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "certifi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "gepa", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -5350,10 +5950,12 @@ dependencies = [
     { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "openai-agents", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-pydantic-config", extra = ["toml"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "prime-tunnel", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pymupdf", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "pyzmq", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "regex", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -5364,12 +5966,20 @@ dependencies = [
     { name = "uvloop", marker = "(platform_machine == 'aarch64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux')" },
 ]
 
+[package.optional-dependencies]
+packages = [
+    { name = "harnesses", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tasksets", extra = ["openenv", "openreward", "ta"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
 [package.metadata]
 requires-dist = [
     { name = "accelerate", marker = "extra == 'rl'", specifier = ">=1.4.0" },
+    { name = "aiohttp", specifier = ">=3.9.0" },
     { name = "aiohttp", marker = "extra == 'browser'", specifier = ">=3.9.0" },
     { name = "aiolimiter", specifier = ">=1.2.1" },
     { name = "anthropic", specifier = ">=0.78.0" },
+    { name = "certifi" },
     { name = "datasets", specifier = ">=3.0.0,<4.7.0" },
     { name = "deepspeed", marker = "extra == 'rl'", specifier = ">=0.17.6" },
     { name = "flash-attn", marker = "extra == 'rl'", specifier = ">=2.8.3" },
@@ -5389,10 +5999,12 @@ requires-dist = [
     { name = "openai", specifier = ">=1.108.1" },
     { name = "openai-agents", specifier = ">=0.0.7" },
     { name = "peft", marker = "extra == 'rl'" },
+    { name = "pillow" },
     { name = "prime-pydantic-config", extras = ["toml"] },
     { name = "prime-sandboxes", specifier = ">=0.2.25" },
     { name = "prime-tunnel", specifier = ">=0.1.6" },
     { name = "pydantic", specifier = ">=2.11.9" },
+    { name = "pymupdf" },
     { name = "python-dotenv", marker = "extra == 'browser'", specifier = ">=1.0.0" },
     { name = "pyzmq", specifier = ">=27.1.0" },
     { name = "reasoning-gym", marker = "extra == 'rg'" },
@@ -5967,7 +6579,7 @@ dependencies = [
     { name = "chromadb", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -5985,7 +6597,7 @@ source = { editable = "deps/verifiers/environments/wordle" }
 dependencies = [
     { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "textarena", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]

From 77b85673702b8ac939a5c2e105de54a676276dbe Mon Sep 17 00:00:00 2001
From: Christian <cdreetz@gmail.com>
Date: Tue, 9 Jun 2026 20:48:06 -0700
Subject: [PATCH 12/12] explicit del and malloc_trim

---
 src/prime_rl/orchestrator/orchestrator.py | 50 +++++++++++++++--------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py
index 9823dd73d0..d8595d326e 100644
--- a/src/prime_rl/orchestrator/orchestrator.py
+++ b/src/prime_rl/orchestrator/orchestrator.py
@@ -22,6 +22,7 @@
 
 import asyncio
 import ctypes
+import gc
 import logging
 import os
 import time
@@ -109,6 +110,14 @@
 TARGET_LAG = 1
 
 
+def _release_unused_memory() -> None:  # best-effort: GC pass, then glibc heap trim; never raises on non-glibc
+    gc.collect()  # full collection first, so memory freed here is eligible for the trim below
+    try:
+        ctypes.CDLL("libc.so.6").malloc_trim(0)  # glibc-specific: ask the allocator to return free heap to the OS
+    except (OSError, AttributeError) as e:  # libc.so.6 not loadable, or it has no malloc_trim symbol
+        get_logger().debug(f"malloc_trim(0) unavailable: {e}")
+
+
 class Orchestrator:
     # Set in ``__init__``
     config: OrchestratorConfig
@@ -476,10 +485,7 @@ async def start(self) -> None:
                 get_logger().success("Orchestrator finished.")
             else:
                 get_logger().warning("Orchestrator cleanup complete (forced).")
-            try:
-                ctypes.CDLL("libc.so.6").malloc_trim(0)
-            except Exception as e:
-                get_logger().debug(f"malloc_trim(0) failed: {e}")
+            _release_unused_memory()
 
     async def main_loop(self) -> None:
         """Consume ``FinishedRollout``\\ s from the dispatcher and route them
@@ -496,19 +502,29 @@ async def main_loop(self) -> None:
             except asyncio.TimeoutError:
                 continue
 
-            if isinstance(rollout, EvalRollout):
-                assert self.eval_sink is not None  # eval rollouts only emitted when eval is configured
-                eval_batch = self.eval_sink.add(rollout)
-                if eval_batch is not None:
-                    await self.finalize_eval_batch(eval_batch)
-                continue
-
-            assert isinstance(rollout, TrainRollout)
-            train_batch = await self.train_sink.add(rollout)
-            # In drain mode any late-arriving train batch is dropped — we
-            # don't want to ship past ``max_steps``
-            if train_batch is not None and not self.draining and not self.stopped.is_set():
-                await self.finalize_train_batch(train_batch)
+            batch = None
+            should_release_memory = False  # flipped when a sink hands back a batch, even one dropped below
+            try:
+                if isinstance(rollout, EvalRollout):
+                    assert self.eval_sink is not None  # eval rollouts only emitted when eval is configured
+                    batch = self.eval_sink.add(rollout)
+                    if batch is not None:
+                        should_release_memory = True
+                        await self.finalize_eval_batch(batch)
+                    continue  # the ``finally`` below still runs before the next iteration
+
+                assert isinstance(rollout, TrainRollout)
+                batch = await self.train_sink.add(rollout)
+                if batch is not None:
+                    should_release_memory = True
+                # In drain mode any late-arriving train batch is dropped — we
+                # don't want to ship past ``max_steps``
+                if batch is not None and not self.draining and not self.stopped.is_set():
+                    await self.finalize_train_batch(batch)
+            finally:
+                del batch, rollout  # unbind this iteration's locals before the collection below
+                if should_release_memory:
+                    _release_unused_memory()
 
     async def finalize_train_batch(self, batch: TrainBatch) -> None:
         """Ship one ``TrainBatch`` out to the trainer and handle the I/O