From 209f76eb525722bbb69b1ed12de9d2488e607afe Mon Sep 17 00:00:00 2001 From: Andre Fu <39042250+andre-fu@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:21:11 -0700 Subject: [PATCH 01/12] fix(install): support aarch64 hosts with any compute capability (#2587) The aarch64 host install path was broken: `uv sync` installs flash-attn from PyPI source but pyproject sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE, so the compiled extension never builds. `scripts/docker-arm64-post-install.sh` fixed it for Docker GB200 builds but hardcoded sm_100 and /app/.venv, leaving Hopper hosts (H100/H200/GH200) without a recipe. Changes: - `scripts/docker-arm64-post-install.sh`: auto-detect compute capability via nvidia-smi when available; parameterize venv path. Preserves the sm_100 default when no GPU is visible (Docker buildx). - `scripts/install.sh`: call the post-install for aarch64 hosts after `uv sync --all-extras`. Previously the script ran uv sync and exited, leaving aarch64 users with a broken venv. - `README.md`: document the aarch64 post-install step (mirrors the existing 3.1 Flash Attention 3 pattern). Validated on GH200 (sm_90, aarch64): - forward + backward parity vs torch SDPA (max diff < 0.05 / 0.25) - 383/384 unit tests pass (the 1 failure is unrelated TileLang/MoE) - SFT trainer smoke test (5 steps, Qwen3-0.6B) runs with flash_attention_2 Co-authored-by: Claude Opus 4.7 (1M context) --- README.md | 9 ++++++ scripts/docker-arm64-post-install.sh | 47 ++++++++++++++++++++++------ scripts/install.sh | 10 ++++++ 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index e6321c9006..d9933608cc 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,15 @@ source $HOME/.local/bin/env uv sync --all-extras ``` +4.1. On aarch64 hosts: build flash-attn from source for your GPU + +> *NOTE*: aarch64 has no prebuilt flash-attn wheel. This step compiles the CUDA extension for your local GPU (~20-30 minutes). 
Compute capability is auto-detected from `nvidia-smi`; override with `TORCH_CUDA_ARCH_LIST=9.0` (Hopper) / `10.0` (Blackwell) if needed. +> *NOTE*: After this step, you can't run `uv sync --all-extras` or `uv run` as it will uninstall the package, you can avoid it by running `uv sync --inexact` or `uv run --no-sync`. + +```bash +bash scripts/docker-arm64-post-install.sh +``` + 3.1. Optional: Install Flash Attention 3 (on Hopper GPUs only, for flash_attention_3 attention backend) > *NOTE*: This step will take a while, as it builds the Flash Attention 3 extension from source, as it has no wheels prebuilt. diff --git a/scripts/docker-arm64-post-install.sh b/scripts/docker-arm64-post-install.sh index f02b3070b3..55f85a3a03 100755 --- a/scripts/docker-arm64-post-install.sh +++ b/scripts/docker-arm64-post-install.sh @@ -1,17 +1,44 @@ #!/bin/bash -# arm64 post-install fixups for Docker builds. -set -e +# arm64 post-install fixups: rebuild flash-attn from source for the target GPU. +# +# Why this exists: pyproject.toml sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE to keep +# `uv sync` fast; on x86_64 it pins a prebuilt wheel to fill in the binary, but no +# such wheel exists for aarch64. Without this script, `import flash_attn` fails on +# aarch64 with `ModuleNotFoundError: No module named 'flash_attn_2_cuda'`. +# +# Defaults preserve the existing Docker behavior (sm_100 / GB200). On a host with +# `nvidia-smi` available, the compute capability is auto-detected from the local +# GPU. Override via env vars if needed: +# TORCH_CUDA_ARCH_LIST e.g. 9.0 (Hopper), 10.0 (Blackwell) +# VENV_PATH path to the venv (default: $(pwd)/.venv) +# MAX_JOBS parallel nvcc jobs (default: 4) +set -euo pipefail -echo "=== building flash-attn from source (sm_100 / GB200) ===" -# Run from /tmp so uv doesn't read pyproject.toml's [tool.uv.extra-build-variables] -# which sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE and prevents CUDA kernel compilation. 
-export TORCH_CUDA_ARCH_LIST="10.0" -export MAX_JOBS=4 +if [ -z "${TORCH_CUDA_ARCH_LIST:-}" ]; then + # Try to detect from the local GPU. Tolerate any failure mode (binary missing, + # driver not loaded, Docker buildx without --gpus) and fall back to GB200. + TORCH_CUDA_ARCH_LIST="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d ' ' || true)" + : "${TORCH_CUDA_ARCH_LIST:=10.0}" +fi +export TORCH_CUDA_ARCH_LIST + +VENV_PATH="${VENV_PATH:-$(pwd)/.venv}" +if [ ! -x "$VENV_PATH/bin/python" ]; then + echo "ERROR: no python at $VENV_PATH/bin/python. Run from the project root or set VENV_PATH." >&2 + exit 1 +fi + +export MAX_JOBS="${MAX_JOBS:-4}" export FLASH_ATTENTION_FORCE_BUILD=TRUE export FLASH_ATTENTION_SKIP_CUDA_BUILD=FALSE -(cd /tmp && uv pip install --python /app/.venv/bin/python \ - "flash-attn==2.8.3" --no-build-isolation --no-binary flash-attn --no-cache) + +echo "=== building flash-attn from source (TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST, MAX_JOBS=$MAX_JOBS) ===" +echo " target venv: $VENV_PATH" +# Run from /tmp so uv ignores the project's [tool.uv.extra-build-variables], +# which sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE and would prevent kernel compilation. +(cd /tmp && uv pip install --python "$VENV_PATH/bin/python" \ + "flash-attn==2.8.3" --no-build-isolation --no-binary flash-attn --no-cache --reinstall-package flash-attn) echo "=== reinstalling flash-attn-cute (flash-attn overwrites it with a stub) ===" -uv pip install --reinstall --no-deps \ +uv pip install --python "$VENV_PATH/bin/python" --reinstall --no-deps \ "flash-attn-4 @ git+https://github.com/Dao-AILab/flash-attention.git@96bd151#subdirectory=flash_attn/cute" diff --git a/scripts/install.sh b/scripts/install.sh index 7e03b5aa71..630bf1d576 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -159,6 +159,16 @@ main() { log_info "Installing pre-commit hooks..." 
uv run pre-commit install + # aarch64 has no prebuilt flash-attn wheel; build it from source for the local GPU. + # Without this, `import flash_attn` fails with `ModuleNotFoundError: flash_attn_2_cuda`. + # Run last so no subsequent uv operation (which implicitly syncs against the lockfile) + # rebuilds flash-attn from PyPI with FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE and undoes this. + if [ "$(uname -m)" = "aarch64" ]; then + log_info "aarch64 detected: building flash-attn from source (this takes 20-30 minutes)..." + log_warn "Future 'uv sync --all-extras' or 'uv run' will remove this build. Use 'uv sync --inexact' or 'uv run --no-sync' to keep it." + bash scripts/docker-arm64-post-install.sh + fi + log_info "Installation completed!" } From 766e36f00092bd8b71f19d7a268ef8e836eba044 Mon Sep 17 00:00:00 2001 From: Erik Schultheis <7938269+ngc92@users.noreply.github.com> Date: Sun, 7 Jun 2026 17:51:36 +0200 Subject: [PATCH 02/12] Feat/fp8 fused transpose cast (#2724) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(fp8): fuse transpose into the dX block-fp8 weight cast The dX backward built `weight.transpose(0, 1).contiguous()` and re-cast it to fp8 every step, materializing a full bf16 transpose buffer plus an extra read/write pass. Add `per_block_cast_to_fp8_tp_triton`, which produces the block-fp8 of `weight.T` directly by reusing the existing per-block kernel with swapped output/scale strides — no intermediate buffer. 128x128 block quantization is transpose-symmetric, so the result is bit-identical to casting the materialized transpose; DeepGEMM receives an identical B tensor. Verified byte-for-byte across shapes; ~14x faster on a 4096x4096 weight (373 -> 27 us). 
Co-Authored-By: Claude Opus 4.8 (1M context) * deslopified * also add fused implementation for per-token * Fix: skip tests on Co-authored-by: S1ro1 --- .../trainer/models/kernels/fp8_utils.py | 68 +++++++++++++++++++ .../trainer/models/layers/fp8_linear.py | 11 ++- tests/unit/train/models/test_fp8_utils.py | 49 +++++++++++++ 3 files changed, 122 insertions(+), 6 deletions(-) create mode 100644 tests/unit/train/models/test_fp8_utils.py diff --git a/src/prime_rl/trainer/models/kernels/fp8_utils.py b/src/prime_rl/trainer/models/kernels/fp8_utils.py index 82c26fb7e4..4595640f28 100644 --- a/src/prime_rl/trainer/models/kernels/fp8_utils.py +++ b/src/prime_rl/trainer/models/kernels/fp8_utils.py @@ -550,3 +550,71 @@ def per_block_cast_to_fp8_triton( gran_k, ) return out[0], sf[0] + + +def per_block_cast_to_fp8_tp_triton( + x: torch.Tensor, use_ue8m0: bool, gran_k: int = GROUP_ALIGNMENT +) -> Tuple[torch.Tensor, torch.Tensor]: + """Block-fp8 cast of ``x.T`` without materializing the transpose.""" + assert x.dim() == 2 + assert gran_k == GROUP_ALIGNMENT + rows, cols = x.shape + x3 = x.unsqueeze(0) + out = torch.empty((cols, rows), device=x.device, dtype=torch.float8_e4m3fn) + sf = torch.empty((ceil_div(cols, gran_k), ceil_div(rows, gran_k)), device=x.device, dtype=torch.float32) + grid = (1, ceil_div(rows, gran_k), ceil_div(cols, gran_k)) + _grouped_per_block_fp8_kernel[grid]( + x3, + out, + sf, + 1, + rows, + cols, + x3.stride(0), + x3.stride(1), + x3.stride(2), + # transposed output: x's element (row, col) lands at out[col, row] + cols * rows, + 1, + rows, + # transposed scales: x's tile (pid_m, pid_n) lands at sf[pid_n, pid_m] + ceil_div(cols, gran_k) * ceil_div(rows, gran_k), + 1, + ceil_div(rows, gran_k), + USE_UE8M0=use_ue8m0, + BLOCK_M=gran_k, + BLOCK_N=gran_k, + num_warps=8, + ) + return out, sf + + +def per_token_cast_to_fp8_tp_triton( + x: torch.Tensor, use_ue8m0: bool, gran_k: int = GROUP_ALIGNMENT +) -> Tuple[torch.Tensor, torch.Tensor]: + """Per-token fp8 cast 
of ``x.T`` without materializing the transpose.""" + assert x.dim() == 2 + assert gran_k == GROUP_ALIGNMENT + rows, cols = x.shape + out = torch.empty((cols, rows), device=x.device, dtype=torch.float8_e4m3fn) + sf = torch.empty((cols, ceil_div(rows, gran_k)), device=x.device, dtype=torch.float32) + grid = lambda meta: (ceil_div(cols, meta["BLOCK_M"]), ceil_div(rows, meta["BLOCK_K"])) + _per_token_fp8_kernel[grid]( + x, + out, + sf, + cols, + rows, + # transposed read: the kernel's per-row amax reduces over x's rows + x.stride(1), + x.stride(0), + out.stride(0), + out.stride(1), + sf.stride(0), + sf.stride(1), + USE_UE8M0=use_ue8m0, + BLOCK_M=8, + BLOCK_K=gran_k, + num_warps=4, + ) + return out, sf diff --git a/src/prime_rl/trainer/models/layers/fp8_linear.py b/src/prime_rl/trainer/models/layers/fp8_linear.py index 5f7f675dc3..0cd20bf936 100644 --- a/src/prime_rl/trainer/models/layers/fp8_linear.py +++ b/src/prime_rl/trainer/models/layers/fp8_linear.py @@ -12,7 +12,9 @@ from torch import nn from prime_rl.trainer.models.kernels.fp8_utils import ( + per_block_cast_to_fp8_tp_triton, per_block_cast_to_fp8_triton, + per_token_cast_to_fp8_tp_triton, per_token_cast_to_fp8_triton, ) from prime_rl.utils.logger import get_logger @@ -43,8 +45,7 @@ def backward(ctx, grad_output): grad_x = grad_weight = None if ctx.needs_input_grad[0]: grad_output_fp8 = per_token_cast_to_fp8_triton(grad_output_2d, False, block_size) - weight_t = weight.transpose(0, 1).contiguous() - weight_dx_fp8 = per_block_cast_to_fp8_triton(weight_t, False, block_size) + weight_dx_fp8 = per_block_cast_to_fp8_tp_triton(weight, False, block_size) grad_x_2d = torch.empty_like(x_2d) deep_gemm.fp8_gemm_nt(grad_output_fp8, weight_dx_fp8, grad_x_2d) grad_x = grad_x_2d.reshape(ctx.x_shape) @@ -62,10 +63,8 @@ def backward(ctx, grad_output): else: grad_output_2d_padded = grad_output_2d x_2d_padded = x_2d - grad_output_t = grad_output_2d_padded.transpose(0, 1).contiguous() - x_t = x_2d_padded.transpose(0, 
1).contiguous() - grad_output_t_fp8 = per_token_cast_to_fp8_triton(grad_output_t, False, block_size) - x_t_fp8 = per_token_cast_to_fp8_triton(x_t, False, block_size) + grad_output_t_fp8 = per_token_cast_to_fp8_tp_triton(grad_output_2d_padded, False, block_size) + x_t_fp8 = per_token_cast_to_fp8_tp_triton(x_2d_padded, False, block_size) grad_weight_fp32 = torch.zeros_like(weight, dtype=torch.float32) deep_gemm.fp8_gemm_nt( grad_output_t_fp8, diff --git a/tests/unit/train/models/test_fp8_utils.py b/tests/unit/train/models/test_fp8_utils.py new file mode 100644 index 0000000000..f885f1076a --- /dev/null +++ b/tests/unit/train/models/test_fp8_utils.py @@ -0,0 +1,49 @@ +import pytest +import torch + +from prime_rl.trainer.models.kernels.fp8_utils import ( + per_block_cast_to_fp8_tp_triton, + per_block_cast_to_fp8_triton, + per_token_cast_to_fp8_tp_triton, + per_token_cast_to_fp8_triton, +) + +pytestmark = [ + pytest.mark.gpu, + pytest.mark.skipif( + not torch.cuda.is_available() or torch.cuda.get_device_capability()[0] < 9, + reason="block-fp8 cast kernels use Triton fp8e4nv (e4m3), only supported on Hopper (SM90) and newer", + ), +] + + +@pytest.mark.parametrize("rows,cols", [(256, 256), (256, 512), (512, 256), (1024, 768), (384, 128)]) +def test_block_tp_cast_matches_materialized_transpose(rows, cols): + """The fused transpose+cast is *bit-identical* to unfused.""" + torch.manual_seed(rows + cols) + x = torch.randn(rows, cols, device="cuda", dtype=torch.bfloat16) * 0.3 + + ref_q, ref_s = per_block_cast_to_fp8_triton(x.transpose(0, 1).contiguous(), False) + tp_q, tp_s = per_block_cast_to_fp8_tp_triton(x, False) + + assert tp_q.shape == ref_q.shape == (cols, rows) + assert tp_s.shape == ref_s.shape + assert tp_q.is_contiguous() + assert torch.equal(tp_q.view(torch.uint8), ref_q.view(torch.uint8)) + assert torch.equal(tp_s, ref_s) + + +@pytest.mark.parametrize("rows,cols", [(256, 512), (512, 256), (128, 1024), (1024, 768), (384, 512)]) +def 
test_token_tp_cast_matches_materialized_transpose(rows, cols): + """The fused transpose+cast is *bit-identical* to unfused.""" + torch.manual_seed(rows + cols) + x = torch.randn(rows, cols, device="cuda", dtype=torch.bfloat16) * 0.3 + + ref_q, ref_s = per_token_cast_to_fp8_triton(x.transpose(0, 1).contiguous(), False) + tp_q, tp_s = per_token_cast_to_fp8_tp_triton(x, False) + + assert tp_q.shape == ref_q.shape == (cols, rows) + assert tp_s.shape == ref_s.shape + assert tp_q.is_contiguous() + assert torch.equal(tp_q.view(torch.uint8), ref_q.view(torch.uint8)) + assert torch.equal(tp_s, ref_s) From 90b074492ea4ef29e31b4d184b099d7878d8c789 Mon Sep 17 00:00:00 2001 From: Matej Sirovatka <54212263+S1ro1@users.noreply.github.com> Date: Mon, 8 Jun 2026 08:58:23 -0700 Subject: [PATCH 03/12] Feat: fix weight reload to cpu optim (#2729) --- src/prime_rl/trainer/ckpt.py | 74 +++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/src/prime_rl/trainer/ckpt.py b/src/prime_rl/trainer/ckpt.py index dd23ae345e..d699b5f097 100644 --- a/src/prime_rl/trainer/ckpt.py +++ b/src/prime_rl/trainer/ckpt.py @@ -9,7 +9,7 @@ import torch from torch import Tensor, nn -from torch.distributed.checkpoint.state_dict import get_state_dict, set_state_dict +from torch.distributed.checkpoint.state_dict import get_state_dict, set_model_state_dict, set_state_dict from torch.distributed.checkpoint.state_dict_loader import load as dcp_load from torch.distributed.checkpoint.state_dict_saver import save as dcp_save from torch.distributed.checkpoint.stateful import Stateful @@ -67,8 +67,13 @@ def _get_base_optimizers(self) -> list[Optimizer]: """Extract base optimizers from wrappers like CPUOffloadOptimizer.""" return [opt.base_optimizer if isinstance(opt, CPUOffloadOptimizer) else opt for opt in self.optimizers] + def _has_cpu_offload(self) -> bool: + return any(isinstance(opt, CPUOffloadOptimizer) for opt in self.optimizers) + def state_dict(self) -> dict[str, 
Any]: - # Move CPU-offloaded states to GPU before checkpointing + # get_state_dict requires optimizer states to live on param.device. For an + # already-initialized CPU-offload optimizer that means staging back to GPU + # before the call; the matching offload happens after the dict is built. for opt in self.optimizers: if isinstance(opt, CPUOffloadOptimizer) and opt._initialized: opt._move_states("cuda") @@ -88,26 +93,53 @@ def state_dict(self) -> dict[str, Any]: progress_state_dict = asdict(self.progress) state_dict["progress"] = progress_state_dict - # Move states back to CPU + # Offload optimizer states to CPU for every CPUOffloadOptimizer, including + # ones that were uninitialized on entry. dcp_load calls this method to build + # a template, and get_state_dict's internal _init_optim_state populates an + # empty optim.state with GPU tensors. Optimizer.state_dict() returns those + # values via shallow copy, so optimizer_state_dict["state"][fqn] is the same + # dict object as optim.state[param]. Replacing the entries with CPU tensors + # in place therefore flips the template too — dcp_load reads bytes from disk + # straight into CPU storage and optim.state is loaded by the time the load + # returns, without GPU optimizer state ever existing for the duration of the + # read. 
+ has_cpu_offload = self._has_cpu_offload() for opt in self.optimizers: - if isinstance(opt, CPUOffloadOptimizer) and opt._initialized: + if isinstance(opt, CPUOffloadOptimizer): opt._move_states("cpu") + if has_cpu_offload: + gc.collect() + torch.cuda.empty_cache() return state_dict def load_state_dict(self, state_dict: dict[str, Any]): base_optimizers = self._get_base_optimizers() - set_state_dict( - self.model, base_optimizers, model_state_dict=state_dict["model"], optim_state_dict=state_dict["optimizers"] - ) + has_cpu_offload = self._has_cpu_offload() - # Re-initialize CPU offload wrappers after loading - has_cpu_offload = False - for opt in self.optimizers: - if isinstance(opt, CPUOffloadOptimizer): - opt._move_states("cpu") - opt._initialized = True - has_cpu_offload = True + if has_cpu_offload: + # When CPU offload is on, the optimizer is already loaded by the time we + # get here: state_dict() handed dcp_load a template whose tensors share + # storage with optim.state[p][k], and dcp_load wrote the checkpoint bytes + # directly into those tensors via target_tensor.copy_(...). Running + # set_state_dict on the optimizer would route the loaded CPU values + # through Optimizer.load_state_dict, whose _cast hook does + # value.to(param.dtype, param.device) and would allocate a fresh GPU + # copy of every state tensor — undoing the in-place CPU load and + # detaching optim.state from the tensors we just populated. So we only + # apply the model side here and flip the wrappers to initialized so + # subsequent steps take the steady-state path. 
+ set_model_state_dict(self.model, model_state_dict=state_dict["model"]) + for opt in self.optimizers: + if isinstance(opt, CPUOffloadOptimizer): + opt._initialized = True + else: + set_state_dict( + self.model, + base_optimizers, + model_state_dict=state_dict["model"], + optim_state_dict=state_dict["optimizers"], + ) if self.scheduler is not None: self.scheduler.load_state_dict(state_dict["scheduler"]) @@ -115,15 +147,13 @@ def load_state_dict(self, state_dict: dict[str, Any]): for key, value in state_dict["progress"].items(): setattr(self.progress, key, value) - # Reclaim GPU memory freed by moving optimizer states to CPU. - # After set_state_dict + _move_states("cpu"), the optimizer states live on CPU, - # but the state_dict (owned by dcp_load) still holds references to stale GPU - # optimizer tensors. Clearing them and flushing the CUDA cache prevents OOM on - # the first training step. + # state_dict is the same dict object that dcp_load held internally; clearing + # it drops the last references to the loaded tensor wrappers so the cuda + # allocator can release whatever blocks it cached during the read. 
 if has_cpu_offload: - state_dict.clear() # drop stale GPU tensor references from dcp_load - gc.collect() # break any circular references so tensors are freed - torch.cuda.empty_cache() # return freed GPU memory to CUDA + state_dict.clear() + gc.collect() + torch.cuda.empty_cache() class CheckpointManager: From c2a5fa466c358d150a65b8b18e2e2197b5231cfa Mon Sep 17 00:00:00 2001 From: minh hoang <13672394+eexwhyzee@users.noreply.github.com> Date: Mon, 8 Jun 2026 15:16:58 -0700 Subject: [PATCH 04/12] chore(renderers): bump submodule to renderers-v0.1.8.dev41 (#2732) --- deps/renderers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/renderers b/deps/renderers index 596c15ffd9..e6dba5ad6c 160000 --- a/deps/renderers +++ b/deps/renderers @@ -1 +1 @@ -Subproject commit 596c15ffd9da779290bfd0fdcad520688de14a4e +Subproject commit e6dba5ad6c50ca83d4ffa462145037082542e52a From 53b22b55c8bcc6bcdc13ce42c7830f18aee2668b Mon Sep 17 00:00:00 2001 From: samsja <55492238+samsja@users.noreply.github.com> Date: Mon, 8 Jun 2026 16:34:24 -0700 Subject: [PATCH 05/12] fix: add prime-pydantic-config as direct dep so uv uses editable path source (#2733) The [tool.uv.sources] override for prime-pydantic-config was being ignored because it was only a transitive dependency (via prime-rl-configs). uv only applies source overrides for packages that appear in project.dependencies. Adding it as a direct dependency makes uv resolve from the local editable path (deps/pydantic-config) instead of PyPI. 
--- pyproject.toml | 1 + uv.lock | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 625a879d5a..b5f2a2d2dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = "README.md" requires-python = "~=3.12.0" dependencies = [ "prime-rl-configs", + "prime-pydantic-config", "beartype>=0.21.0", "datasets>=4.0.0", "jaxtyping>=0.3.2", diff --git a/uv.lock b/uv.lock index 5c15a1668a..03e1a1ddae 100644 --- a/uv.lock +++ b/uv.lock @@ -3476,21 +3476,35 @@ wheels = [ [[package]] name = "prime-pydantic-config" -version = "0.3.0.dev86" -source = { registry = "https://pypi.org/simple" } +version = "0.3.0" +source = { editable = "deps/pydantic-config" } dependencies = [ { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ad/34/006fc720a8fcda84706793582d50a2028bf6950fb7a0eedb59d3f6555261/prime_pydantic_config-0.3.0.dev86.tar.gz", hash = "sha256:1139bb6d21a8cf134e212ee4e529e5150f2db7422b42eae3ca69a5c77b8a69f5", size = 75656, upload-time = "2026-06-02T01:08:19.079Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/a3/ded48c436cd56ddac3b216458ac458eaf069b55e0ca3be506b2508d16fa2/prime_pydantic_config-0.3.0.dev86-py3-none-any.whl", hash = "sha256:51ac33ae1b5de9ba2e44eb9a91242d9dd783784234942f166f6e8974bcdf1577", size = 27437, upload-time = "2026-06-02T01:08:20.23Z" }, -] [package.optional-dependencies] toml = [ { name = "tomli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] +[package.metadata] +requires-dist = [ + { name = "pydantic", specifier = ">=2.0.0" }, + { name = "pyyaml", marker = "extra == 'all'" }, + { name = "pyyaml", marker = "extra == 'yaml'" }, + { name = "tomli", marker = "extra == 'all'" }, + { name = 
"tomli", marker = "extra == 'toml'" }, +] +provides-extras = ["yaml", "toml", "all"] + +[package.metadata.requires-dev] +dev = [ + { name = "pre-commit", specifier = ">=3.0.0" }, + { name = "pytest", specifier = ">=9.0.3" }, + { name = "rich", specifier = ">=15.0.0" }, + { name = "ruff", specifier = ">=0.12.1" }, +] + [[package]] name = "prime-rl" version = "0.5.0" @@ -3509,6 +3523,7 @@ dependencies = [ { name = "nvidia-ml-py", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "prime-pydantic-config", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-rl-configs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pyarrow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pybase64", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -3662,6 +3677,7 @@ requires-dist = [ { name = "opencode-science", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_science" }, { name = "opencode-swe", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_swe" }, { name = "prime", specifier = ">=0.6.4" }, + { name = "prime-pydantic-config", editable = "deps/pydantic-config" }, { name = "prime-rl", extras = ["disagg"], marker = "extra == 
'all'" }, { name = "prime-rl", extras = ["flash-attn"], marker = "extra == 'all'" }, { name = "prime-rl", extras = ["flash-attn-3"], marker = "extra == 'all'" }, From 54012df504f30f66f6d6094da73a222494d7e867 Mon Sep 17 00:00:00 2001 From: faresobeid <111092724+faresobeid@users.noreply.github.com> Date: Tue, 9 Jun 2026 01:48:12 +0100 Subject: [PATCH 06/12] orch improvements (#2725) * orch improvements * fixes --- pyproject.toml | 1 + src/prime_rl/orchestrator/dispatcher.py | 43 +++--- src/prime_rl/orchestrator/envs.py | 151 +--------------------- src/prime_rl/orchestrator/orchestrator.py | 22 +++- src/prime_rl/transport/zmq.py | 9 +- uv.lock | 2 + 6 files changed, 50 insertions(+), 178 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b5f2a2d2dd..719b1228e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "aiolimiter>=1.2.1", "tenacity>=8.2.0", "openai>=1.106.1", + "orjson>=3.11.0", "rich>=14.0.0", "setproctitle>=1.3.0", "uvloop>=0.21.0", diff --git a/src/prime_rl/orchestrator/dispatcher.py b/src/prime_rl/orchestrator/dispatcher.py index 133bc08da0..dcc5455daa 100644 --- a/src/prime_rl/orchestrator/dispatcher.py +++ b/src/prime_rl/orchestrator/dispatcher.py @@ -12,7 +12,9 @@ source's emptiness), so in-flight rollouts of the opposite kind drain naturally on either side of an eval boundary. - ``on_new_version`` (called by the watcher) bumps ``off_policy_steps`` on - every in-flight rollout and drops groups past ``max_off_policy_steps``. + in-flight train rollouts and drops groups past ``max_off_policy_steps``. + Eval rollouts are measurements for the policy version they started with, + so they are allowed to finish even if training advances. Cancellations surface as synthetic ``Cancelled`` markers so the sink's count-to-``group_size`` finalization still fires. 
""" @@ -262,32 +264,33 @@ async def stop(self) -> None: async def on_new_version(self, step: int) -> None: """Bump off-policy counters and drop groups past ``max_off_policy_steps`` (drop_group emits ``Cancelled`` markers so - the sink still finalizes the partial group).""" - stale_groups: dict[uuid.UUID, RolloutKind] = {} - cancelled_by_kind: dict[RolloutKind, int] = {"train": 0, "eval": 0} + the sink still finalizes the partial group). Eval rollouts are not + aged because they are tied to their start-time policy version.""" + stale_groups: set[uuid.UUID] = set() + cancelled = 0 for meta in self.inflight.values(): + if meta.kind != "train": + continue meta.off_policy_steps += 1 if meta.off_policy_steps > self.max_off_policy_steps: - stale_groups[meta.group_id] = meta.kind + stale_groups.add(meta.group_id) - for gid, kind in stale_groups.items(): + for gid in stale_groups: removed = await self.drop_group(gid) - cancelled_by_kind[kind] += removed + cancelled += removed - for kind in ("train", "eval"): - n = cancelled_by_kind[kind] - if n: - get_logger().warning( - f"Cancelled {n} {kind} rollouts past max_off_policy_steps={self.max_off_policy_steps}. " - "Consider increasing it to avoid this." - ) + if cancelled: + get_logger().warning( + f"Cancelled {cancelled} train rollouts past max_off_policy_steps={self.max_off_policy_steps}. " + "Consider increasing it to avoid this." + ) async def fill_inflight(self) -> None: """Schedule new rollouts up to ``max_inflight``, honoring - ``self.mode``. When ``PREFER_EVAL``'s source exhausts we flip back - to ``PREFER_TRAIN`` so the eval tail drains alongside fresh train.""" - if not self.dispatch_allowed.is_set(): - return + ``self.mode``. Eval scheduling ignores the orchestrator's dispatch + gate (evals are version-pinned measurements); only train scheduling + respects it. 
When ``PREFER_EVAL``'s source exhausts we flip back to + ``PREFER_TRAIN`` so the eval tail drains alongside fresh train.""" while True: if self.available_permits <= 0: return @@ -308,7 +311,9 @@ async def fill_inflight(self) -> None: scheduled = await self.try_schedule("eval") if not scheduled: return - else: # PREFER_TRAIN + else: # PREFER_TRAIN — respects the orchestrator's dispatch gate + if not self.dispatch_allowed.is_set(): + return scheduled = await self.try_schedule("train") if not scheduled: return diff --git a/src/prime_rl/orchestrator/envs.py b/src/prime_rl/orchestrator/envs.py index fe02d2e61a..8d3f072720 100644 --- a/src/prime_rl/orchestrator/envs.py +++ b/src/prime_rl/orchestrator/envs.py @@ -1,24 +1,18 @@ from __future__ import annotations -import asyncio import atexit import multiprocessing as mp -import time -from collections.abc import Awaitable, Callable, Iterator, Sequence +from collections.abc import Iterator, Sequence from multiprocessing.process import BaseProcess from pathlib import Path from typing import Generic, TypeVar -import pandas as pd import verifiers as vf from verifiers.serve import ZMQEnvClient, ZMQEnvServer from verifiers.utils.serve_utils import get_free_port from prime_rl.configs.orchestrator import EnvConfig, EvalEnvConfig, TrainEnvConfig -from prime_rl.orchestrator.eval_utils import compute_pass_at_k -from prime_rl.utils.logger import ProgressTracker, get_logger -from prime_rl.utils.monitor import get_monitor -from prime_rl.utils.utils import capitalize +from prime_rl.utils.logger import get_logger REQUIRED_STATE_COLUMNS = ["trajectory"] @@ -183,147 +177,6 @@ def __init__(self, config: EvalEnvConfig): self.sampling_args = config.sampling.to_sampling_args() self.examples = self.env.get_eval_dataset(n=config.num_examples).to_list() - async def evaluate( - self, - model_name: str, - get_client: Callable[[], Awaitable[vf.ClientConfig]], - step: int, - cache_salt: str, - ) -> list[vf.RolloutOutput]: - num_examples = 
len(self.examples) - group_size = self.config.group_size - get_logger().info(f"Evaluating {self.name} ({num_examples=}, {group_size=})") - total_rollouts = num_examples * group_size - pbar = ProgressTracker(total=total_rollouts, desc=f"Evaluating {self.name}") - eval_start = time.perf_counter() - - if self.requires_group_scoring: - - async def run_with_progress(example: dict) -> list[vf.RolloutOutput] | None: - """Run group_size rollouts as a scored group for one example.""" - try: - client = await get_client() - outputs = await self.run_group( - client=client, - example=example, - model_name=model_name, - group_size=group_size, - cache_salt=cache_salt, - ) - pbar.update(group_size) - return outputs - except Exception as e: - get_logger().warning(f"Group failed: {e}") - pbar.update(group_size) - return None - - coros = [run_with_progress(example) for example in self.examples] - - else: - - async def run_with_progress(example: dict) -> list[vf.RolloutOutput] | None: - """Run a single rollout for one example.""" - try: - client = await get_client() - output = await self.run_rollout( - client=client, example=example, model_name=model_name, cache_salt=cache_salt - ) - pbar.update(1) - return [output] - except Exception as e: - get_logger().warning(f"Rollout failed: {e}") - pbar.update(1) - return None - - coros = [run_with_progress(example) for example in self.examples for _ in range(group_size)] - - try: - results = await asyncio.gather(*coros) - finally: - pbar.close() - - successful_outputs = [o for group in results if group is not None for o in group] - failed_count = total_rollouts - len(successful_outputs) - eval_time = time.perf_counter() - eval_start - - if failed_count: - get_logger().warning( - f"{failed_count}/{total_rollouts} ({failed_count / total_rollouts * 100:.1f}%) rollouts failed" - ) - - if not successful_outputs: - get_logger().warning(f"All rollouts failed for {self.name}, skipping logging metrics") - get_monitor().log( - { - 
f"eval/{self.name}/failed_rollouts": failed_count / total_rollouts, - "step": step, - }, - step=step, - ) - return [] - - # Log metrics - monitor = get_monitor() - - rows = [ - { - "example_id": o["example_id"], - "reward": o["reward"], - "completion_len": o["token_usage"]["final_output_tokens"], - "is_truncated": o["is_truncated"], - "has_error": o.get("error") is not None, - "no_response": not o.get("completion"), - } - for o in successful_outputs - ] - results_df = pd.DataFrame(rows) - - unique_rewards = results_df.reward.dropna().unique() - could_be_binary = set(unique_rewards).issubset({0.0, 1.0}) - if could_be_binary: - pass_at_k = ( - results_df.groupby("example_id") - .apply(lambda x: compute_pass_at_k(x.reward.dropna()), include_groups=False) - .apply(pd.Series) - ) - else: - pass_at_k = None - get_logger().warning("Skipping computing pass@k rates because the task rewards appear to be non-binary") - - message = f"Evaluated {self.name} in {eval_time:.2f}s (Avg@{group_size}={results_df.reward.mean():.4f}" - if could_be_binary: - assert pass_at_k is not None - for pass_rate, pass_rate_score in pd.Series(pass_at_k.mean()).items(): - message += f", {capitalize(str(pass_rate))}: {pass_rate_score:.4f}" - - message += ( - f", No-response: {results_df.no_response.mean() * 100:.1f}%" - f", Completion Length: {results_df.completion_len.mean():.2f} (±{results_df.completion_len.std():.2f}, ∈[{results_df.completion_len.min():.2f}, {results_df.completion_len.max():.2f}])" - f", Truncated: {results_df.is_truncated.mean() * 100:.1f}%)" - ) - get_logger().success(message) - - eval_metrics = { - f"avg@{group_size}": float(results_df.reward.mean()), - "no_response/mean": float(results_df.no_response.mean()), - "no_response/count": int(results_df.no_response.sum()), - "completion_len/mean": results_df.completion_len.mean().item(), - "completion_len/max": results_df.completion_len.max().item(), - "completion_len/min": results_df.completion_len.min().item(), - 
"is_truncated/mean": results_df.is_truncated.mean().item(), - "failed_rollouts": failed_count / total_rollouts, - "time": eval_time, - } - if could_be_binary: - assert pass_at_k is not None - eval_metrics.update(pd.Series(pass_at_k.mean()).to_dict()) - eval_metrics = {f"eval/{self.name}/{key}": v for key, v in eval_metrics.items()} - eval_metrics["step"] = step - monitor.log(eval_metrics, step=step) - monitor.log_eval_samples(successful_outputs, env_name=self.name, step=step) - - return successful_outputs - EnvT = TypeVar("EnvT", bound=Env) diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py index 902c8b963b..d685b519b6 100644 --- a/src/prime_rl/orchestrator/orchestrator.py +++ b/src/prime_rl/orchestrator/orchestrator.py @@ -499,7 +499,7 @@ async def main_loop(self) -> None: assert self.eval_sink is not None # eval rollouts only emitted when eval is configured eval_batch = self.eval_sink.add(rollout) if eval_batch is not None: - self.finalize_eval_batch(eval_batch) + await self.finalize_eval_batch(eval_batch) continue assert isinstance(rollout, TrainRollout) @@ -761,7 +761,7 @@ def log_train_batch(self, batch: TrainBatch, *, step: int, step_time: float) -> ) get_logger().success("\n\t\t ".join(lines)) - def finalize_eval_batch(self, batch: EvalBatch) -> None: + async def finalize_eval_batch(self, batch: EvalBatch) -> None: """Persist + log one completed eval epoch (save_rollouts, monitor.log_eval_samples, monitor.log).""" if not batch.rollouts: @@ -770,24 +770,32 @@ def finalize_eval_batch(self, batch: EvalBatch) -> None: rollout_dicts = [r.to_dict() for r in batch.rollouts] step_path = get_step_path(get_rollout_dir(self.config.output_dir), batch.step) - save_rollouts( + await asyncio.to_thread( + save_rollouts, rollout_dicts, step_path / f"eval_rollouts_{batch.env_name}.jsonl", exclude_keys={"trajectory"}, ) self.monitor.log_eval_samples(rollout_dicts, env_name=batch.env_name, step=batch.step) - 
self.monitor.log(batch.metrics.to_wandb_dict(env_name=batch.env_name, step=batch.step), step=batch.step) + policy_versions = {r.policy_version for r in batch.rollouts} + policy_version = min(policy_versions) + if len(policy_versions) > 1: + get_logger().warning( + f"Eval {batch.env_name} step {batch.step} had mixed policy versions: {sorted(policy_versions)}" + ) + metrics = batch.metrics.to_wandb_dict(env_name=batch.env_name, step=batch.step) + metrics[f"eval/{batch.env_name}/policy_version"] = float(policy_version) + self.monitor.log(metrics, step=batch.step) n_total = batch.metrics.n_rollouts error_rate = ((batch.metrics.n_cancelled + batch.metrics.n_errored) / n_total) if n_total else 0.0 - max_off_policy = max((r.off_policy_steps for r in batch.rollouts), default=0) triggered_at = self.eval_triggered_at.pop((batch.env_name, batch.step), None) elapsed = (time.perf_counter() - triggered_at) if triggered_at is not None else 0.0 get_logger().success( f"Evaluated {batch.env_name} (Step {batch.step}) | " - f"{format_time(elapsed):>7} | Reward {batch.metrics.reward_mean:.4f} | " - f"Turns {batch.metrics.num_turns_mean:.1f} | Max Off-Policy {max_off_policy} | " + f"Policy v{policy_version} | {format_time(elapsed):>7} | Reward {batch.metrics.reward_mean:.4f} | " + f"Turns {batch.metrics.num_turns_mean:.1f} | " f"Error {error_rate:.1%} | Truncation {batch.metrics.truncation_rate:.1%}" ) diff --git a/src/prime_rl/transport/zmq.py b/src/prime_rl/transport/zmq.py index 5577b11e50..964017ddd3 100644 --- a/src/prime_rl/transport/zmq.py +++ b/src/prime_rl/transport/zmq.py @@ -2,6 +2,7 @@ from time import time import zmq +import zmq.asyncio from prime_rl.configs.shared import ZMQTransportConfig from prime_rl.trainer.runs import get_multi_run_manager @@ -20,8 +21,10 @@ class ZMQTrainingBatchSender(TrainingBatchSender): def __init__(self, output_dir: Path, transport: ZMQTransportConfig): super().__init__(output_dir) - self.context = zmq.Context.instance() - self.socket: 
zmq.Socket = self.context.socket(zmq.PUSH) + # Async context so ``send`` yields instead of blocking the orchestrator + # event loop when the trainer is slow and we hit SNDHWM. + self.context = zmq.asyncio.Context.instance() + self.socket: zmq.asyncio.Socket = self.context.socket(zmq.PUSH) self.socket.setsockopt(zmq.SNDHWM, transport.hwm) self.socket.connect(f"tcp://{transport.host}:{transport.port}") @@ -35,7 +38,7 @@ def __init__(self, output_dir: Path, transport: ZMQTransportConfig): async def send(self, batch: TrainingBatch) -> None: payload = self.encoder.encode(batch) self.logger.debug(f"Sending batch {batch.step} to {self.sender_id}") - self.socket.send_multipart([self.sender_id, payload], copy=False) + await self.socket.send_multipart([self.sender_id, payload], copy=False) def close(self) -> None: try: diff --git a/uv.lock b/uv.lock index 03e1a1ddae..3922f32b28 100644 --- a/uv.lock +++ b/uv.lock @@ -3522,6 +3522,7 @@ dependencies = [ { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "nvidia-ml-py", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "orjson", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-pydantic-config", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-rl-configs", marker = "(platform_machine == 'aarch64' and sys_platform == 
'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -3676,6 +3677,7 @@ requires-dist = [ { name = "opencode-math", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_math" }, { name = "opencode-science", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_science" }, { name = "opencode-swe", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_swe" }, + { name = "orjson", specifier = ">=3.11.0" }, { name = "prime", specifier = ">=0.6.4" }, { name = "prime-pydantic-config", editable = "deps/pydantic-config" }, { name = "prime-rl", extras = ["disagg"], marker = "extra == 'all'" }, From e0f8a35c78423f044100814a2104e850e3c6e489 Mon Sep 17 00:00:00 2001 From: hallerite Date: Tue, 9 Jun 2026 05:05:13 +0200 Subject: [PATCH 07/12] feat(orchestrator): per-env advantage strategy (#2721) --- docs/algorithms.md | 16 ++++ .../src/prime_rl/configs/orchestrator.py | 96 ++++++++++--------- src/prime_rl/orchestrator/envs.py | 6 ++ src/prime_rl/orchestrator/orchestrator.py | 1 - src/prime_rl/orchestrator/train_sink.py | 10 +- 5 files changed, 78 insertions(+), 51 deletions(-) diff --git a/docs/algorithms.md b/docs/algorithms.md index fdd5b6e2da..0ffe69edba 100644 --- a/docs/algorithms.md +++ b/docs/algorithms.md @@ -167,6 +167,22 @@ kwargs = { eps = 1e-8 } `AdvantageInputs.rollouts` is a list of `verifiers.RolloutOutput`, so you have access to the full rollout (turns, tool calls, custom metadata) — not just the reward. Use this for anything reward-shaping-like that needs trajectory context. +### Per-Env Advantage + +`advantage` can be set per training environment. 
Each env inherits the top-level `[orchestrator.advantage]` when it doesn't set its own, so mixed-env runs can give each env its own advantage computation: + +```toml +[orchestrator.advantage] +type = "default" # the default every env inherits unless it overrides + +[[orchestrator.train.env]] +id = "math-env" # inherits the default above + +[[orchestrator.train.env]] +id = "agent-env" +advantage = { type = "custom", import_path = "my_module.normalized_advantage" } +``` + ## Filters Filters drop rollouts between scoring and training. Built-ins (composable): diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py index be5fe249f3..83f3fea7a6 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py @@ -143,6 +143,49 @@ def _deprecate_max_tokens(cls, data: Any) -> Any: return data +class TokensLengthPenaltyConfig(BaseConfig): + type: Literal["tokens"] = "tokens" + + completion_weight: float = Field(1.0, ge=0, allow_inf_nan=False) + """Weight on model completion tokens. Finite and non-negative.""" + + tool_response_weight: float = Field(1.0, ge=0, allow_inf_nan=False) + """Weight on tool-response tokens (read from the rollout's ``*_total_tool_response_tokens`` harness metric; 0 if absent). Finite and non-negative.""" + + +class TurnsLengthPenaltyConfig(BaseConfig): + type: Literal["turns"] = "turns" + + +LengthPenaltyConfig: TypeAlias = Annotated[ + TokensLengthPenaltyConfig | TurnsLengthPenaltyConfig, + Field(discriminator="type"), +] + + +class DefaultAdvantageConfig(BaseConfig): + type: Literal["default"] = "default" + + length_penalty: LengthPenaltyConfig | None = None + """Correctness-gated length penalty. ``tokens`` shapes by weighted token cost; ``turns`` shapes by trajectory turn count; None disables shaping. 
In mixed groups, lower-cost correct rollouts get amplified advantage (up to 2x), higher-cost correct rollouts are unchanged, incorrect untouched. In all-correct groups, below-average-cost rollouts get advantage in [0, 1], others get 0.""" + + +class CustomAdvantageConfig(BaseConfig): + type: Literal["custom"] = "custom" + + import_path: str + """Import path to the advantage function (e.g. ``my_module.my_advantage``).""" + + kwargs: dict[str, Any] = Field(default_factory=dict) + """Kwargs forwarded to the advantage function.""" + + +AdvantageConfig: TypeAlias = Annotated[ + DefaultAdvantageConfig | CustomAdvantageConfig, + Field(discriminator="type"), +] + + class EnvConfig(BaseConfig): id: str = "reverse-text" """Registered verifiers environment ID (e.g. ``math-env``, ``primeintellect/math-env``). May include an ``@version`` suffix for installation.""" @@ -214,6 +257,11 @@ class TrainEnvConfig(EnvConfig): """Rollouts generated per example for GRPO group-relative advantages. Inherits from ``orchestrator.group_size`` when unset.""" + advantage: AdvantageConfig | None = None + """Advantage strategy for this env's GRPO groups. Inherits from the top-level + ``orchestrator.advantage`` when unset; set a different ``default``/``custom`` + config to give this env its own advantage computation.""" + class EvalEnvConfig(EnvConfig): sampling: EvalSamplingConfig = EvalSamplingConfig() @@ -374,49 +422,6 @@ class CheckpointConfig(BaseConfig): """Skip loading the progress from checkpoint.""" -class TokensLengthPenaltyConfig(BaseConfig): - type: Literal["tokens"] = "tokens" - - completion_weight: float = Field(1.0, ge=0, allow_inf_nan=False) - """Weight on model completion tokens. Finite and non-negative.""" - - tool_response_weight: float = Field(1.0, ge=0, allow_inf_nan=False) - """Weight on tool-response tokens (read from the rollout's ``*_total_tool_response_tokens`` harness metric; 0 if absent). 
Finite and non-negative.""" - - -class TurnsLengthPenaltyConfig(BaseConfig): - type: Literal["turns"] = "turns" - - -LengthPenaltyConfig: TypeAlias = Annotated[ - TokensLengthPenaltyConfig | TurnsLengthPenaltyConfig, - Field(discriminator="type"), -] - - -class DefaultAdvantageConfig(BaseConfig): - type: Literal["default"] = "default" - - length_penalty: LengthPenaltyConfig | None = None - """Correctness-gated length penalty. ``tokens`` shapes by weighted token cost; ``turns`` shapes by trajectory turn count; None disables shaping. In mixed groups, lower-cost correct rollouts get amplified advantage (up to 2x), higher-cost correct rollouts are unchanged, incorrect untouched. In all-correct groups, below-average-cost rollouts get advantage in [0, 1], others get 0.""" - - -class CustomAdvantageConfig(BaseConfig): - type: Literal["custom"] = "custom" - - import_path: str - """Import path to the advantage function (e.g. ``my_module.my_advantage``).""" - - kwargs: dict[str, Any] = Field(default_factory=dict) - """Kwargs forwarded to the advantage function.""" - - -AdvantageConfig: TypeAlias = Annotated[ - DefaultAdvantageConfig | CustomAdvantageConfig, - Field(discriminator="type"), -] - - # Flags rare tokens generated at high entropy (Section 5.2, https://arxiv.org/abs/2510.02387). class GibberishFilterConfig(BaseConfig): type: Literal["gibberish"] = "gibberish" @@ -876,6 +881,11 @@ def resolve_batching(self): if "group_size" not in env_cfg.model_fields_set: env_cfg.group_size = self.group_size + # Propagate the top-level ``advantage`` into each train env that didn't set its own. 
+ for env_cfg in self.train.env: + if "advantage" not in env_cfg.model_fields_set: + env_cfg.advantage = self.advantage + # Resolve train env num_workers from max_inflight_rollouts for env_cfg in self.train.env: if env_cfg.num_workers == "auto": diff --git a/src/prime_rl/orchestrator/envs.py b/src/prime_rl/orchestrator/envs.py index 8d3f072720..34e12aa63f 100644 --- a/src/prime_rl/orchestrator/envs.py +++ b/src/prime_rl/orchestrator/envs.py @@ -12,6 +12,7 @@ from verifiers.utils.serve_utils import get_free_port from prime_rl.configs.orchestrator import EnvConfig, EvalEnvConfig, TrainEnvConfig +from prime_rl.orchestrator.advantage import AdvantageFn, setup_advantage_fn from prime_rl.utils.logger import get_logger REQUIRED_STATE_COLUMNS = ["trajectory"] @@ -164,6 +165,11 @@ class TrainEnv(Env): def __init__(self, config: TrainEnvConfig): super().__init__(config) self.sampling_args = config.sampling.to_sampling_args() + # Built once — custom advantage funcs do an ``import_object`` we don't + # want to pay per group. ``None`` = reward-only path. 
+ self.advantage_fn: AdvantageFn | None = ( + setup_advantage_fn(config.advantage) if config.advantage is not None else None + ) def get_dataset(self, seed: int | None = None): return self.env.get_dataset(seed=seed) diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py index d685b519b6..347c712b66 100644 --- a/src/prime_rl/orchestrator/orchestrator.py +++ b/src/prime_rl/orchestrator/orchestrator.py @@ -389,7 +389,6 @@ async def setup(self) -> None: mm_token_type_ids_mapping=self.mm_token_type_ids_mapping, batch_size=config.batch_size, token_batch_size=config.token_batch_size, - advantage_config=config.advantage, pre_filters=pre_filters, post_filters=post_filters, ) diff --git a/src/prime_rl/orchestrator/train_sink.py b/src/prime_rl/orchestrator/train_sink.py index 26e7b915b0..f79a0d5eff 100644 --- a/src/prime_rl/orchestrator/train_sink.py +++ b/src/prime_rl/orchestrator/train_sink.py @@ -17,8 +17,8 @@ import uuid from collections import defaultdict -from prime_rl.configs.orchestrator import AdvantageConfig, OrchestratorConfig -from prime_rl.orchestrator.advantage import assign_advantages, setup_advantage_fn +from prime_rl.configs.orchestrator import OrchestratorConfig +from prime_rl.orchestrator.advantage import assign_advantages from prime_rl.orchestrator.envs import TrainEnvs from prime_rl.orchestrator.filters import RolloutFilter, apply_filters from prime_rl.orchestrator.trajectories import ( @@ -44,7 +44,6 @@ def __init__( mm_token_type_ids_mapping: dict[int, int] | None, batch_size: int | None, token_batch_size: int | None, - advantage_config: AdvantageConfig | None, pre_filters: list[RolloutFilter], post_filters: list[RolloutFilter], ) -> None: @@ -58,9 +57,6 @@ def __init__( self.mm_token_type_ids_mapping = mm_token_type_ids_mapping self.batch_size = batch_size self.token_batch_size = token_batch_size - # Built once — custom advantage funcs do an ``import_object`` and - # we don't want to pay that per group. 
``None`` = reward-only path - self.advantage_fn = setup_advantage_fn(advantage_config) if advantage_config is not None else None self.pre_filters = pre_filters self.post_filters = post_filters @@ -200,7 +196,7 @@ def process_group(self, group_id: uuid.UUID) -> None: ) return - assign_advantages(survivors, self.advantage_fn) + assign_advantages(survivors, self.train_envs.get(env_name).advantage_fn) # Propagate to the pre-tokenized samples so the orchestrator can # collect samples at ship time without re-walking rollouts. The env From 0695f9caa50a439331d064dfce0411be71383924 Mon Sep 17 00:00:00 2001 From: Tim Kostolansky <39891386+tim0120@users.noreply.github.com> Date: Tue, 9 Jun 2026 12:34:20 -0700 Subject: [PATCH 08/12] fix: allow sft without teacher (#2720) --- .../src/prime_rl/configs/orchestrator.py | 15 +++++++-------- src/prime_rl/orchestrator/dispatcher.py | 11 +++++------ src/prime_rl/orchestrator/orchestrator.py | 10 ++++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py index 83f3fea7a6..55a2210abe 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py @@ -522,7 +522,7 @@ class OrchestratorConfig(BaseConfig): """Typed renderer config (``renderers.RendererConfig`` discriminated union). Defaults to ``"auto"``, which resolves from ``tokenizer.name_or_path`` via ``MODEL_RENDERER_MAP``. ``None`` - opts into MITO (``openai_chat_completions``); SFT mode forces this.""" + opts into MITO (``openai_chat_completions``).""" pool_size: int | None = Field(None, ge=1) """Number of renderer slots shared across concurrent rollouts. 
Bump @@ -759,11 +759,10 @@ def validate_unique_filter_types(self): @model_validator(mode="after") def _force_no_renderer_for_sft(self): - """SFT rolls out via the teacher's plain chat-completions endpoint; the - renderer client doesn't apply. Force ``renderer=None`` so the user - doesn't have to remember to set it. Declared before the renderer - validators below so they see the corrected value.""" - if self.training_mode == "sft": + """Teacher-backed SFT rolls out via the teacher's plain chat-completions + endpoint; the renderer client doesn't apply. When no teacher is + configured, SFT uses the student rollout path and keeps the renderer.""" + if self.training_mode == "sft" and self.teacher is not None: self.renderer = None return self @@ -773,8 +772,8 @@ def validate_training_mode(self): has_teacher = self.teacher is not None if self.training_mode == "rl" and has_teacher: raise ValueError("orchestrator.teacher must not be set when training_mode = 'rl'.") - if self.training_mode in ("opd", "sft") and not has_teacher: - raise ValueError(f"orchestrator.teacher must be configured when training_mode = '{self.training_mode}'.") + if self.training_mode == "opd" and not has_teacher: + raise ValueError("orchestrator.teacher must be configured when training_mode = 'opd'.") return self @model_validator(mode="after") diff --git a/src/prime_rl/orchestrator/dispatcher.py b/src/prime_rl/orchestrator/dispatcher.py index dcc5455daa..7ebb0149d2 100644 --- a/src/prime_rl/orchestrator/dispatcher.py +++ b/src/prime_rl/orchestrator/dispatcher.py @@ -133,6 +133,7 @@ def __init__( tasks_per_minute: float | None, max_off_policy_steps: int, training_mode: Literal["rl", "opd", "sft"], + use_cache_salt: bool = True, ) -> None: self.policy = policy self.train_envs = train_envs @@ -144,6 +145,7 @@ def __init__( self.train_source = train_source self.eval_source = eval_source self.training_mode = training_mode + self.use_cache_salt = use_cache_salt self.max_off_policy_steps = 
max_off_policy_steps self.max_inflight = max_inflight_rollouts @@ -413,13 +415,10 @@ async def schedule_group_rollout(self, group_id: uuid.UUID, group: GroupState) - if env_collection is None: return False env = env_collection.get(group.env_name) - # SFT-mode train rollouts hit the frozen teacher pool; salting per - # policy version would invalidate the teacher's prefix cache every - # weight update for no reason. - if self.training_mode == "sft" and group.kind == "train": - cache_salt = None - else: + if group.kind == "eval" or self.use_cache_salt: cache_salt = str(group.policy_version_at_start) + else: + cache_salt = None if env.requires_group_scoring: permits = group.rollouts_to_schedule diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py index 347c712b66..9823dd73d0 100644 --- a/src/prime_rl/orchestrator/orchestrator.py +++ b/src/prime_rl/orchestrator/orchestrator.py @@ -345,13 +345,14 @@ async def setup(self) -> None: else: get_logger().info("Training from scratch") - # SFT generates rollouts via the teacher (the student is trained on - # the teacher's outputs); RL / OPD generate via the student - if config.training_mode == "sft": - assert self.teacher_inference is not None, "sft mode requires teacher inference" + # SFT train rollouts come from the teacher when configured; otherwise + # they use the existing student rollout pool. 
+ if config.training_mode == "sft" and self.teacher_inference is not None: rollout_inference = self.teacher_inference + use_cache_salt = False else: rollout_inference = self.student_inference + use_cache_salt = True self.train_source = TrainSource(self.train_envs, seed=42) self.eval_source: EvalSource | None = ( @@ -379,6 +380,7 @@ async def setup(self) -> None: tasks_per_minute=config.tasks_per_minute, max_off_policy_steps=config.max_off_policy_steps, training_mode=config.training_mode, + use_cache_salt=use_cache_salt, ) self.metrics = MetricsBuilder(config) self.train_sink = TrainSink( From c8759c370a8ce047574cbc02b670304dd6876fd1 Mon Sep 17 00:00:00 2001 From: faresobeid <111092724+faresobeid@users.noreply.github.com> Date: Wed, 10 Jun 2026 00:01:11 +0100 Subject: [PATCH 09/12] add router replay to latentMoE models (#2738) --- src/prime_rl/trainer/models/layers/moe.py | 63 ++++++++++++------- .../models/nemotron_h/modeling_nemotron_h.py | 18 +++++- 2 files changed, 56 insertions(+), 25 deletions(-) diff --git a/src/prime_rl/trainer/models/layers/moe.py b/src/prime_rl/trainer/models/layers/moe.py index 14d46b2f89..52013c8753 100644 --- a/src/prime_rl/trainer/models/layers/moe.py +++ b/src/prime_rl/trainer/models/layers/moe.py @@ -955,32 +955,44 @@ def __init__( self.norm_topk_prob = norm_topk_prob def forward( - self, x: torch.Tensor, expert_bias: torch.Tensor | None = None + self, + x: torch.Tensor, + expert_bias: torch.Tensor | None = None, + routed_experts: torch.Tensor | None = None, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: scores = F.linear(x.float(), self.gate.float()).sigmoid() - scores_for_choice = scores + self.e_score_correction_bias - if expert_bias is not None: - scores_for_choice = scores_for_choice + expert_bias - - # Group-based routing - if self.n_group > 1: - group_scores = ( - scores_for_choice.view(-1, self.n_group, self.num_experts // self.n_group) - .topk(2, dim=-1)[0] - .sum(dim=-1) - ) - group_idx = 
torch.topk(group_scores, k=self.topk_group, dim=-1, sorted=False)[1] - group_mask = torch.zeros_like(group_scores) - group_mask.scatter_(1, group_idx, 1) - score_mask = ( - group_mask.unsqueeze(-1) - .expand(-1, self.n_group, self.num_experts // self.n_group) - .reshape(-1, self.num_experts) - ) - scores_for_choice = scores_for_choice.masked_fill(~score_mask.bool(), 0.0) + if routed_experts is not None: + # Router replay: reuse the inference engine's expert selection and + # only recompute the gating weights from the trainer's scores. The + # correction/load-balancing biases only affect selection, so they + # are intentionally skipped here. + selected_experts_indices = routed_experts + else: + scores_for_choice = scores + self.e_score_correction_bias + + if expert_bias is not None: + scores_for_choice = scores_for_choice + expert_bias + + # Group-based routing + if self.n_group > 1: + group_scores = ( + scores_for_choice.view(-1, self.n_group, self.num_experts // self.n_group) + .topk(2, dim=-1)[0] + .sum(dim=-1) + ) + group_idx = torch.topk(group_scores, k=self.topk_group, dim=-1, sorted=False)[1] + group_mask = torch.zeros_like(group_scores) + group_mask.scatter_(1, group_idx, 1) + score_mask = ( + group_mask.unsqueeze(-1) + .expand(-1, self.n_group, self.num_experts // self.n_group) + .reshape(-1, self.num_experts) + ) + scores_for_choice = scores_for_choice.masked_fill(~score_mask.bool(), 0.0) + + selected_experts_indices = torch.topk(scores_for_choice, k=self.top_k, dim=-1, sorted=False)[1] - selected_experts_indices = torch.topk(scores_for_choice, k=self.top_k, dim=-1, sorted=False)[1] top_scores = scores.gather(1, selected_experts_indices) routing_confidence_sum = _selected_probability_mass_sum(scores, top_scores, "sigmoid") @@ -1181,8 +1193,13 @@ def forward(self, x: torch.Tensor, routed_experts: torch.Tensor | None = None) - bs, slen, dim = x.shape x_flat = x.view(-1, dim) + if routed_experts is not None: + # Flatten to (bs * slen, top_k); reshape (not 
view) since the slice is non-contiguous. + _, _, top_k = routed_experts.shape + routed_experts = routed_experts.reshape(-1, top_k) + top_scores, selected_experts_indices, num_tokens_per_expert, routing_confidence_sum = self.router( - x_flat, self.expert_bias + x_flat, self.expert_bias, routed_experts=routed_experts ) with torch.no_grad(): diff --git a/src/prime_rl/trainer/models/nemotron_h/modeling_nemotron_h.py b/src/prime_rl/trainer/models/nemotron_h/modeling_nemotron_h.py index d9c5ec1ede..f64d8d43f2 100644 --- a/src/prime_rl/trainer/models/nemotron_h/modeling_nemotron_h.py +++ b/src/prime_rl/trainer/models/nemotron_h/modeling_nemotron_h.py @@ -223,6 +223,7 @@ def forward( position_embeddings: tuple[torch.Tensor, torch.Tensor] | None = None, cu_seqlens: torch.LongTensor | None = None, max_seqlen: int | None = None, + routed_experts: torch.Tensor | None = None, ) -> torch.Tensor: residual = hidden_states hidden_states = self.norm(hidden_states) @@ -266,10 +267,11 @@ def forward( position_embeddings: tuple[torch.Tensor, torch.Tensor] | None = None, cu_seqlens: torch.LongTensor | None = None, max_seqlen: int | None = None, + routed_experts: torch.Tensor | None = None, ) -> torch.Tensor: residual = hidden_states hidden_states = self.norm(hidden_states) - hidden_states = self.mlp(hidden_states) + hidden_states = self.mlp(hidden_states, routed_experts=routed_experts) return residual + hidden_states @@ -298,6 +300,7 @@ def forward( position_embeddings: tuple[torch.Tensor, torch.Tensor] | None = None, cu_seqlens: torch.LongTensor | None = None, max_seqlen: int | None = None, + routed_experts: torch.Tensor | None = None, ) -> torch.Tensor: residual = hidden_states hidden_states = self.norm(hidden_states) @@ -498,7 +501,13 @@ def forward( input_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, + routed_experts: Optional[torch.LongTensor] = None, ) -> BaseModelOutputWithPast: + """ + 
routed_experts (`torch.LongTensor` of shape `(batch_size, sequence_length, num_hidden_layers, num_experts_per_tok)`, *optional*): + Routed experts for each token, indexed by global layer index. Only used for router replay; slots + for non-MoE (Mamba/attention) layers are ignored. + """ if (input_ids is None) ^ (inputs_embeds is not None): raise ValueError("You must specify exactly one of input_ids or inputs_embeds") @@ -516,12 +525,15 @@ def forward( hidden_states = inputs_embeds position_embeddings = self.rotary_emb(hidden_states, position_ids) if self.rotary_emb is not None else None - for decoder_layer in self.layers: + for layer_idx, decoder_layer in enumerate(self.layers): + # routed_experts is indexed by global layer index; non-MoE layers ignore it. + routed_experts_layer = routed_experts[:, :, layer_idx, :] if routed_experts is not None else None hidden_states = decoder_layer( hidden_states, position_embeddings=position_embeddings, cu_seqlens=cu_seqlens, max_seqlen=max_seqlen, + routed_experts=routed_experts_layer, ) hidden_states = self.norm(hidden_states) @@ -550,6 +562,7 @@ def forward( labels: Optional[torch.LongTensor] = None, logits_to_keep: int = 0, temperature: Optional[torch.Tensor] = None, + routed_experts: Optional[torch.LongTensor] = None, **kwargs, ) -> PrimeLmOutput: if position_ids is None: @@ -562,6 +575,7 @@ def forward( input_ids=input_ids, position_ids=position_ids, inputs_embeds=inputs_embeds, + routed_experts=routed_experts, ) hidden_states = outputs.last_hidden_state From 632ef2b31820408a874332dbfa7d6a416ed77583 Mon Sep 17 00:00:00 2001 From: samsja <55492238+samsja@users.noreply.github.com> Date: Tue, 9 Jun 2026 19:31:08 -0700 Subject: [PATCH 10/12] chore: remove private configs submodule (#2741) Remove the configs/private submodule (research-configs) and all references to it throughout the codebase: - Remove submodule from .gitmodules and git tracking - Simplify install.sh: use plain git submodule update --init --recursive now that no 
private submodule can fail for users without access - Update skills/install/SKILL.md to reflect simplified submodule init - Remove configs/private/ entry from skills/configs/SKILL.md key files - Simplify test_configs.py: no longer need to filter out private/ path --- .gitmodules | 3 --- configs/private | 1 - scripts/install.sh | 18 +----------------- skills/configs/SKILL.md | 1 - skills/install/SKILL.md | 4 +--- tests/unit/test_configs.py | 5 ++--- 6 files changed, 4 insertions(+), 28 deletions(-) delete mode 160000 configs/private diff --git a/.gitmodules b/.gitmodules index 2041f460ee..b378f0ebbf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,9 +7,6 @@ [submodule "research-environments"] path = deps/research-environments url = git@github.com:PrimeIntellect-ai/research-environments.git -[submodule "configs/private"] - path = configs/private - url = git@github.com:PrimeIntellect-ai/research-configs.git [submodule "pydantic-config"] path = deps/pydantic-config url = https://github.com/PrimeIntellect-ai/pydantic-config diff --git a/configs/private b/configs/private deleted file mode 160000 index 70c3503e1d..0000000000 --- a/configs/private +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 70c3503e1dc4ea499b09f0eee206b509169b79bd diff --git a/scripts/install.sh b/scripts/install.sh index 630bf1d576..833726d3d9 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -74,27 +74,11 @@ ensure_known_hosts() { fi } -# Initialize each submodule independently so that a missing private repo -# (e.g. configs/private when the user lacks access) does not abort the install. init_submodules() { if [ ! -f .gitmodules ]; then return 0 fi - local paths failures - paths=$(git config -f .gitmodules --get-regexp '^submodule\..*\.path$' | awk '{print $2}') - failures=() - for path in $paths; do - log_info "Initializing submodule: ${path}" - if git submodule update --init --recursive -- "$path"; then - : - else - log_warn "Could not initialize submodule '${path}' (likely no access). 
Continuing without it." - failures+=("$path") - fi - done - if [ "${#failures[@]}" -gt 0 ]; then - log_warn "Skipped submodules: ${failures[*]}" - fi + git submodule update --init --recursive } main() { diff --git a/skills/configs/SKILL.md b/skills/configs/SKILL.md index 83f7dd8d47..129f89d551 100644 --- a/skills/configs/SKILL.md +++ b/skills/configs/SKILL.md @@ -74,5 +74,4 @@ Leave it unset for normal training. When enabled, it exports every sequence from - `packages/prime-rl-configs/src/prime_rl/` — config classes under `configs/`; `utils/config.py` re-exports `BaseConfig` and `cli` - `configs/debug/` — minimal debug configs -- `configs/private/` — private configs submodule (internal) - `examples/` — full example configs diff --git a/skills/install/SKILL.md b/skills/install/SKILL.md index 3ad7e164b8..aa3dad0661 100644 --- a/skills/install/SKILL.md +++ b/skills/install/SKILL.md @@ -16,11 +16,9 @@ bash scripts/install.sh # clones, inits submodules, installs uv, runs `uv sync For an existing clone, init submodules explicitly: ```bash -git submodule update --init -- deps/verifiers deps/renderers deps/research-environments deps/pydantic-config +git submodule update --init --recursive ``` -Do **not** run `git submodule update --init --recursive` without paths — it tries to clone the private `configs/private` submodule and aborts for users without access. `scripts/install.sh` walks submodules one at a time and skips failures, so it works for everyone. 
- ## Sync ```bash diff --git a/tests/unit/test_configs.py b/tests/unit/test_configs.py index fcdee7a843..9ad10fef69 100644 --- a/tests/unit/test_configs.py +++ b/tests/unit/test_configs.py @@ -26,9 +26,8 @@ def get_config_files() -> list[Path]: - """Any TOML file inside `configs/` or `examples/` (skips the configs/private/ submodule).""" - private = Path("configs/private") - config_files = [p for p in Path("configs").rglob("*.toml") if private not in p.parents] + """Any TOML file inside `configs/` or `examples/`.""" + config_files = list(Path("configs").rglob("*.toml")) example_files = list(Path("examples").rglob("*.toml")) return config_files + example_files From 04d067145f5f2698112c93f61c78f457f2f19fda Mon Sep 17 00:00:00 2001 From: samsja <55492238+samsja@users.noreply.github.com> Date: Tue, 9 Jun 2026 19:31:19 -0700 Subject: [PATCH 11/12] update deps (#2736) * update deps * update deps * update deps --- deps/research-environments | 2 +- deps/verifiers | 2 +- pyproject.toml | 5 +- uv.lock | 804 ++++++++++++++++++++++++++++++++----- 4 files changed, 713 insertions(+), 100 deletions(-) diff --git a/deps/research-environments b/deps/research-environments index c752781984..4c08260f07 160000 --- a/deps/research-environments +++ b/deps/research-environments @@ -1 +1 @@ -Subproject commit c752781984c1b4fbb0a3d7f4aac1e7ed67cc749e +Subproject commit 4c08260f07d1f907d3adee93bf55b94e177865c9 diff --git a/deps/verifiers b/deps/verifiers index 05c66c2358..0ad8b4d523 160000 --- a/deps/verifiers +++ b/deps/verifiers @@ -1 +1 @@ -Subproject commit 05c66c235875d785754f2b7078db0e7deeddbeae +Subproject commit 0ad8b4d523caf7d5eeceb013eee1b63b737925ea diff --git a/pyproject.toml b/pyproject.toml index 719b1228e6..4f02554486 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,6 @@ envs = [ "math-python", "math500", "mini-swe-agent-plus", - "mini-swe-agent-plus-rlm", "mmlu-pro", "opencode-cp", "opencode-deepdive", @@ -152,6 +151,7 @@ override-dependencies = [ 
"transformers==5.6.2", "torch>=2.9.0", "openenv-core", + "verifiers[packages]>=0.1.15.dev150", ] # ModelExpress 0.3.0 publishes protobuf<6 metadata, but its generated proto is @@ -209,6 +209,7 @@ color-codeword = { path = "deps/research-environments/environments/color_codewor deepdive = { path = "deps/research-environments/environments/deepdive", editable = true } general-agent = { path = "deps/research-environments/environments/general_agent", editable = true } gpqa = { path = "deps/research-environments/environments/gpqa", editable = true } +harnesses = { path = "deps/verifiers/packages/harnesses", editable = true } hle = { path = "deps/research-environments/environments/hle", editable = true } ifeval = { path = "deps/research-environments/environments/ifeval", editable = true } livecodebench = { path = "deps/research-environments/environments/livecodebench", editable = true } @@ -217,7 +218,6 @@ math-env = { path = "deps/research-environments/environments/math_env", editable math-python = { path = "deps/verifiers/environments/math_python", editable = true } math500 = { path = "deps/research-environments/environments/math500", editable = true } mini-swe-agent-plus = { path = "deps/research-environments/environments/mini_swe_agent_plus", editable = true } -mini-swe-agent-plus-rlm = { path = "deps/research-environments/environments/mini_swe_agent_plus_rlm", editable = true } mmlu-pro = { path = "deps/research-environments/environments/mmlu_pro", editable = true } opencode-cp = { path = "deps/research-environments/environments/opencode_cp", editable = true } opencode-deepdive = { path = "deps/research-environments/environments/opencode_deepdive", editable = true } @@ -229,6 +229,7 @@ rlm-swe = { path = "deps/research-environments/environments/rlm_swe", editable = science-env = { path = "deps/research-environments/environments/science_env", editable = true } simpleqa-verified = { path = "deps/research-environments/environments/simpleqa_verified", editable = true } 
tau2-bench = { path = "deps/research-environments/environments/tau2_bench", editable = true } +tasksets = { path = "deps/verifiers/packages/tasksets", editable = true } wiki-search = { path = "deps/verifiers/environments/wiki_search", editable = true } wordle = { path = "deps/verifiers/environments/wordle", editable = true } torch = { index = "pytorch-cu128" } diff --git a/uv.lock b/uv.lock index 3922f32b28..af14aed2c2 100644 --- a/uv.lock +++ b/uv.lock @@ -11,7 +11,7 @@ supported-markers = [ ] [options] -exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. +exclude-newer = "2026-06-01T23:58:54.614773201Z" exclude-newer-span = "P7D" [options.exclude-newer-package] @@ -41,6 +41,7 @@ overrides = [ { name = "openenv-core" }, { name = "torch", specifier = ">=2.9.0", index = "https://download.pytorch.org/whl/cu128" }, { name = "transformers", specifier = "==5.6.2" }, + { name = "verifiers", extras = ["packages"], editable = "deps/verifiers" }, ] [[manifest.dependency-metadata]] @@ -68,32 +69,44 @@ wheels = [ [[package]] name = "aime2024" -version = "0.1.20" +version = "0.2.0" source = { editable = "deps/research-environments/environments/aime2024" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev152" }, ] [[package]] name = 
"aime2025" -version = "0.1.20" +version = "0.2.0" source = { editable = "deps/research-environments/environments/aime2025" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev152" }, +] + +[[package]] +name = "aiofile" +version = "3.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "caio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/41/2fea7e193e061ce54eacc3b7bc0e6a99e4fcff43c78cf0a76dd781ed8334/aiofile-3.11.1.tar.gz", hash = "sha256:1f91912c6643d2a4e49ca4ae3514f0bf3867ce948a36d99a6411b8f4755f4cf9", size = 19342, upload-time = "2026-05-16T08:18:33.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/cd/0d76dfc5de72bde52f55f53e925c7d152d9c7906634ec1e0cbc7e8d4ad93/aiofile-3.11.1-py3-none-any.whl", hash = "sha256:ce77d14ac07f77bc2b757834a5c129321f3f705c474593deed5ab209079a52c9", size = 20446, upload-time = "2026-05-16T08:18:32.051Z" }, ] [[package]] @@ -162,7 +175,7 @@ name = "alphabet-sort" version = "0.1.12" source = { editable = "deps/verifiers/environments/alphabet_sort" } dependencies = [ - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 
'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -278,6 +291,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, ] +[[package]] +name = "authlib" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "joserfc", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/98/7d93f30d029643c0275dbc0bd6d5a6f670661ee6c9a94d93af7ab4887600/authlib-1.7.2.tar.gz", hash = "sha256:2cea25fefcd4e7173bdf1372c0afc265c8034b23a8cd5dcb6a9164b826c64231", size = 176511, upload-time = "2026-05-06T08:10:23.116Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/95/adcb68e20c34162e9135f370d6e31737719c2b6f94bc953fe7ed1f10fe21/authlib-1.7.2-py2.py3-none-any.whl", hash = "sha256:3e1faedc9d87e7d56a164eca3ccb6ace0d61b94abe83e92242f8dc8bba9b4a9f", size = 259548, upload-time = "2026-05-06T08:10:21.436Z" }, +] + [[package]] name = "backoff" version = "2.2.1" @@ -358,6 +384,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/c6/d1fe8bdea4a6088bd54b5a58bc40aed89a4e784cd796af7722a06f74bae7/blake3-1.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a25db3d36b55f5ed6a86470155cc749fc9c5b91c949b8d14f48658f9d960d9ec", size = 554211, upload-time = "2025-10-14T06:46:00.269Z" }, 
] +[[package]] +name = "brotli" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/16/c92ca344d646e71a43b8bb353f0a6490d7f6e06210f8554c8f874e454285/brotli-1.2.0.tar.gz", hash = "sha256:e310f77e41941c13340a95976fe66a8a95b01e783d430eeaf7a2f87e0a57dd0a", size = 7388632, upload-time = "2025-11-05T18:39:42.86Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/6f/feba03130d5fceadfa3a1bb102cb14650798c848b1df2a808356f939bb16/brotli-1.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:acec55bb7c90f1dfc476126f9711a8e81c9af7fb617409a9ee2953115343f08d", size = 1528071, upload-time = "2025-11-05T18:38:26.081Z" }, + { url = "https://files.pythonhosted.org/packages/03/a7/03aa61fbc3c5cbf99b44d158665f9b0dd3d8059be16c460208d9e385c837/brotli-1.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:072e7624b1fc4d601036ab3f4f27942ef772887e876beff0301d261210bca97f", size = 1419762, upload-time = "2025-11-05T18:38:28.295Z" }, + { url = "https://files.pythonhosted.org/packages/21/1b/0374a89ee27d152a5069c356c96b93afd1b94eae83f1e004b57eb6ce2f10/brotli-1.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adedc4a67e15327dfdd04884873c6d5a01d3e3b6f61406f99b1ed4865a2f6d28", size = 1484494, upload-time = "2025-11-05T18:38:29.29Z" }, + { url = "https://files.pythonhosted.org/packages/d5/3b/39e13ce78a8e9a621c5df3aeb5fd181fcc8caba8c48a194cd629771f6828/brotli-1.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:af43b8711a8264bb4e7d6d9a6d004c3a2019c04c01127a868709ec29962b6036", size = 1487913, upload-time = "2025-11-05T18:38:31.618Z" }, +] + [[package]] name = "build" version = "1.5.0" @@ -392,6 +430,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8c/7b/1fc1c09cc0756cf25861a3be10565915953876da48bb228fb9a672b20a42/cachetools-7.1.4-py3-none-any.whl", hash = 
"sha256:323dc4127934744db5b54eb4924482d7edafbf9554e820d1531c2e08c0e4ef54", size = 16761, upload-time = "2026-05-21T22:40:41.845Z" }, ] +[[package]] +name = "caio" +version = "0.9.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/92/88/b8527e1b00c1811db339a1df8bd1ae49d146fcea9d6a5c40e3a80aaeb38d/caio-0.9.25.tar.gz", hash = "sha256:16498e7f81d1d0f5a4c0ad3f2540e65fe25691376e0a5bd367f558067113ed10", size = 26781, upload-time = "2025-12-26T15:21:36.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/2b/21288691f16d479945968a0a4f2856818c1c5be56881d51d4dac9b255d26/caio-0.9.25-cp312-cp312-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:97084e4e30dfa598449d874c4d8e0c8d5ea17d2f752ef5e48e150ff9d240cd64", size = 82012, upload-time = "2025-12-26T15:22:20.983Z" }, + { url = "https://files.pythonhosted.org/packages/03/c4/8a1b580875303500a9c12b9e0af58cb82e47f5bcf888c2457742a138273c/caio-0.9.25-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:4fa69eba47e0f041b9d4f336e2ad40740681c43e686b18b191b6c5f4c5544bfb", size = 81502, upload-time = "2026-03-04T22:08:22.381Z" }, + { url = "https://files.pythonhosted.org/packages/d1/1c/0fe770b8ffc8362c48134d1592d653a81a3d8748d764bec33864db36319d/caio-0.9.25-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:6bebf6f079f1341d19f7386db9b8b1f07e8cc15ae13bfdaff573371ba0575d69", size = 80200, upload-time = "2026-03-04T22:08:23.382Z" }, + { url = "https://files.pythonhosted.org/packages/86/93/1f76c8d1bafe3b0614e06b2195784a3765bbf7b0a067661af9e2dd47fc33/caio-0.9.25-py3-none-any.whl", hash = "sha256:06c0bb02d6b929119b1cfbe1ca403c768b2013a369e2db46bfa2a5761cf82e40", size = 19087, upload-time = "2025-12-26T15:22:00.221Z" }, +] + [[package]] name = "cbor2" version = "6.1.1" @@ -526,11 +576,11 @@ wheels = [ [[package]] name = "code-env" -version = "0.3.1" +version = "0.3.2" source = { editable = 
"deps/research-environments/environments/code_env" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -538,7 +588,7 @@ requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.19" }, { name = "pytest", marker = "extra == 'dev'" }, { name = "pytest-asyncio", marker = "extra == 'dev'" }, - { name = "verifiers", specifier = ">=0.1.13.dev8" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] provides-extras = ["dev"] @@ -548,13 +598,13 @@ version = "0.1.0" source = { editable = "deps/research-environments/environments/color_codeword" } dependencies = [ { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "pillow", specifier = ">=10.0.0" }, - { name = "verifiers", specifier = ">=0.1.10" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -731,6 +781,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = 
"sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "cyclopts" +version = "4.16.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "docstring-parser", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich-rst", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/34/07/bf61d13de86d96a4c46aff00c9ca0eced44bcc8c3e16280605c1253e5720/cyclopts-4.16.1.tar.gz", hash = "sha256:8aa47bf92a5fb33abca5af05e576eecdb0d2f79893ad29238046df78370fc4a8", size = 181196, upload-time = "2026-05-25T15:29:08.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/8d/7f362c2fb8ef4decd2160bc24d4292c6ca658cc6d9a161b89ca5122bbdbf/cyclopts-4.16.1-py3-none-any.whl", hash = "sha256:617795392c4113a2c2cc7af716f20244900e87f23daa05442d1268d81472a592", size = 219020, upload-time = "2026-05-25T15:29:09.646Z" }, +] + [[package]] name = "dataclasses-json" version = "0.6.7" @@ -826,7 +891,7 @@ dependencies = [ { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "diskcache", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pdfminer-six", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') 
or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -835,7 +900,7 @@ requires-dist = [ { name = "datasets", specifier = ">=4.0.0" }, { name = "diskcache", specifier = ">=5.6.0" }, { name = "pdfminer-six", specifier = ">=20251107" }, - { name = "verifiers", specifier = ">=0.1.11.dev0" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -968,6 +1033,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, ] +[[package]] +name = "exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + [[package]] name = 
"executing" version = "2.2.1" @@ -1076,6 +1153,66 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/41/2c368f804bb9bd918da3b61324207fc4b410d0f32352c372c0680fc1f670/fastcore-1.13.2-py3-none-any.whl", hash = "sha256:2103c9e9e613311c0b36eab17299a221e778fd214ec526e8df1d32908928277c", size = 105060, upload-time = "2026-05-17T06:02:22.28Z" }, ] +[[package]] +name = "fastmcp" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastmcp-slim", extra = ["client", "server"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/a9/5c5a01b6abd5346bf60b97cfd29e4a86661940c27dd562bfcda07fd03519/fastmcp-3.3.1.tar.gz", hash = "sha256:979362ea557de42a5f40342563c7e4b236bcc8e7cd192715f50030695d1a71cd", size = 28681699, upload-time = "2026-05-15T15:50:39.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/11/6b1bdada6ccfe647d615ae63f9106f8136aec17971e9361546af01c7d38e/fastmcp-3.3.1-py3-none-any.whl", hash = "sha256:862440c5c4d281363a5995eee59d77f0f7cac1f18869038729cecf03b02fc522", size = 7903, upload-time = "2026-05-15T15:50:36.424Z" }, +] + +[[package]] +name = "fastmcp-slim" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "platformdirs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", extra = ["email"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic-settings", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "python-dotenv", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') 
or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/a0/627103e517e1d0d6f1eec633d5662d13e776f01b45ad188e4f5f7478b438/fastmcp_slim-3.3.1.tar.gz", hash = "sha256:0957835fc59452e143ab2f4b7836d2d2df9b2d9958408edc79ba8b56232b2a88", size = 567007, upload-time = "2026-05-15T15:50:10.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/ee/97047f4cc2d7b1d46670d08d8ad01a96e7a748cc01c0b4b351ad8eddbc7a/fastmcp_slim-3.3.1-py3-none-any.whl", hash = "sha256:6cf1c2d77e3adb0d409d6825ed6b0b2a999062973e00b8eea03bd48bf9b4c043", size = 738644, upload-time = "2026-05-15T15:50:08.336Z" }, +] + +[package.optional-dependencies] +client = [ + { name = "authlib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "exceptiongroup", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "opentelemetry-api", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or 
(platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +server = [ + { name = "authlib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "cyclopts", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "exceptiongroup", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "griffelib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jsonref", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jsonschema-path", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "openapi-pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "opentelemetry-api", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "packaging", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or 
(platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyperclip", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "python-multipart", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uncalled-for", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "watchfiles", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + [[package]] name = "fastokens" version = "0.2.0" @@ -1281,7 +1418,7 @@ source = { editable = "deps/research-environments/environments/general_agent" } dependencies = [ { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "tyro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 
'linux')" }, ] [package.metadata] @@ -1292,7 +1429,7 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'" }, { name = "ty", marker = "extra == 'dev'" }, { name = "tyro", specifier = ">=0.9" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" }, ] provides-extras = ["dev", "test"] @@ -1362,6 +1499,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/7a/1c6e3562dfd8950adbb11ffbc65d21e7c89d01a6e4f137fa981056de25c5/gitpython-3.1.50-py3-none-any.whl", hash = "sha256:d352abe2908d07355014abdd21ddf798c2a961469239afec4962e9da884858f9", size = 212507, upload-time = "2026-05-06T04:01:23.799Z" }, ] +[[package]] +name = "google-auth" +version = "2.53.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyasn1-modules", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/ad/ff781329bbbdc0974a098d996e89c9e1f7024262f9e3eec442fbb9ad1ac6/google_auth-2.53.0.tar.gz", hash = "sha256:e7e6aa16f6bee7b2b264830fd04f08087a1d5a836df516251a5d15327b246c9c", size = 335844, upload-time = "2026-05-15T20:53:07.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/c9/db44165ba7c581268c6d46017ef63339110378305062830104fc7fa144cb/google_auth-2.53.0-py3-none-any.whl", hash = "sha256:6e7449917c599b35126a99ec268ec6880301f2fea41dce198fe8fd83ff642b68", size = 246071, upload-time = "2026-05-15T20:53:05.609Z" }, +] + +[package.optional-dependencies] +requests = [ + { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[[package]] +name = 
"google-genai" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "distro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "google-auth", extra = ["requests"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "sniffio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/7b/6eb3b3d545b6bb4c374acba1ccf91b0f33b605e551536a6243cfcef2f07f/google_genai-2.7.0.tar.gz", hash = "sha256:3c6f32f5ced9877ededd1b384b5e5b7f09c20046ec3390b662b16d8cd1882ac5", size = 555853, upload-time = 
"2026-05-28T15:39:24.58Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/dd/7a8be39e9d698e80e9db796514efbc6083dbd787bdb9a101e8ba47248e5e/google_genai-2.7.0-py3-none-any.whl", hash = "sha256:21cac381e09a869151706aba797b6a4f96cfe92c484e13204d092caee7ff11cb", size = 822545, upload-time = "2026-05-28T15:39:22.907Z" }, +] + [[package]] name = "googleapis-common-protos" version = "1.75.0" @@ -1380,13 +1556,67 @@ version = "0.1.5" source = { editable = "deps/research-environments/environments/gpqa" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, +] + +[[package]] +name = "gradio" +version = "6.15.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "brotli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "gradio-client", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + 
{ name = "groovy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "hf-gradio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jinja2", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "markupsafe", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "orjson", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "packaging", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pandas", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or 
(platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "python-multipart", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pytz", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "safehttpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "semantic-version", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tomlkit", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/7a/edc719d67beea0963721a7f552604faa0feaf218d4a51c6e0dacfb51ba6a/gradio-6.15.1.tar.gz", hash = "sha256:58be31be7b3aab53bbe61f21c20666b4a8a25a6737c399e02b7463d669625851", size = 36429761, upload-time = "2026-05-27T13:20:35.232Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/00/87/b6d08b4b0a5bf7bb219c4c44d1d267ce6dfbc1ff062ca05ce251a50d18a5/gradio-6.15.1-py3-none-any.whl", hash = "sha256:f4f50488f8da1137b8e0d65fe656348b11bac3cd2f5ccab60636eb45e5a6f39f", size = 20093799, upload-time = "2026-05-27T13:20:31.18Z" }, +] + +[[package]] +name = "gradio-client" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fsspec", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "packaging", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e8/e6/6b6029f5fe2ad7f1211105d530e34d991014c2cae463f9223033031cfc4f/gradio_client-2.5.0.tar.gz", hash = "sha256:4cde99bad62149595c30c90876ca2e405e3a13687ecf895474f3412cb476673d", size = 59013, upload-time = "2026-04-20T23:16:21.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/81/0a861b8e1ff42960139c6cd4c7dd591292fa09ea1ae2d87677441cba4c00/gradio_client-2.5.0-py3-none-any.whl", hash = "sha256:d43e2179c29076292a76485ad7ed2e6eaa19d14ac58283bd7f5beabfe4ca958c", size = 59952, upload-time = "2026-04-20T23:16:20.186Z" }, ] [[package]] @@ -1398,6 +1628,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/11/8c/c9138d881c79aa0ea9ed83cbd58d5ca75624378b38cee225dcf5c42cc91f/griffelib-2.0.2-py3-none-any.whl", hash = "sha256:925c857658fb1ba40c0772c37acbc2ab650bd794d9c1b9726922e36ea4117ea1", size = 142357, upload-time = "2026-03-27T11:34:46.275Z" }, ] +[[package]] +name = "groovy" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/36/bbdede67400277bef33d3ec0e6a31750da972c469f75966b4930c753218f/groovy-0.1.2.tar.gz", hash = "sha256:25c1dc09b3f9d7e292458aa762c6beb96ea037071bf5e917fc81fb78d2231083", size = 17325, upload-time = "2025-02-28T20:24:56.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/27/3d6dcadc8a3214d8522c1e7f6a19554e33659be44546d44a2f7572ac7d2a/groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64", size = 14090, upload-time = "2025-02-28T20:24:55.152Z" }, +] + [[package]] name = "grpcio" version = "1.80.0" @@ -1463,6 +1702,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, ] +[[package]] +name = "harnesses" +source = { editable = "deps/verifiers/packages/harnesses" } +dependencies = [ + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiohttp", marker = "extra == 'nemogym'", specifier = ">=3.9.0" }, + { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" }, + { name = "verifiers", specifier = ">=0.1.15.dev158" }, +] +provides-extras = ["nemogym"] + +[[package]] +name = 
"hf-gradio" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gradio-client", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/86/c9694b7cfada5780e75769e60dc161a161f4dd7fc91b61db5e3a3338bef9/hf_gradio-0.4.1.tar.gz", hash = "sha256:a017d942618f0d495a58ee4563047fa04bef614c00e0cb789a9a6d0633cffa7b", size = 6560, upload-time = "2026-04-22T14:01:32.334Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/2d/afff2ee87e75d8eb85c92bb8cf0e15b05c23c2ebd8fd8dec781d8601ed7f/hf_gradio-0.4.1-py3-none-any.whl", hash = "sha256:76b8cb8be6abe62d74c1ad2d35b42f0629db89aa9e1a8d033cecfe7c856eeab3", size = 4482, upload-time = "2026-04-17T19:53:31.827Z" }, +] + [[package]] name = "hf-xet" version = "1.5.0" @@ -1481,13 +1748,13 @@ version = "0.2.1" source = { editable = "deps/research-environments/environments/hle" } dependencies = [ { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "pillow", specifier = ">=12.0.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -1632,7 +1899,7 @@ dependencies = [ { name = "immutabledict", marker = "(platform_machine == 
'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "langdetect", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -1640,7 +1907,7 @@ requires-dist = [ { name = "immutabledict" }, { name = "langdetect" }, { name = "nltk" }, - { name = "verifiers", specifier = ">=0.1.10" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -1775,6 +2042,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/6d/0d9848617b9f753b87f214f1c682592f7ca42de085f564352f10f0843026/ipywidgets-8.1.8-py3-none-any.whl", hash = "sha256:ecaca67aed704a338f88f67b1181b58f821ab5dc89c1f0f5ef99db43c1c2921e", size = 139808, upload-time = "2025-11-01T21:18:10.956Z" }, ] +[[package]] +name = "jaraco-classes" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" }, +] + +[[package]] +name = "jaraco-context" +version = "6.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/50/4763cd07e722bb6285316d390a164bc7e479db9d90daa769f22578f698b4/jaraco_context-6.1.2.tar.gz", hash = "sha256:f1a6c9d391e661cc5b8d39861ff077a7dc24dc23833ccee564b234b81c82dfe3", size = 16801, upload-time = "2026-03-20T22:13:33.922Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/58/bc8954bda5fcda97bd7c19be11b85f91973d67a706ed4a3aec33e7de22db/jaraco_context-6.1.2-py3-none-any.whl", hash = "sha256:bf8150b79a2d5d91ae48629d8b427a8f7ba0e1097dd6202a9059f29a36379535", size = 7871, upload-time = "2026-03-20T22:13:32.808Z" }, +] + +[[package]] +name = "jaraco-functools" +version = "4.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/cf/ea4ef2920830dea3f5ab2ea4da6fb67724e6dca80ee2553788c3607243d0/jaraco_functools-4.5.0.tar.gz", hash = "sha256:3bb5665ea4a020cf78a7040e89154c77edadb3ca74f366479669c5999aa70b03", size = 20272, upload-time = "2026-05-15T21:34:10.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/9a/982e48afcffcd727a9144506720ffd4224b6b7e355c98641866f38b7c043/jaraco_functools-4.5.0-py3-none-any.whl", hash = "sha256:79ce39246eddbde4b3a03b77ea5f0f7878dc669b166a66cf3fa8e266aa3fa2f4", size = 10594, upload-time = "2026-05-15T21:34:08.595Z" }, +] + [[package]] name = "jaxtyping" version = "0.3.10" @@ -1799,6 +2099,15 @@ wheels = 
[ { url = "https://files.pythonhosted.org/packages/9a/93/242e2eab5fe682ffcb8b0084bde703a41d51e17ee0f3a31ff0d9d813620a/jedi-0.20.0-py2.py3-none-any.whl", hash = "sha256:7bdd9c2634f56713299976f4cbd59cb3fa92165cc5e05ea811fb253480728b67", size = 4884812, upload-time = "2026-05-01T23:38:43.919Z" }, ] +[[package]] +name = "jeepney" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1843,6 +2152,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, ] +[[package]] +name = "joserfc" +version = "1.6.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/ac/d4fd5b30f82900eac60d765f179f0ba005825ac462cc8ced6e13ec685ab3/joserfc-1.6.8.tar.gz", hash = "sha256:878620c553a6ebdd76ccdc356782fee3f735f21a356d079a546b42a4670ace5f", size = 232930, upload-time = "2026-05-27T03:22:37.819Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/98/8c/5cdce2cf3ce8155849baf9a5e2ce77e89dc87ec3bdb38259e5d85fbc45bd/joserfc-1.6.8-py3-none-any.whl", hash = "sha256:22fb31a69094a5e6f44632002a9df2c30c941fc6c8ce1b037e92c03de954cf9f", size = 70927, upload-time = "2026-05-27T03:22:35.796Z" }, +] + +[[package]] +name = "jsonref" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload-time = "2023-01-16T16:10:04.455Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload-time = "2023-01-16T16:10:02.255Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -1858,6 +2188,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, ] +[[package]] +name = "jsonschema-path" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pathable", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "referencing", marker = 
"(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/79/cd02a4df6d9270efdc7d3feefe6edd730b0820c39eeaa107a2faee8322d5/jsonschema_path-0.5.0.tar.gz", hash = "sha256:493b156ba895c97602655b620a8456caa2ce08c1aa389f5a7addec065e6e855c", size = 19597, upload-time = "2026-05-19T20:45:00.971Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/2c/9e69d73c4297508be9e3b64a970ea3971b3eb8db64ffc5802d40bd25981f/jsonschema_path-0.5.0-py3-none-any.whl", hash = "sha256:2790a070bc7abb08ea3dbe4d340ece4efadf639223001f020c7503229ba068e2", size = 24077, upload-time = "2026-05-19T20:44:59.225Z" }, +] + [[package]] name = "jsonschema-specifications" version = "2025.9.1" @@ -1936,6 +2281,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/ea/fe955e157094d93fe20919441dc709ba7afbcf933de5896b546d5c217938/kernels_data-0.14.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ec694ae26ed8569f613e5cd4269222210cb57b1c16cf8b0c5acbf0082324804f", size = 1414055, upload-time = "2026-05-14T06:41:14.424Z" }, ] +[[package]] +name = "keyring" +version = "25.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jaraco-classes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jaraco-context", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jaraco-functools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jeepney", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "secretstorage", marker = "(platform_machine == 
'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, +] + [[package]] name = "kiwisolver" version = "1.5.0" @@ -2070,17 +2431,17 @@ wheels = [ [[package]] name = "livecodebench" -version = "0.2.6" +version = "0.2.7" source = { editable = "deps/research-environments/environments/livecodebench" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.19" }, - { name = "verifiers", specifier = ">=0.1.15.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2126,7 +2487,7 @@ dependencies = [ { name = "markdown", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 
'x86_64' and sys_platform == 'linux')" }, { name = "sympy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -2134,7 +2495,7 @@ requires-dist = [ { name = "markdown", specifier = ">=3.5.1" }, { name = "math-verify", specifier = ">=0.8.0" }, { name = "sympy", specifier = ">=1.12.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2213,19 +2574,19 @@ wheels = [ [[package]] name = "math-env" -version = "0.1.5" +version = "0.1.6" source = { editable = "deps/research-environments/environments/math_env" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, { name = "prime-sandboxes", specifier = ">=0.2.19" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ 
-2235,7 +2596,7 @@ source = { editable = "deps/verifiers/environments/math_python" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -2263,13 +2624,13 @@ version = "0.1.17" source = { editable = "deps/research-environments/environments/math500" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2353,13 +2714,13 @@ wheels = [ [[package]] name = "mini-swe-agent-plus" -version = "0.2.24" +version = "0.2.25" source = { editable = "deps/research-environments/environments/mini_swe_agent_plus" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and 
sys_platform == 'linux')" }, { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -2367,26 +2728,7 @@ requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.19" }, { name = "swebench", specifier = "==4.1.0" }, { name = "tenacity" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, -] - -[[package]] -name = "mini-swe-agent-plus-rlm" -version = "0.1.6" -source = { editable = "deps/research-environments/environments/mini_swe_agent_plus_rlm" } -dependencies = [ - { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, -] - -[package.metadata] -requires-dist = [ - { name = "prime-sandboxes", specifier = ">=0.2.19" }, - { name = "swebench", specifier = "==4.1.0" }, - { name = "tenacity" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = 
"verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2444,13 +2786,13 @@ version = "0.1.3" source = { editable = "deps/research-environments/environments/mmlu_pro" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2525,6 +2867,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/a4/a187adcd485ff27bdbdb5c2b4d9cf210427bc74bcaacfc8226409db17535/mooncake_transfer_engine-0.3.11.post1-cp312-cp312-manylinux_2_39_aarch64.whl", hash = "sha256:1ccad9f44cf1a67f4e0494bd02f505503139ab606ecbe76cd6050d7a069247d5", size = 18089789, upload-time = "2026-05-24T16:19:01.828Z" }, ] +[[package]] +name = "more-itertools" +version = "11.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/de/1d/f4da6f02cdffe04d6362210b807146a26044c88d839208aec273bb0d9184/more_itertools-11.1.0.tar.gz", hash = "sha256:48e8f4d9e7e5878571ecf6f2b4e57634f93cd474cc8cfbd2376f2d11b396e30d", size = 145772, upload-time = "2026-05-22T14:14:29.909Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/3d/1087453384dbde46a8c7f9356eead2c58be8a7bf156bca40243377c85715/more_itertools-11.1.0-py3-none-any.whl", hash = "sha256:4b65538ae22f6fed0ce4874efd317463a7489796a0939fa66824dd542125a192", size = 72226, upload-time = "2026-05-22T14:14:28.824Z" }, 
+] + [[package]] name = "mpmath" version = "1.3.0" @@ -3046,83 +3397,95 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" }, ] +[[package]] +name = "openapi-pydantic" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/2e/58d83848dd1a79cb92ed8e63f6ba901ca282c5f09d04af9423ec26c56fd7/openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d", size = 60892, upload-time = "2025-01-08T19:29:27.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" }, +] + [[package]] name = "opencode-cp" -version = "0.3.10" +version = "0.3.12" source = { editable = "deps/research-environments/environments/opencode_cp" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] 
[package.metadata] requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.19" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17,<0.1.15.dev150" }, ] [[package]] name = "opencode-deepdive" -version = "0.1.16" +version = "0.1.17" source = { editable = "deps/research-environments/environments/opencode_deepdive" } dependencies = [ { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "datasets", specifier = ">=4.0.0" }, { name = "prime-sandboxes", specifier = ">=0.2.25" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", specifier = ">=0.1.15.dev17,<0.1.15.dev150" }, ] [[package]] name = "opencode-math" -version = "0.4.11" +version = "0.4.13" source = { editable = "deps/research-environments/environments/opencode_math" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and 
sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17,<0.1.15.dev150" }, ] [[package]] name = "opencode-science" -version = "0.3.11" +version = "0.3.13" source = { editable = "deps/research-environments/environments/opencode_science" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17,<0.1.15.dev150" }, ] [[package]] name = "opencode-swe" -version = "0.4.7" +version = "0.4.9" source = { editable = "deps/research-environments/environments/opencode_swe" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and 
sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.23" }, { name = "swebench", specifier = "==4.1.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17,<0.1.15.dev150" }, ] [[package]] @@ -3139,6 +3502,56 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4b/33/b5db29a6c00eb8f50708110d8d453747ca125c8b805bc437b289dbdcc057/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0bd48544f77c68b2941392fcdf9bcd2b9cdf00e98cb8c29b2455d194763cf99e", size = 60391106, upload-time = "2026-02-05T10:30:14.236Z" }, ] +[[package]] +name = "openenv-core" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "fastmcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "gradio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyyaml", marker = "(platform_machine == 'aarch64' and 
sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tomli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tomli-w", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/d6/3bebe8afb55fcc3ea9251c4c2dfbab2879e31089bc91a8fe9696e5ce019b/openenv_core-0.3.0.tar.gz", hash = "sha256:c7fee2035badab5be497eb6f4afb2cb417de000f82cc19afd72fb5ec332c431d", size = 164720, upload-time = "2026-05-11T11:37:57.274Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/f5/aafa43138589bfd5d369a8d02ea365aae9d6fe55ac0b3894368d6d69bd03/openenv_core-0.3.0-py3-none-any.whl", hash = "sha256:859e875c9d5211b157c30fb9abc681606fcf0bf1b6ffcdf404678992823a1df0", size = 194313, upload-time = "2026-05-11T11:37:55.537Z" }, +] + +[[package]] +name = "openreward" +version = "0.1.125" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp", marker = "(platform_machine == 'aarch64' and 
sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "anthropic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "click", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "google-genai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "sse-starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "structlog", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 
'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/dd/b69264c77fd9720f69443c5f6420a7ae9934b4e2799e276d8655f6dc0721/openreward-0.1.125.tar.gz", hash = "sha256:519687307f960ab3a395bf844d6c2fc018d8a0faad0fc367ad3b24331366d390", size = 138936, upload-time = "2026-05-21T10:24:10.075Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/11/51c5473d5b3d209ecaacee6ac5418eef15114d0817a98f9cee0a2d6364ee/openreward-0.1.125-py3-none-any.whl", hash = "sha256:784faeeef6aba2ce8f175bd9af5dff29b3bb0c07a4c3642eb56dc431c9af2924", size = 135521, upload-time = "2026-05-21T10:24:11.369Z" }, +] + [[package]] name = "opentelemetry-api" version = "1.42.1" @@ -3347,6 +3760,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/32/658973117bf0fd82a24abbfb94fe73a5e86216e49342985e10acce54775a/partial_json_parser-0.2.1.1.post7-py3-none-any.whl", hash = "sha256:145119e5eabcf80cbb13844a6b50a85c68bf99d376f8ed771e2a3c3b03e653ae", size = 10877, upload-time = "2025-11-17T07:27:40.457Z" }, ] +[[package]] +name = "pathable" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/f3/5a20387de9bcd0607871bfc2198ee0e15836da7baa4592ccd7f24c27c986/pathable-0.6.0.tar.gz", hash = "sha256:6404b8b82aef5ff0fd478934137128b99b12212ba35afdde5525ca4f8388ea58", size = 18970, upload-time = "2026-05-19T18:15:11.911Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/e8/6d75ffd9784bce2e93d1ae4415649427e39a53bb172d4672b2b59c6f0a7b/pathable-0.6.0-py3-none-any.whl", hash = "sha256:82c4ca6c98c502ad12e0d4e9779b6210afee93c38990988c8c5d1b49bdcdf566", size = 18983, upload-time = "2026-05-19T18:15:10.728Z" }, +] + [[package]] name = "pdfminer-six" version = "20260107" @@ -3453,7 +3875,7 @@ dependencies = [ { name = "toml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "tomli", marker = 
"(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/99/62/3361ae244cfb65009a1a089c4101c02e953fe3b126b2337c44ac50aac51e/prime-0.6.10.tar.gz", hash = "sha256:eee8341905e5daed4f751a2bf0f03e5d52f420450e5f45c4d5aa1856b806faea", size = 629514, upload-time = "2026-05-27T20:53:45.841Z" } wheels = [ @@ -3542,7 +3964,7 @@ dependencies = [ { name = "torchvision", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "transformers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "uvloop", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "vllm", version = "0.22.0+cu129", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.22.0/vllm-0.22.0+cu129-cp38-abi3-manylinux_2_28_aarch64.whl" }, marker = "platform_machine == 
'aarch64' and sys_platform == 'linux'" }, { name = "vllm", version = "0.22.0+cu129", source = { url = "https://github.com/vllm-project/vllm/releases/download/v0.22.0/vllm-0.22.0+cu129-cp38-abi3-manylinux_2_28_x86_64.whl" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "wandb", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -3584,7 +4006,6 @@ envs = [ { name = "math-python", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "math500", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "mini-swe-agent-plus", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "mini-swe-agent-plus-rlm", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "mmlu-pro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "opencode-cp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "opencode-deepdive", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -3663,7 +4084,6 @@ requires-dist = [ { name = "math-python", marker = "extra == 'envs'", editable = "deps/verifiers/environments/math_python" }, { name = "math500", marker = "extra == 'envs'", editable = "deps/research-environments/environments/math500" }, { name = "mini-swe-agent-plus", marker = "extra == 'envs'", editable = 
"deps/research-environments/environments/mini_swe_agent_plus" }, - { name = "mini-swe-agent-plus-rlm", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mini_swe_agent_plus_rlm" }, { name = "mmlu-pro", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mmlu_pro" }, { name = "modelexpress", marker = "extra == 'modelexpress'", specifier = "==0.3.0" }, { name = "mooncake-transfer-engine", specifier = ">=0.3.10.post2" }, @@ -3879,6 +4299,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, ] +[[package]] +name = "py-key-value-aio" +version = "0.4.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beartype", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/e2/d689d922894a7ecde73b6daeaf9b13dab5aae06fe6aaaf7514722644d382/py_key_value_aio-0.4.5.tar.gz", hash = "sha256:c6563a2c6abe5da5e20f4f9e875c2a9b425a2244a54fadbf46cf140a9eea45d7", size = 107547, upload-time = "2026-05-27T16:37:08.107Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/95/b8ba862968712caa12a19666175334fa979e1f198b896a430adb3bacfe87/py_key_value_aio-0.4.5-py3-none-any.whl", hash = "sha256:ab862adbcb8c72547d1c57821f22cbbb71ab86509039c96f36e914e0336c8dd7", size = 170005, upload-time = "2026-05-27T16:37:06.629Z" }, +] + +[package.optional-dependencies] +filetree = [ + { name = "aiofile", marker = "(platform_machine == 'aarch64' and 
sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +keyring = [ + { name = "keyring", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +memory = [ + { name = "cachetools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + [[package]] name = "pyarrow" version = "24.0.0" @@ -3891,6 +4336,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490, upload-time = "2026-04-21T10:47:55.981Z" }, ] +[[package]] +name = "pyasn1" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, +] + [[package]] name = "pybase64" version = "1.4.3" @@ -4004,6 +4470,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/8d/f1af3832f5e6eb13ba94ee809e72b8ecb5eef226d27ee0bef7d963d943c7/pydantic_settings-2.14.1-py3-none-any.whl", hash = "sha256:6e3c7edfd8277687cdc598f56e5cff0e9bfff0910a3749deaa8d4401c3a2b9de", size = 60964, upload-time = "2026-05-08T13:40:04.958Z" }, ] +[[package]] +name = "pydub" +version = "0.25.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" }, +] + [[package]] name = "pyelftools" version = "0.32" @@ -4052,6 +4527,17 @@ crypto = [ { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] +[[package]] +name = "pymupdf" +version = "1.27.2.3" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/22/32/708bedc9dde7b328d45abbc076091769d44f2f24ad151ad92d56a6ec142b/pymupdf-1.27.2.3.tar.gz", hash = "sha256:7a92faa25129e8bbec5e50eeb9214f187665428c31b05c4ef6e36c58c0b1c6d2", size = 85759618, upload-time = "2026-04-24T14:13:14.42Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/26/b7e5a70eb83bd189f8b5df87ec442746b992f2f632662839b288170d357d/pymupdf-1.27.2.3-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1dd460a3ae4597a755f00a3bd9771f5ebf1531dc111f6a36bf05dd00a6b84425", size = 24333923, upload-time = "2026-04-24T14:09:47.341Z" }, + { url = "https://files.pythonhosted.org/packages/e4/a0/aa1ee2240f29481a04a827c313333b4ecd8a14d6ac3e15d3f41a30574781/pymupdf-1.27.2.3-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:857842b4888827bd6155a1131341b2822a7ebe9a8c15a975fd7d490d7a64a30c", size = 24963198, upload-time = "2026-04-24T14:10:07.408Z" }, + { url = "https://files.pythonhosted.org/packages/69/49/4f742451f980840829fc00ba158bebb25d389c846d8f4f8c65936ee55de8/pymupdf-1.27.2.3-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:580983849c64a08d08344ca3d1580e87c01f046a8392421797bc850efd72a5b6", size = 25184609, upload-time = "2026-04-24T14:10:22.911Z" }, +] + [[package]] name = "pynacl" version = "1.6.2" @@ -4080,6 +4566,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] +[[package]] +name = "pyperclip" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/52/d87eba7cb129b81563019d1679026e7a112ef76855d6159d24754dbd2a51/pyperclip-1.11.0.tar.gz", hash = "sha256:244035963e4428530d9e3a6101a1ef97209c6825edab1567beac148ccc1db1b6", 
size = 12185, upload-time = "2025-09-26T14:40:37.245Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" }, +] + [[package]] name = "pypika" version = "0.51.1" @@ -4197,6 +4692,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/cb/769cfc37177252872a45a71f3fbdde9d51b471a3f3c14bfe95dde3407386/python_multipart-0.0.29-py3-none-any.whl", hash = "sha256:2ddcc971cef266225f54f552d8fa10bcfbb1f14446caec199060daac59ff2d69", size = 29640, upload-time = "2026-05-17T17:29:45.69Z" }, ] +[[package]] +name = "pytz" +version = "2026.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ff/46/dd499ec9038423421951e4fad73051febaa13d2df82b4064f87af8b8c0c3/pytz-2026.2.tar.gz", hash = "sha256:0e60b47b29f21574376f218fe21abc009894a2321ea16c6754f3cad6eb7cdd6a", size = 320861, upload-time = "2026-05-04T01:35:29.667Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/dd/96da98f892250475bdf2328112d7468abdd4acc7b902b6af23f4ed958ea0/pytz-2026.2-py2.py3-none-any.whl", hash = "sha256:04156e608bee23d3792fd45c94ae47fae1036688e75032eea2e3bf0323d1f126", size = 510141, upload-time = "2026-05-04T01:35:27.408Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" @@ -4347,7 +4851,7 @@ version = "0.1.4" source = { editable = "deps/verifiers/environments/reverse_text" } dependencies = [ { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = 
"(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -4369,6 +4873,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, ] +[[package]] +name = "rich-rst" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pygments", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/56/3191bae66b08ccc637ea8120426068bcb361cc323c96404c310886937067/rich_rst-2.0.1.tar.gz", hash = "sha256:cbe236ed0901d1ec8427cc6a50bf0a34353ba28ad014dc24def68bfe7f3b9e68", size = 300570, upload-time = "2026-05-16T00:47:57.362Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/3d/55c17d3ebdf3cd81356002afe5bef9bb8af631db2819785b6eac845b925b/rich_rst-2.0.1-py3-none-any.whl", hash = "sha256:7ee15f345ce25fa02b582c272a6cdbaf0c21243e38061cea273cff659bf3ef61", size = 272922, upload-time = "2026-05-16T00:47:55.508Z" }, +] + [[package]] name = "rich-toolkit" version = "0.19.10" @@ -4406,13 +4923,13 @@ wheels = [ [[package]] name = "rlm-swe" -version = "0.4.2" +version = "0.4.4" source = { editable = "deps/research-environments/environments/rlm_swe" } dependencies = [ { name = "multi-swe-bench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and 
sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -4420,7 +4937,7 @@ requires-dist = [ { name = "multi-swe-bench", specifier = ">=1.1.2" }, { name = "prime-sandboxes", specifier = ">=0.2.19" }, { name = "swebench", specifier = "==4.1.0" }, - { name = "verifiers", specifier = ">=0.1.13.dev8" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -4447,6 +4964,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/c5/8864e4e7925b836ea354b31d57641ec03830564e281a8b6f061f8c3e0ec1/ruff-0.15.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:bb01d645694e3ec0102105d07ef2d53703970407d59c04e59d3ba0b7a1d53553", size = 11560214, upload-time = "2026-05-21T14:34:50.975Z" }, ] +[[package]] +name = "safehttpx" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/89/d1/4282284d9cf1ee873607a46442da977fc3c985059315ab23610be31d5885/safehttpx-0.1.7.tar.gz", hash = "sha256:db201c0978c41eddb8bb480f3eee59dd67304fdd91646035e9d9a720049a9d23", size = 10385, upload-time = "2025-10-24T18:30:09.783Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/2e/a3/0f0b7d78e2f1eb9e8e1afbff1d2bff8d60144aee17aca51c065b516743dd/safehttpx-0.1.7-py3-none-any.whl", hash = "sha256:c4f4a162db6993464d7ca3d7cc4af0ffc6515a606dfd220b9f82c6945d869cde", size = 8959, upload-time = "2025-10-24T18:30:08.733Z" }, +] + [[package]] name = "safetensors" version = "0.7.0" @@ -4465,13 +4994,13 @@ version = "0.1.4" source = { editable = "deps/research-environments/environments/science_env" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -4519,6 +5048,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" }, ] +[[package]] +name = "secretstorage" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jeepney", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, +] + +[[package]] +name = "semantic-version" +version = "2.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/31/f2289ce78b9b473d582568c234e104d2a342fd658cc288a7553d83bb8595/semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c", size = 52289, upload-time = "2022-05-26T13:35:23.454Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552, upload-time = "2022-05-26T13:35:21.206Z" }, +] + [[package]] name = "sentencepiece" version = "0.2.1" @@ -4577,11 +5128,11 @@ name = "simpleqa-verified" version = "0.1.2" source = { editable = "deps/research-environments/environments/simpleqa_verified" } dependencies = [ - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] -requires-dist = [{ name = "verifiers", specifier = ">=0.1.15.dev1" }] +requires-dist = [{ 
name = "verifiers", specifier = ">=0.1.15.dev17" }] [[package]] name = "six" @@ -4621,15 +5172,14 @@ wheels = [ [[package]] name = "sse-starlette" -version = "3.4.4" +version = "2.3.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f7/2b/58abc2d1fd397e7dde08e947e05c884d8ef2f78d5e2588c17a12d42d6994/sse_starlette-3.4.4.tar.gz", hash = "sha256:07e0fa0460138baf25cdd5fb28683472c3995dc1642225191b3832d62526bcb0", size = 31819, upload-time = "2026-05-12T17:37:17.019Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/f4/989bc70cb8091eda43a9034ef969b25145291f3601703b82766e5172dfed/sse_starlette-2.3.6.tar.gz", hash = "sha256:0382336f7d4ec30160cf9ca0518962905e1b69b72d6c1c995131e0a703b436e3", size = 18284, upload-time = "2025-05-30T13:34:12.914Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/67/805710444ea8cc75fbf70b920ed431a560c4bf9c57f7d5a3117213189399/sse_starlette-3.4.4-py3-none-any.whl", hash = "sha256:3f4dd50d8aed2771a091f3a83000323fc3844541c16b4fe585ae2420cc6df973", size = 16514, upload-time = "2026-05-12T17:37:15.601Z" }, + { url = "https://files.pythonhosted.org/packages/81/05/78850ac6e79af5b9508f8841b0f26aa9fd329a1ba00bf65453c2d312bcc8/sse_starlette-2.3.6-py3-none-any.whl", hash = "sha256:d49a8285b182f6e2228e2609c350398b2ca2c36216c2675d875f81e93548f760", size = 10606, upload-time = "2025-05-30T13:34:11.703Z" }, ] [[package]] @@ -4659,6 +5209,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = 
"sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, ] +[[package]] +name = "structlog" +version = "25.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" }, +] + [[package]] name = "supervisor" version = "4.3.0" @@ -4745,6 +5304,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" }, ] +[[package]] +name = "tasksets" +source = { editable = "deps/verifiers/packages/tasksets" } +dependencies = [ + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[package.optional-dependencies] +openenv = [ + { name = "openenv-core", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +openreward = [ + { name = "openreward", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +ta = [ + { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 
'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "textarena", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[package.metadata] +requires-dist = [ + { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" }, + { name = "nltk", marker = "extra == 'ta'" }, + { name = "openenv-core", marker = "extra == 'openenv'", specifier = ">=0.3.0" }, + { name = "openreward", marker = "python_full_version >= '3.11' and extra == 'openreward'", specifier = ">=0.1.123" }, + { name = "textarena", marker = "extra == 'ta'" }, + { name = "verifiers", specifier = ">=0.1.15.dev11" }, +] +provides-extras = ["nemogym", "openenv", "openreward", "ta"] + [[package]] name = "tau2" version = "0.2.1.dev0" @@ -4784,13 +5373,13 @@ version = "0.2.3" source = { editable = "deps/research-environments/environments/tau2_bench" } dependencies = [ { name = "tau2", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "tau2", git = "https://github.com/sierra-research/tau2-bench.git?rev=337326e" }, - { name = "verifiers", specifier = ">=0.1.15.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -5012,11 +5601,11 @@ wheels = [ [[package]] name = "tomlkit" -version = "0.15.0" +version = "0.14.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/51/db/03eaf4331631ef6b27d6e3c9b68c54dc6f0d63d87201fed600cc409307fd/tomlkit-0.15.0.tar.gz", hash = "sha256:7d1a9ecba3086638211b13814ea79c90dd54dd11993564376f3aa92271f5c7a3", size = 161875, upload-time = "2026-05-10T07:38:22.245Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/af/14b24e41977adb296d6bd1fb59402cf7d60ce364f90c890bd2ec65c43b5a/tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064", size = 187167, upload-time = "2026-01-13T01:14:53.304Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/43/8bd850ee71a191bf072e31302c73a66be413fecdd98fdcd111ecbcce13ca/tomlkit-0.15.0-py3-none-any.whl", hash = "sha256:4dbc8f0fc024412b57ced8757ac7461305126a648ff8c2c807fcb8e133a78738", size = 41328, upload-time = "2026-05-10T07:38:23.517Z" }, + { url = "https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310, upload-time = "2026-01-13T01:14:51.965Z" }, ] [[package]] @@ -5280,6 +5869,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/73/d21edf5b204d1467e06500080a50f79d49ef2b997c79123a536d4a17d97c/uc_micro_py-2.0.0-py3-none-any.whl", hash = "sha256:3603a3859af53e5a39bc7677713c78ea6589ff188d70f4fee165db88e22b242c", size = 6383, upload-time = "2026-03-01T06:31:26.257Z" }, ] +[[package]] +name = "uncalled-for" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/82/345cc927f7fbdae6065e7768759932fcc827fc20b29b45dfbafa2f1f7da4/uncalled_for-0.3.2.tar.gz", hash = "sha256:89f5dbcd71e2b8f47c030b1fa302e6cce2ec795d1ac565eeb6525c5fe55cb8a2", size = 50032, upload-time = "2026-05-06T13:38:25.204Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/3b/25/2c87754f3a9e692315f7b811244090e68f362979fc8886b3fbd2985a1d8c/uncalled_for-0.3.2-py3-none-any.whl", hash = "sha256:0ff60b142c7d1f8070bde9d42afaa70aedc77dcc10998c227687e9c15713418e", size = 11444, upload-time = "2026-05-06T13:38:24.025Z" }, +] + [[package]] name = "unidiff" version = "0.7.5" @@ -5337,8 +5935,10 @@ wheels = [ name = "verifiers" source = { editable = "deps/verifiers" } dependencies = [ + { name = "aiohttp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "aiolimiter", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "anthropic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "certifi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "gepa", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -5350,10 +5950,12 @@ dependencies = [ { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "openai-agents", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or 
(platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-pydantic-config", extra = ["toml"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-tunnel", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pymupdf", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pyzmq", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "regex", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -5364,12 +5966,20 @@ dependencies = [ { name = "uvloop", marker = "(platform_machine == 'aarch64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux')" }, ] +[package.optional-dependencies] +packages = [ + { name = "harnesses", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 
'linux')" }, + { name = "tasksets", extra = ["openenv", "openreward", "ta"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + [package.metadata] requires-dist = [ { name = "accelerate", marker = "extra == 'rl'", specifier = ">=1.4.0" }, + { name = "aiohttp", specifier = ">=3.9.0" }, { name = "aiohttp", marker = "extra == 'browser'", specifier = ">=3.9.0" }, { name = "aiolimiter", specifier = ">=1.2.1" }, { name = "anthropic", specifier = ">=0.78.0" }, + { name = "certifi" }, { name = "datasets", specifier = ">=3.0.0,<4.7.0" }, { name = "deepspeed", marker = "extra == 'rl'", specifier = ">=0.17.6" }, { name = "flash-attn", marker = "extra == 'rl'", specifier = ">=2.8.3" }, @@ -5389,10 +5999,12 @@ requires-dist = [ { name = "openai", specifier = ">=1.108.1" }, { name = "openai-agents", specifier = ">=0.0.7" }, { name = "peft", marker = "extra == 'rl'" }, + { name = "pillow" }, { name = "prime-pydantic-config", extras = ["toml"] }, { name = "prime-sandboxes", specifier = ">=0.2.25" }, { name = "prime-tunnel", specifier = ">=0.1.6" }, { name = "pydantic", specifier = ">=2.11.9" }, + { name = "pymupdf" }, { name = "python-dotenv", marker = "extra == 'browser'", specifier = ">=1.0.0" }, { name = "pyzmq", specifier = ">=27.1.0" }, { name = "reasoning-gym", marker = "extra == 'rg'" }, @@ -5967,7 +6579,7 @@ dependencies = [ { name = "chromadb", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "datasets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or 
(platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -5985,7 +6597,7 @@ source = { editable = "deps/verifiers/environments/wordle" } dependencies = [ { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "textarena", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] From 77b85673702b8ac939a5c2e105de54a676276dbe Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 9 Jun 2026 20:48:06 -0700 Subject: [PATCH 12/12] explicit del and malloc --- src/prime_rl/orchestrator/orchestrator.py | 50 +++++++++++++++-------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/src/prime_rl/orchestrator/orchestrator.py b/src/prime_rl/orchestrator/orchestrator.py index 9823dd73d0..d8595d326e 100644 --- a/src/prime_rl/orchestrator/orchestrator.py +++ b/src/prime_rl/orchestrator/orchestrator.py @@ -22,6 +22,7 @@ import asyncio import ctypes +import gc import logging import os import time @@ -109,6 +110,14 @@ TARGET_LAG = 1 +def _release_unused_memory() -> None: + gc.collect() + try: + ctypes.CDLL("libc.so.6").malloc_trim(0) + except (OSError, AttributeError) as e: + get_logger().debug(f"malloc_trim(0) unavailable: {e}") + + class Orchestrator: # Set in ``__init__`` config: OrchestratorConfig @@ -476,10 +485,7 @@ async 
def start(self) -> None: get_logger().success("Orchestrator finished.") else: get_logger().warning("Orchestrator cleanup complete (forced).") - try: - ctypes.CDLL("libc.so.6").malloc_trim(0) - except Exception as e: - get_logger().debug(f"malloc_trim(0) failed: {e}") + _release_unused_memory() async def main_loop(self) -> None: """Consume ``FinishedRollout``\\ s from the dispatcher and route them @@ -496,19 +502,29 @@ async def main_loop(self) -> None: except asyncio.TimeoutError: continue - if isinstance(rollout, EvalRollout): - assert self.eval_sink is not None # eval rollouts only emitted when eval is configured - eval_batch = self.eval_sink.add(rollout) - if eval_batch is not None: - await self.finalize_eval_batch(eval_batch) - continue - - assert isinstance(rollout, TrainRollout) - train_batch = await self.train_sink.add(rollout) - # In drain mode any late-arriving train batch is dropped — we - # don't want to ship past ``max_steps`` - if train_batch is not None and not self.draining and not self.stopped.is_set(): - await self.finalize_train_batch(train_batch) + batch = None + should_release_memory = False + try: + if isinstance(rollout, EvalRollout): + assert self.eval_sink is not None # eval rollouts only emitted when eval is configured + batch = self.eval_sink.add(rollout) + if batch is not None: + should_release_memory = True + await self.finalize_eval_batch(batch) + continue + + assert isinstance(rollout, TrainRollout) + batch = await self.train_sink.add(rollout) + # In drain mode any late-arriving train batch is dropped — we + # don't want to ship past ``max_steps`` + if batch is not None: + should_release_memory = True + if batch is not None and not self.draining and not self.stopped.is_set(): + await self.finalize_train_batch(batch) + finally: + del batch, rollout + if should_release_memory: + _release_unused_memory() async def finalize_train_batch(self, batch: TrainBatch) -> None: """Ship one ``TrainBatch`` out to the trainer and handle the I/O