From db8e57a1c209755381cf65d45dba9ef467e98ec8 Mon Sep 17 00:00:00 2001 From: Sami Jaghouar Date: Mon, 8 Jun 2026 21:47:15 +0000 Subject: [PATCH 1/2] fix: add prime-pydantic-config as direct dep so uv uses editable path source The [tool.uv.sources] override for prime-pydantic-config was being ignored because it was only a transitive dependency (via prime-rl-configs). uv only applies source overrides for packages that appear in project.dependencies. Adding it as a direct dependency makes uv resolve from the local editable path (deps/pydantic-config) instead of PyPI. --- pyproject.toml | 1 + uv.lock | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 625a879d5a..b5f2a2d2dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = "README.md" requires-python = "~=3.12.0" dependencies = [ "prime-rl-configs", + "prime-pydantic-config", "beartype>=0.21.0", "datasets>=4.0.0", "jaxtyping>=0.3.2", diff --git a/uv.lock b/uv.lock index 5c15a1668a..03e1a1ddae 100644 --- a/uv.lock +++ b/uv.lock @@ -3476,21 +3476,35 @@ wheels = [ [[package]] name = "prime-pydantic-config" -version = "0.3.0.dev86" -source = { registry = "https://pypi.org/simple" } +version = "0.3.0" +source = { editable = "deps/pydantic-config" } dependencies = [ { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ad/34/006fc720a8fcda84706793582d50a2028bf6950fb7a0eedb59d3f6555261/prime_pydantic_config-0.3.0.dev86.tar.gz", hash = "sha256:1139bb6d21a8cf134e212ee4e529e5150f2db7422b42eae3ca69a5c77b8a69f5", size = 75656, upload-time = "2026-06-02T01:08:19.079Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/a3/ded48c436cd56ddac3b216458ac458eaf069b55e0ca3be506b2508d16fa2/prime_pydantic_config-0.3.0.dev86-py3-none-any.whl", hash = "sha256:51ac33ae1b5de9ba2e44eb9a91242d9dd783784234942f166f6e8974bcdf1577", size = 27437, upload-time = "2026-06-02T01:08:20.23Z" }, -] [package.optional-dependencies] toml = [ { name = "tomli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] +[package.metadata] +requires-dist = [ + { name = "pydantic", specifier = ">=2.0.0" }, + { name = "pyyaml", marker = "extra == 'all'" }, + { name = "pyyaml", marker = "extra == 'yaml'" }, + { name = "tomli", marker = "extra == 'all'" }, + { name = "tomli", marker = "extra == 'toml'" }, +] +provides-extras = ["yaml", "toml", "all"] + +[package.metadata.requires-dev] +dev = [ + { name = "pre-commit", specifier = ">=3.0.0" }, + { name = "pytest", specifier = ">=9.0.3" }, + { name = "rich", specifier = ">=15.0.0" }, + { name = "ruff", specifier = ">=0.12.1" }, +] + [[package]] name = "prime-rl" version = "0.5.0" @@ -3509,6 +3523,7 @@ dependencies = [ { name = "nvidia-ml-py", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "prime-pydantic-config", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "prime-rl-configs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pyarrow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pybase64", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -3662,6 +3677,7 @@ requires-dist = [ { name = "opencode-science", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_science" }, { name = "opencode-swe", marker = "extra == 'envs'", editable = "deps/research-environments/environments/opencode_swe" }, { name = "prime", specifier = ">=0.6.4" }, + { name = "prime-pydantic-config", editable = "deps/pydantic-config" }, { name = "prime-rl", extras = ["disagg"], marker = "extra == 'all'" }, { name = "prime-rl", extras = ["flash-attn"], marker = "extra == 'all'" }, { name = "prime-rl", extras = ["flash-attn-3"], marker = "extra == 'all'" }, From 5af36f5cb8c4982fb9e9d9558a0532c00a7a2ea8 Mon Sep 17 00:00:00 2001 From: Sami Jaghouar Date: Mon, 8 Jun 2026 22:30:53 +0000 Subject: [PATCH 2/2] feat: add env_prefix to prime-rl config classes Add env_prefix ClassVar to each entrypoint config class, backed by the new env var injection in pydantic-config's BaseConfig.model_validate. Env prefixes: - RLConfig -> PRIME_RL_ - OrchestratorConfig -> PRIME_RL_ORCH_ - TrainerConfig -> PRIME_RL_TRAINER_ - InferenceConfig -> PRIME_RL_INFER_ - SFTConfig -> PRIME_RL_SFT_ - EnvServerConfig -> PRIME_RL_ENV_SERVER_ Also fix inference.py to import BaseConfig from prime_rl.utils.config for consistency with other config files. Also add prime-pydantic-config as a direct dependency in project.dependencies so uv applies the [tool.uv.sources] path override (editable install from deps/pydantic-config instead of PyPI). --- deps/pydantic-config | 2 +- .../src/prime_rl/configs/env_server.py | 2 + .../src/prime_rl/configs/inference.py | 6 +- .../src/prime_rl/configs/orchestrator.py | 3 +- .../src/prime_rl/configs/rl.py | 3 +- .../src/prime_rl/configs/sft.py | 3 +- .../src/prime_rl/configs/trainer.py | 3 +- tests/unit/utils/test_config.py | 79 +++++++++++++++++++ 8 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 tests/unit/utils/test_config.py diff --git a/deps/pydantic-config b/deps/pydantic-config index 896ade4e69..3bd1f18884 160000 --- a/deps/pydantic-config +++ b/deps/pydantic-config @@ -1 +1 @@ -Subproject commit 896ade4e69d8d8dff2d4b0a431b7e1c7c12d638f +Subproject commit 3bd1f18884c72cc95954b6300c206458d21fc40c diff --git a/packages/prime-rl-configs/src/prime_rl/configs/env_server.py b/packages/prime-rl-configs/src/prime_rl/configs/env_server.py index 50c99adcff..bb637cb4cc 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/env_server.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/env_server.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import ClassVar from pydantic import model_validator @@ -8,6 +9,7 @@ class EnvServerConfig(BaseConfig): + env_prefix: ClassVar[str] = "PRIME_RL_ENV_SERVER_" env: EnvConfig = EnvConfig() log: LogConfig = LogConfig() diff --git a/packages/prime-rl-configs/src/prime_rl/configs/inference.py b/packages/prime-rl-configs/src/prime_rl/configs/inference.py index 9579259366..dc8b6afcd4 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/inference.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/inference.py @@ -1,12 +1,11 @@ from argparse import Namespace from pathlib import Path -from typing import Annotated, Any, Literal, TypeAlias +from typing import Annotated, Any, ClassVar, Literal, TypeAlias from pydantic import Field, model_validator -from pydantic_config import BaseConfig from prime_rl.configs.shared import BaseModelConfig, LogConfig, SlurmConfig -from prime_rl.utils.config import find_package_resource, rgetattr, rsetattr +from prime_rl.utils.config import BaseConfig, find_package_resource, rgetattr, rsetattr from prime_rl.utils.parsers import resolve_reasoning_parser, resolve_tool_call_parser # TODO: Set thinking/ solution budget @@ -270,6 +269,7 @@ class InferenceExperimentalConfig(BaseConfig): class InferenceConfig(BaseConfig): + env_prefix: ClassVar[str] = "PRIME_RL_INFER_" server: ServerConfig = ServerConfig() model: ModelConfig = Field(default_factory=ModelConfig) diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py index be5fe249f3..a2029c3cb8 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py @@ -1,7 +1,7 @@ import math import warnings from pathlib import Path -from typing import Annotated, Any, Literal, TypeAlias +from typing import Annotated, Any, ClassVar, Literal, TypeAlias from pydantic import AliasChoices, Field, model_serializer, model_validator from pydantic_core.core_schema import SerializerFunctionWrapHandler @@ -500,6 +500,7 @@ class RolloutModelConfig(BaseConfig): class OrchestratorConfig(BaseConfig): + env_prefix: ClassVar[str] = "PRIME_RL_ORCH_" training_mode: Literal["rl", "opd", "sft"] = "rl" """Training mode. ``rl``: student generates rollouts, no teacher. ``opd``: student generates rollouts, teacher computes logprobs (teacher_tau > 0). ``sft``: teacher generates rollouts, student inference pool used for evals and weight sync.""" diff --git a/packages/prime-rl-configs/src/prime_rl/configs/rl.py b/packages/prime-rl-configs/src/prime_rl/configs/rl.py index dab46a9ce1..9fa0c227a8 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/rl.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/rl.py @@ -1,6 +1,6 @@ import warnings from pathlib import Path -from typing import Annotated, Any, Literal, TypeAlias +from typing import Annotated, Any, ClassVar, Literal, TypeAlias from pydantic import Field, model_validator @@ -178,6 +178,7 @@ def total_infer_nodes(self) -> int: class RLConfig(BaseConfig): + env_prefix: ClassVar[str] = "PRIME_RL_" trainer: TrainerConfig orchestrator: OrchestratorConfig diff --git a/packages/prime-rl-configs/src/prime_rl/configs/sft.py b/packages/prime-rl-configs/src/prime_rl/configs/sft.py index 56e905cff7..289181d81f 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/sft.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/sft.py @@ -1,6 +1,6 @@ import warnings from pathlib import Path -from typing import Annotated, Literal, TypeAlias +from typing import Annotated, ClassVar, Literal, TypeAlias from pydantic import Field, model_validator from renderers import RendererConfig @@ -171,6 +171,7 @@ class SFTExperimentalConfig(BaseConfig): class SFTConfig(BaseConfig): + env_prefix: ClassVar[str] = "PRIME_RL_SFT_" model: ModelConfig = ModelConfig() tokenizer: TokenizerConfig = TokenizerConfig() diff --git a/packages/prime-rl-configs/src/prime_rl/configs/trainer.py b/packages/prime-rl-configs/src/prime_rl/configs/trainer.py index 00f4e07deb..af4cd6fa6e 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/trainer.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/trainer.py @@ -1,6 +1,6 @@ import warnings from pathlib import Path -from typing import Annotated, Any, Literal, TypeAlias +from typing import Annotated, Any, ClassVar, Literal, TypeAlias from pydantic import Field, model_validator @@ -500,6 +500,7 @@ class TrainerExperimentalConfig(BaseConfig): class TrainerConfig(BaseConfig): + env_prefix: ClassVar[str] = "PRIME_RL_TRAINER_" model: ModelConfig = ModelConfig() tokenizer: TokenizerConfig = TokenizerConfig() diff --git a/tests/unit/utils/test_config.py b/tests/unit/utils/test_config.py new file mode 100644 index 0000000000..4476003f43 --- /dev/null +++ b/tests/unit/utils/test_config.py @@ -0,0 +1,79 @@ +"""Tests for environment variable injection in prime-rl config classes.""" + +import os + +import pytest + +from prime_rl.configs.inference import InferenceConfig +from prime_rl.configs.orchestrator import OrchestratorConfig +from prime_rl.configs.rl import RLConfig +from prime_rl.configs.trainer import TrainerConfig +from prime_rl.utils.config import cli + + +@pytest.fixture(autouse=True) +def _clean_env(monkeypatch): + """Remove any PRIME_RL_* env vars before each test.""" + for key in list(os.environ): + if key.startswith("PRIME_RL_"): + monkeypatch.delenv(key, raising=False) + + +# ── OrchestratorConfig ────────────────────────────────────────────── + + +def test_orch_env_var_batch_size(monkeypatch): + monkeypatch.setenv("PRIME_RL_ORCH_BATCH_SIZE", "512") + config = OrchestratorConfig.model_validate({}) + assert config.batch_size == 512 + + +def test_orch_env_var_nested(monkeypatch): + monkeypatch.setenv("PRIME_RL_ORCH_STUDENT__MODEL__NAME", "Qwen/Qwen3-0.6B") + config = OrchestratorConfig.model_validate({}) + assert config.student.model.name == "Qwen/Qwen3-0.6B" + + +def test_orch_env_var_cli_wins(monkeypatch): + monkeypatch.setenv("PRIME_RL_ORCH_BATCH_SIZE", "512") + config = cli(OrchestratorConfig, args=["--batch-size", "256"]) + assert config.batch_size == 256 + + +# ── TrainerConfig ─────────────────────────────────────────────────── + + +def test_trainer_env_var_max_steps(monkeypatch): + monkeypatch.setenv("PRIME_RL_TRAINER_MAX_STEPS", "1000") + config = TrainerConfig.model_validate({}) + assert config.max_steps == 1000 + + +# ── InferenceConfig ──────────────────────────────────────────────── + + +def test_infer_env_var(monkeypatch): + monkeypatch.setenv("PRIME_RL_INFER_MODEL__MAX_MODEL_LEN", "4096") + config = InferenceConfig.model_validate({}) + assert config.model.max_model_len == 4096 + + +# ── RLConfig ─────────────────────────────────────────────────────── + + +def test_rl_env_var_propagates_to_orchestrator(monkeypatch): + """Env vars on RLConfig (prefix PRIME_RL_) propagate to sub-configs + before the auto_setup_shared_configs validator runs.""" + monkeypatch.setenv("PRIME_RL_ORCHESTRATOR__BATCH_SIZE", "512") + config = RLConfig.model_validate({"trainer": {}, "orchestrator": {}, "inference": {}}) + assert config.orchestrator.batch_size == 512 + + +# ── No prefix leakage ────────────────────────────────────────────── + + +def test_orch_prefix_does_not_leak_to_trainer(monkeypatch): + monkeypatch.setenv("PRIME_RL_ORCH_BATCH_SIZE", "999") + config = TrainerConfig.model_validate({}) + # TrainerConfig has prefix PRIME_RL_TRAINER_; ORCH_ vars shouldn't affect it + assert config.max_steps == TrainerConfig().max_steps