Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ rest of the cells run as-is.

| Tutorial | Summary | Difficulty | Framework | Launch |
|---|---|---|---|---|
| [`001_qwen27b`](tutorials/singlenode/001_qwen27b/001_qwen27b.ipynb) | Train Qwen3.6-27B on DAPO-math with GRPO | Advanced | `slime` | <a href="https://modal.com/notebooks/new/https://github.com/modal-projects/training-gym/blob/main/tutorials/singlenode/001_qwen27b/001_qwen27b.ipynb" target="_blank" rel="nofollow noopener noreferrer"><img src="https://modal-cdn.com/open-in-modal.svg" alt="Open in Modal"></a> |
| [`000_qwen35b`](tutorials/singlenode/000_qwen35b/000_qwen35b.ipynb) | Train Qwen3.6-35B-A3B on DAPO-math with GRPO | Advanced | `slime` | <a href="https://modal.com/notebooks/new/https://github.com/modal-projects/training-gym/blob/main/tutorials/singlenode/000_qwen35b/000_qwen35b.ipynb" target="_blank" rel="nofollow noopener noreferrer"><img src="https://modal-cdn.com/open-in-modal.svg" alt="Open in Modal"></a> |

### Agents
Expand Down
4 changes: 2 additions & 2 deletions docs-next/astro.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ export default defineConfig({
{ label: 'Qwen3-14B', link: '/reference/models/qwen3_14b/' },
{ label: 'Qwen3-30B-A3B', link: '/reference/models/qwen3_30b/' },
{ label: 'Qwen3-32B', link: '/reference/models/qwen3_32b/' },
{ label: 'Qwen3.6-27B', link: '/reference/models/qwen3_6_27b/' },

{ label: 'Qwen3.6-35B-A3B', link: '/reference/models/qwen3_6_35b/' },
],
},
Expand All @@ -182,7 +182,7 @@ export default defineConfig({
{ label: 'TrainConfig', link: '/reference/training/trainconfig/' },
{ label: 'MultiTurn', link: '/reference/training/multiturn/' },
{ label: 'SlimeRecipe', link: '/reference/training/slimerecipe/' },
{ label: 'Qwen3_6_27b_Recipe', link: '/reference/training/qwen3_6_27b_recipe/' },

{ label: 'Qwen3_6_35b_Recipe', link: '/reference/training/qwen3_6_35b_recipe/' },
],
},
Expand Down
5 changes: 1 addition & 4 deletions modal_training_gym/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
"Qwen3_14B": ("modal_training_gym.common.models", "Qwen3_14B"),
"Qwen3_30B": ("modal_training_gym.common.models", "Qwen3_30B"),
"Qwen3_32B": ("modal_training_gym.common.models", "Qwen3_32B"),
"Qwen3_6_27B": ("modal_training_gym.common.models", "Qwen3_6_27B"),
"Qwen3_6_35B": ("modal_training_gym.common.models", "Qwen3_6_35B"),
"Qwen3_ASR_1_7B": ("modal_training_gym.common.models", "Qwen3_ASR_1_7B"),
"Qwen3_ASR_1_7b_Recipe": (
Expand Down Expand Up @@ -116,9 +115,7 @@
"Qwen3_14B",
"Qwen3_30B",
"Qwen3_32B",
"Qwen3_6_27B",
"Qwen3_6_35B",
"Qwen3_ASR_1_7B",
"Qwen3_6_R_1_7B",
"Qwen3_ASR_1_7b_Recipe",
"score_in_sandbox",
"SlimeRecipe",
Expand Down
3 changes: 1 addition & 2 deletions modal_training_gym/common/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .qwen3_32b import Qwen3_32B
from .kimi_k2_5 import Kimi_K2_5
from .kimi_k2_6 import Kimi_K2_6
from .qwen3_6_27b import Qwen3_6_27B

from .qwen3_6_35b import Qwen3_6_35B
from .qwen3_asr_1_7b import Qwen3_ASR_1_7B

Expand All @@ -43,7 +43,6 @@
"parse_kimi_k2_response",
"parse_qwen3_6_response",
"parse_qwen3_response",
"Qwen3_6_27B",
"Qwen3_6_35B",
"Qwen3_ASR_1_7B",
]
5 changes: 1 addition & 4 deletions modal_training_gym/deploy_recipes/sglang_recipe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
from modal_training_gym.deploy_recipes.sglang_recipe.qwen3_32b import (
Qwen3_32b_SglangRecipe,
)
from modal_training_gym.deploy_recipes.sglang_recipe.qwen3_6_27b import (
Qwen3_6_27b_SglangRecipe,
)

from modal_training_gym.deploy_recipes.sglang_recipe.qwen3_6_35b import (
Qwen3_6_35b_SglangRecipe,
)
Expand All @@ -44,6 +42,5 @@
"Qwen3_14b_SglangRecipe",
"Qwen3_30b_SglangRecipe",
"Qwen3_32b_SglangRecipe",
"Qwen3_6_27b_SglangRecipe",
"Qwen3_6_35b_SglangRecipe",
]
5 changes: 1 addition & 4 deletions modal_training_gym/deploy_recipes/vllm_recipe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
from modal_training_gym.deploy_recipes.vllm_recipe.qwen3_14b import Qwen3_14b_VllmRecipe
from modal_training_gym.deploy_recipes.vllm_recipe.qwen3_30b import Qwen3_30b_VllmRecipe
from modal_training_gym.deploy_recipes.vllm_recipe.qwen3_32b import Qwen3_32b_VllmRecipe
from modal_training_gym.deploy_recipes.vllm_recipe.qwen3_6_27b import (
Qwen3_6_27b_VllmRecipe,
)

from modal_training_gym.deploy_recipes.vllm_recipe.qwen3_6_35b import (
Qwen3_6_35b_VllmRecipe,
)
Expand All @@ -26,6 +24,5 @@
"Qwen3_14b_VllmRecipe",
"Qwen3_30b_VllmRecipe",
"Qwen3_32b_VllmRecipe",
"Qwen3_6_27b_VllmRecipe",
"Qwen3_6_35b_VllmRecipe",
]
2 changes: 0 additions & 2 deletions modal_training_gym/frameworks/slime/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@
"patch_rollout_status_reporting", _SLIME_PATCHES
)
_PATCH_LOG_ELIDE_B64 = encode_patch("patch_log_elide", _SLIME_PATCHES)
_PATCH_CP_LOG_ROLLOUT_B64 = encode_patch("patch_cp_log_rollout", _SLIME_PATCHES)


def _build_slime_base_image() -> "Image":
Expand All @@ -123,7 +122,6 @@ def _build_slime_base_image() -> "Image":
f"echo {_PATCH_QWEN3_ASR_EXPORT_B64} | base64 -d | python3",
f"echo {_PATCH_ROLLOUT_STATUS_B64} | base64 -d | python3",
f"echo {_PATCH_LOG_ELIDE_B64} | base64 -d | python3",
f"echo {_PATCH_CP_LOG_ROLLOUT_B64} | base64 -d | python3",
)
)

Expand Down
5 changes: 2 additions & 3 deletions modal_training_gym/frameworks/slime/modal_helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ def get_checkpoint_conversion_policy(
getattr(slime_cfg, "conversion_tensor_model_parallel_size", None)
or slime_cfg.tensor_model_parallel_size
)
pp = (
getattr(slime_cfg, "conversion_pipeline_model_parallel_size", None)
or getattr(slime_cfg, "pipeline_model_parallel_size", 1)
pp = getattr(slime_cfg, "conversion_pipeline_model_parallel_size", None) or getattr(
slime_cfg, "pipeline_model_parallel_size", 1
)

if tp == 1 and pp == 1 and getattr(slime_cfg, "mtp_num_layers", 0):
Expand Down
5 changes: 1 addition & 4 deletions modal_training_gym/train_recipes/slime_recipe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@
from modal_training_gym.train_recipes.slime_recipe.qwen3_14b import Qwen3_14b_Recipe
from modal_training_gym.train_recipes.slime_recipe.qwen3_32b import Qwen3_32b_Recipe
from modal_training_gym.train_recipes.slime_recipe.qwen3_4b import Qwen3_4b_Recipe
from modal_training_gym.train_recipes.slime_recipe.qwen3_6_27b import (
Qwen3_6_27b_Recipe,
)

from modal_training_gym.train_recipes.slime_recipe.qwen3_6_35b import (
Qwen3_6_35b_Recipe,
)
Expand All @@ -29,7 +27,6 @@
"Qwen3_8b_Recipe",
"Qwen3_14b_Recipe",
"Qwen3_32b_Recipe",
"Qwen3_6_27b_Recipe",
"Qwen3_6_35b_Recipe",
"Qwen3_ASR_1_7b_Recipe",
]
6 changes: 1 addition & 5 deletions modal_training_gym/train_recipes/slime_recipe/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,9 +549,7 @@ def get_base_recipe(cls, model_config: ModelConfig) -> "SlimeRecipe | None":
from modal_training_gym.train_recipes.slime_recipe.qwen3_4b import (
Qwen3_4b_Recipe,
)
from modal_training_gym.train_recipes.slime_recipe.qwen3_6_27b import (
Qwen3_6_27b_Recipe,
)

from modal_training_gym.train_recipes.slime_recipe.qwen3_6_35b import (
Qwen3_6_35b_Recipe,
)
Expand All @@ -573,8 +571,6 @@ def get_base_recipe(cls, model_config: ModelConfig) -> "SlimeRecipe | None":
return Qwen3_14b_Recipe()
if model_config.model_name == "Qwen/Qwen3-32B":
return Qwen3_32b_Recipe()
if model_config.model_name == "Qwen/Qwen3.6-27B":
return Qwen3_6_27b_Recipe()
if model_config.model_name == "Qwen/Qwen3.6-35B-A3B":
return Qwen3_6_35b_Recipe()
return None
5 changes: 4 additions & 1 deletion modal_training_gym/utils/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class MetadataStore(Enum):
SUMMARY_KEY = "summary"
SUMMARY_ITEMS_KEY = "items"


# Summary stores whose canonical per-item files share the summary's shape, so a
# collapsed/stale summary can be rebuilt from the canonical files rather than
# trusted blindly. Rollouts are intentionally excluded: their canonical files
Expand Down Expand Up @@ -247,7 +248,9 @@ def vol_count_items(store: MetadataStore | str) -> int:
vol = _metadata_volume()
_safe_reload(vol)
try:
return sum(1 for e in vol.iterdir(_store_path(store)) if e.path.endswith(".json"))
return sum(
1 for e in vol.iterdir(_store_path(store)) if e.path.endswith(".json")
)
except FileNotFoundError:
return 0

Expand Down
14 changes: 0 additions & 14 deletions scripts/api_reference_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,6 @@
"class_type": "config_data",
"sidebar_label": "Qwen3-32B",
},
{
"class_name": "Qwen3_6_27B",
"module": "modal_training_gym.common.models.qwen3_6_27b",
"group": "models",
"class_type": "config_data",
"sidebar_label": "Qwen3.6-27B",
},
{
"class_name": "Qwen3_6_35B",
"module": "modal_training_gym.common.models.qwen3_6_35b",
Expand Down Expand Up @@ -206,13 +199,6 @@
"class_type": "config_data",
"sidebar_label": "SlimeRecipe",
},
{
"class_name": "Qwen3_6_27b_Recipe",
"module": "modal_training_gym.train_recipes.slime_recipe.qwen3_6_27b",
"group": "training",
"class_type": "config_data",
"sidebar_label": "Qwen3_6_27b_Recipe",
},
{
"class_name": "Qwen3_6_35b_Recipe",
"module": "modal_training_gym.train_recipes.slime_recipe.qwen3_6_35b",
Expand Down
Loading