diff --git a/examples/puzzletron/README.md b/examples/puzzletron/README.md
index dce76866d6d..48954a2b773 100644
--- a/examples/puzzletron/README.md
+++ b/examples/puzzletron/README.md
@@ -43,7 +43,7 @@ python -m pytest tests/gpu/torch/puzzletron/test_puzzletron.py -k "Qwen3-8B"
 
 - For this example we are using 2x NVIDIA H100 80GB HBM3 to show multi-GPU steps. You can also use a single GPU.
 
-- To make use of [Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) and [Nemotron-Post-Training-Dataset-v2](https://huggingface.co/datasets/nvidia/Nemotron-Post-Training-Dataset-v2), you need to accept the terms and conditions for the corresponding model and the dataset in the Huggingface Hub. Log in to the Huggingface Hub and enter your HF token.
+- To make use of [Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) and [Puzzle-KD-Nemotron-Post-Training-Dataset-v2](https://huggingface.co/datasets/nvidia/Puzzle-KD-Nemotron-Post-Training-Dataset-v2), you need to accept the terms and conditions for the corresponding model and the dataset in the Huggingface Hub. Log in to the Huggingface Hub and enter your HF token.
 
 ```bash
 hf auth login --token <your token>
@@ -51,16 +51,18 @@ hf auth login --token <your token>
 
 ## Compress the Model
 
-1. Download and prepare the [Nemotron-Post-Training-Dataset-v2](https://huggingface.co/datasets/nvidia/Nemotron-Post-Training-Dataset-v2).
+1. Download and prepare the dataset.
 
-   dataset split: "code", "math", "stem", "chat", excluding reasoning samples (2.62GB)
+   **Default (recommended):** Use the prebuilt [Puzzle-KD-Nemotron-Post-Training-Dataset-v2](https://huggingface.co/datasets/nvidia/Puzzle-KD-Nemotron-Post-Training-Dataset-v2) (~3 GB disk required).
 
    ```bash
    python -m modelopt.torch.puzzletron.dataset.prepare_dataset \
-      --dataset_name nvidia/Nemotron-Post-Training-Dataset-v2 \
-      --output_dir path/to/Nemotron-Post-Training-Dataset-v2
+      --dataset_name nvidia/Puzzle-KD-Nemotron-Post-Training-Dataset-v2 \
+      --output_dir path/to/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
    ```
 
+   > **Note:** Alternatively, you can derive the dataset from the raw [Nemotron-Post-Training-Dataset-v2](https://huggingface.co/datasets/nvidia/Nemotron-Post-Training-Dataset-v2) by passing `--dataset_name nvidia/Nemotron-Post-Training-Dataset-v2`. This downloads the full raw dataset (~136 GB) before filtering it down to the same ~2.6 GB result. Only do this if you need to reproduce the preprocessing from scratch.
+
 2. Specify the `puzzle_dir`, `input_hf_model_path`, `dataset_path`, `intermediate_size_list`, and `target_memory` arguments in the [llama-3_1-8B_pruneffn_memory.yaml](./configs/llama-3_1-8B_pruneffn_memory/llama-3_1-8B_pruneffn_memory.yaml) configuration file.
 
    - `puzzle_dir` indicates a new directory for saving the resulting model.
diff --git a/examples/puzzletron/configs/gptoss-20b_remove_experts_memory/gptoss-20b_remove_experts_memory.yaml b/examples/puzzletron/configs/gptoss-20b_remove_experts_memory/gptoss-20b_remove_experts_memory.yaml
index 8ed06e95689..fac942e35ac 100644
--- a/examples/puzzletron/configs/gptoss-20b_remove_experts_memory/gptoss-20b_remove_experts_memory.yaml
+++ b/examples/puzzletron/configs/gptoss-20b_remove_experts_memory/gptoss-20b_remove_experts_memory.yaml
@@ -6,7 +6,7 @@ defaults:
 input_hf_model_path: /workspace/hf_models/openai/gpt-oss-20b
 
 # Dataset path for pruning and NAS scoring
-dataset_path: /workspace/datasets/Nemotron-Post-Training-Dataset-v2
+dataset_path: /workspace/datasets/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
 
 # Working directory for compression outputs
 puzzle_dir: /workspace/puzzle_dir
diff --git a/examples/puzzletron/configs/llama-3_1-8B_pruneffn_memory/llama-3_1-8B_pruneffn_memory.yaml b/examples/puzzletron/configs/llama-3_1-8B_pruneffn_memory/llama-3_1-8B_pruneffn_memory.yaml
index ad16dbc5ea0..bfac4ef6944 100644
--- a/examples/puzzletron/configs/llama-3_1-8B_pruneffn_memory/llama-3_1-8B_pruneffn_memory.yaml
+++ b/examples/puzzletron/configs/llama-3_1-8B_pruneffn_memory/llama-3_1-8B_pruneffn_memory.yaml
@@ -6,7 +6,7 @@ defaults:
 input_hf_model_path: /workspace/hf_models/meta-llama/Llama-3.1-8B-Instruct
 
 # Dataset path for pruning and NAS scoring
-dataset_path: /workspace/datasets/Nemotron-Post-Training-Dataset-v2
+dataset_path: /workspace/datasets/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
 
 # Working directory for puzzletron outputs
 puzzle_dir: /workspace/puzzle_dir
diff --git a/examples/puzzletron/configs/llama-3_1-8B_pruneffn_runtime/llama-3_1-8B_pruneffn_runtime.yaml b/examples/puzzletron/configs/llama-3_1-8B_pruneffn_runtime/llama-3_1-8B_pruneffn_runtime.yaml
index 588df25f27d..2ca0f2c16cf 100644
--- a/examples/puzzletron/configs/llama-3_1-8B_pruneffn_runtime/llama-3_1-8B_pruneffn_runtime.yaml
+++ b/examples/puzzletron/configs/llama-3_1-8B_pruneffn_runtime/llama-3_1-8B_pruneffn_runtime.yaml
@@ -6,7 +6,7 @@ defaults:
 input_hf_model_path: /workspace/hf_models/meta-llama/Llama-3.1-8B-Instruct
 
 # Dataset path for pruning and NAS scoring
-dataset_path: /workspace/datasets/Nemotron-Post-Training-Dataset-v2
+dataset_path: /workspace/datasets/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
 
 # Working directory for puzzletron outputs
 puzzle_dir: /workspace/puzzle_dir
diff --git a/examples/puzzletron/configs/llama-3_2-3B_pruneffn_memory/llama-3_2-3B_pruneffn_memory.yaml b/examples/puzzletron/configs/llama-3_2-3B_pruneffn_memory/llama-3_2-3B_pruneffn_memory.yaml
index b5303d318a3..879f1cc4a22 100644
--- a/examples/puzzletron/configs/llama-3_2-3B_pruneffn_memory/llama-3_2-3B_pruneffn_memory.yaml
+++ b/examples/puzzletron/configs/llama-3_2-3B_pruneffn_memory/llama-3_2-3B_pruneffn_memory.yaml
@@ -6,7 +6,7 @@ defaults:
 input_hf_model_path: /workspace/hf_models/meta-llama/Llama-3.2-3B-Instruct
 
 # Dataset path for pruning and NAS scoring
-dataset_path: /workspace/datasets/Nemotron-Post-Training-Dataset-v2
+dataset_path: /workspace/datasets/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
 
 # Working directory for compression outputs
 puzzle_dir: /workspace/puzzle_dir
diff --git a/examples/puzzletron/configs/mistral-small-24b-instruct-2501_pruneffn_memory/mistral-small-24b-instruct-2501_pruneffn_memory.yaml b/examples/puzzletron/configs/mistral-small-24b-instruct-2501_pruneffn_memory/mistral-small-24b-instruct-2501_pruneffn_memory.yaml
index 68a0652d6f1..11f1856ec09 100644
--- a/examples/puzzletron/configs/mistral-small-24b-instruct-2501_pruneffn_memory/mistral-small-24b-instruct-2501_pruneffn_memory.yaml
+++ b/examples/puzzletron/configs/mistral-small-24b-instruct-2501_pruneffn_memory/mistral-small-24b-instruct-2501_pruneffn_memory.yaml
@@ -6,7 +6,7 @@ defaults:
 input_hf_model_path: /workspace/hf_models/mistralai/Mistral-Small-24B-Instruct-2501
 
 # Dataset path for pruning and NAS scoring
-dataset_path: /workspace/datasets/Nemotron-Post-Training-Dataset-v2
+dataset_path: /workspace/datasets/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
 
 # Working directory for compression outputs
 puzzle_dir: /workspace/puzzle_dir
diff --git a/examples/puzzletron/configs/nemotron-nano-12b-v2/nemotron_nano_12b_v2_pruneffn_memory.yaml b/examples/puzzletron/configs/nemotron-nano-12b-v2/nemotron_nano_12b_v2_pruneffn_memory.yaml
index 3b880b2c7d1..5bb3273433d 100644
--- a/examples/puzzletron/configs/nemotron-nano-12b-v2/nemotron_nano_12b_v2_pruneffn_memory.yaml
+++ b/examples/puzzletron/configs/nemotron-nano-12b-v2/nemotron_nano_12b_v2_pruneffn_memory.yaml
@@ -6,7 +6,7 @@ defaults:
 input_hf_model_path: /workspace/hf_models/nvidia/Nemotron-Nano-12B-v2
 
 # Dataset path for pruning and NAS scoring
-dataset_path: /workspace/datasets/Nemotron-Post-Training-Dataset-v2
+dataset_path: /workspace/datasets/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
 
 # Working directory for compression outputs
 puzzle_dir: /workspace/puzzle_dir
diff --git a/examples/puzzletron/configs/qwen2_5_7b_instruct_pruneffn_memory/qwen2_5_7b_instruct_pruneffn_memory.yaml b/examples/puzzletron/configs/qwen2_5_7b_instruct_pruneffn_memory/qwen2_5_7b_instruct_pruneffn_memory.yaml
index fb961033bc3..d0758ce6167 100644
--- a/examples/puzzletron/configs/qwen2_5_7b_instruct_pruneffn_memory/qwen2_5_7b_instruct_pruneffn_memory.yaml
+++ b/examples/puzzletron/configs/qwen2_5_7b_instruct_pruneffn_memory/qwen2_5_7b_instruct_pruneffn_memory.yaml
@@ -6,7 +6,7 @@ defaults:
 input_hf_model_path: /workspace/hf_models/Qwen/Qwen2.5-7B-Instruct
 
 # Dataset path for pruning and NAS scoring
-dataset_path: /workspace/datasets/Nemotron-Post-Training-Dataset-v2
+dataset_path: /workspace/datasets/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
 
 # Working directory for compression outputs
 puzzle_dir: /workspace/puzzle_dir
diff --git a/examples/puzzletron/configs/qwen3-8b_pruneffn_memory/qwen3_8b_pruneffn_memory.yaml b/examples/puzzletron/configs/qwen3-8b_pruneffn_memory/qwen3_8b_pruneffn_memory.yaml
index 4ee81286dd2..15d8f48afa5 100644
--- a/examples/puzzletron/configs/qwen3-8b_pruneffn_memory/qwen3_8b_pruneffn_memory.yaml
+++ b/examples/puzzletron/configs/qwen3-8b_pruneffn_memory/qwen3_8b_pruneffn_memory.yaml
@@ -6,7 +6,7 @@ defaults:
 input_hf_model_path: /workspace/hf_models/Qwen/Qwen3-8B
 
 # Dataset path for pruning and NAS scoring
-dataset_path: /workspace/datasets/Nemotron-Post-Training-Dataset-v2
+dataset_path: /workspace/datasets/Puzzle-KD-Nemotron-Post-Training-Dataset-v2
 
 # Working directory for compression outputs
 puzzle_dir: /workspace/puzzle_dir
diff --git a/modelopt/torch/puzzletron/dataset/prepare_dataset.py b/modelopt/torch/puzzletron/dataset/prepare_dataset.py
index 0928b111afc..3d80062ae0f 100644
--- a/modelopt/torch/puzzletron/dataset/prepare_dataset.py
+++ b/modelopt/torch/puzzletron/dataset/prepare_dataset.py
@@ -23,6 +23,8 @@
 
 __all__ = ["process_and_save_dataset"]
 
+PREBUILT_KD_DATASET = "nvidia/Puzzle-KD-Nemotron-Post-Training-Dataset-v2"
+
 
 def process_and_save_dataset(
     dataset_name: str,
@@ -40,6 +42,15 @@ def process_and_save_dataset(
             )
             return
 
+    # The prebuilt dataset is already filtered and split — skip the 136 GB download.
+    if dataset_name == PREBUILT_KD_DATASET:
+        ds_dict = datasets.load_dataset(dataset_name)
+        os.makedirs(output_dir, exist_ok=True)
+        ds_dict.save_to_disk(output_dir)
+        mprint(f"Dataset splits:\n{ds_dict}")
+        mprint(f"Saved processed datasets to {output_dir}")
+        return
+
     ds = datasets.load_dataset(dataset_name, split=split)
     ds = datasets.concatenate_datasets(ds)
     # Filter out samples with reasoning = on