From b2b087bf1d5a4185c8fe27e654e5253392f442e9 Mon Sep 17 00:00:00 2001
From: Ganeshkumar Ashokavardhanan <aganeshkumar@microsoft.com>
Date: Tue, 30 Jun 2026 16:11:22 -0700
Subject: [PATCH 1/2] feat(gpu): enable CUDA driver prebake on shared Ubuntu
 gen2 VHD builds

Turn on the NVIDIA_CUDA_PREBAKE feature flag (added dark in #8786) for the
two shared x86 gen2 Ubuntu images that GPU CUDA nodes boot --
2204gen2containerd and 2404gen2containerd -- in both the release
(.vsts-vhd-builder-release.yaml) and PR/test (.vsts-vhd-builder.yaml) VHD
pipelines.

With the flag set, install-dependencies.sh pre-builds the NVIDIA CUDA kernel
module into the VHD at build time. This change only enables the bake: GPU
nodes can skip the ~80-150s in-CSE DKMS compile only once the configGPUDrivers
skip-build path (PR #8787) lands. Non-GPU and --gpu-driver None nodes tear the
module down during provisioning (cleanUpPrebakedGPUDriver, also from #8786),
so the shared image carries no extra attack surface on those nodes.

Scope rationale:
- Only 22.04/24.04 gen2 x86: these are the images that GPU CUDA SKUs
  (A10/A100/H100) boot from, confirmed via e2e GPU scenarios that pin
  VHDUbuntu2204Gen2Containerd / VHDUbuntu2404Gen2Containerd. AzureLinux GPU is
  out of scope (the bake is Ubuntu-only); gen1, FIPS, Trusted Launch (TL), and
  arm64 images are not used by supported CUDA GPU SKUs.
- The Copy CIS Reports step keys off an exact-match FEATURE_FLAGS allowlist, so
  NVIDIA_CUDA_PREBAKE is added to that list to preserve report publishing.

Signed-off-by: Ganeshkumar Ashokavardhanan <aganeshkumar@microsoft.com>
---
 .pipelines/.vsts-vhd-builder-release.yaml           | 8 ++++++--
 .pipelines/.vsts-vhd-builder.yaml                   | 8 ++++++--
 .pipelines/templates/.builder-release-template.yaml | 2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/.pipelines/.vsts-vhd-builder-release.yaml b/.pipelines/.vsts-vhd-builder-release.yaml
index 3ff9671a1fd..dd6a6ca201a 100644
--- a/.pipelines/.vsts-vhd-builder-release.yaml
+++ b/.pipelines/.vsts-vhd-builder-release.yaml
@@ -611,7 +611,9 @@ stages:
               echo '##vso[task.setvariable variable=IMG_VERSION]latest'
               echo '##vso[task.setvariable variable=HYPERV_GENERATION]V2'
               echo '##vso[task.setvariable variable=AZURE_VM_SIZE]Standard_D16ds_v5'
-              echo '##vso[task.setvariable variable=FEATURE_FLAGS]None'
+              # NVIDIA_CUDA_PREBAKE bakes the CUDA driver kernel module into this shared x86 gen2 image so GPU
+              # nodes skip the ~80-150s DKMS compile at boot; non-GPU nodes tear it down during provisioning.
+              echo '##vso[task.setvariable variable=FEATURE_FLAGS]NVIDIA_CUDA_PREBAKE'
               echo '##vso[task.setvariable variable=ARCHITECTURE]X86_64'
               echo '##vso[task.setvariable variable=ENABLE_FIPS]False'
               echo '##vso[task.setvariable variable=ENABLE_TRUSTED_LAUNCH]False'
@@ -683,7 +685,9 @@ stages:
               echo '##vso[task.setvariable variable=IMG_VERSION]latest'
               echo '##vso[task.setvariable variable=HYPERV_GENERATION]V2'
               echo '##vso[task.setvariable variable=AZURE_VM_SIZE]Standard_D16ds_v5'
-              echo '##vso[task.setvariable variable=FEATURE_FLAGS]None'
+              # NVIDIA_CUDA_PREBAKE bakes the CUDA driver kernel module into this shared x86 gen2 image so GPU
+              # nodes skip the ~80-150s DKMS compile at boot; non-GPU nodes tear it down during provisioning.
+              echo '##vso[task.setvariable variable=FEATURE_FLAGS]NVIDIA_CUDA_PREBAKE'
               echo '##vso[task.setvariable variable=ARCHITECTURE]X86_64'
               echo '##vso[task.setvariable variable=ENABLE_FIPS]False'
               echo '##vso[task.setvariable variable=ENABLE_TRUSTED_LAUNCH]False'
diff --git a/.pipelines/.vsts-vhd-builder.yaml b/.pipelines/.vsts-vhd-builder.yaml
index e556a651b49..0383d9adadd 100644
--- a/.pipelines/.vsts-vhd-builder.yaml
+++ b/.pipelines/.vsts-vhd-builder.yaml
@@ -83,7 +83,9 @@ stages:
             echo '##vso[task.setvariable variable=IMG_VERSION]latest'
             echo '##vso[task.setvariable variable=HYPERV_GENERATION]V2'
             echo '##vso[task.setvariable variable=AZURE_VM_SIZE]Standard_D16ds_v5'
-            echo '##vso[task.setvariable variable=FEATURE_FLAGS]None'
+            # NVIDIA_CUDA_PREBAKE bakes the CUDA driver kernel module into this shared x86 gen2 image so GPU
+            # nodes skip the ~80-150s DKMS compile at boot; non-GPU nodes tear it down during provisioning.
+            echo '##vso[task.setvariable variable=FEATURE_FLAGS]NVIDIA_CUDA_PREBAKE'
             echo '##vso[task.setvariable variable=ARCHITECTURE]X86_64'
             echo '##vso[task.setvariable variable=ENABLE_FIPS]False'
             echo '##vso[task.setvariable variable=ENABLE_TRUSTED_LAUNCH]False'
@@ -104,7 +106,9 @@ stages:
             echo '##vso[task.setvariable variable=IMG_VERSION]latest'
             echo '##vso[task.setvariable variable=HYPERV_GENERATION]V2'
             echo '##vso[task.setvariable variable=AZURE_VM_SIZE]Standard_D16ds_v5'
-            echo '##vso[task.setvariable variable=FEATURE_FLAGS]None'
+            # NVIDIA_CUDA_PREBAKE bakes the CUDA driver kernel module into this shared x86 gen2 image so GPU
+            # nodes skip the ~80-150s DKMS compile at boot; non-GPU nodes tear it down during provisioning.
+            echo '##vso[task.setvariable variable=FEATURE_FLAGS]NVIDIA_CUDA_PREBAKE'
             echo '##vso[task.setvariable variable=ARCHITECTURE]X86_64'
             echo '##vso[task.setvariable variable=ENABLE_FIPS]false'
             echo '##vso[task.setvariable variable=ENABLE_TRUSTED_LAUNCH]False'
diff --git a/.pipelines/templates/.builder-release-template.yaml b/.pipelines/templates/.builder-release-template.yaml
index 3f8e59bb583..6c5195359dc 100644
--- a/.pipelines/templates/.builder-release-template.yaml
+++ b/.pipelines/templates/.builder-release-template.yaml
@@ -353,7 +353,7 @@ steps:
       TargetFolder: '$(Build.ArtifactStagingDirectory)'
 
   - task: CopyFiles@2
-    condition: and(eq(variables.OS_SKU, 'Ubuntu'), in(variables.OS_VERSION, '22.04', '24.04'), in(variables.FEATURE_FLAGS, 'None', 'cvm', 'NVIDIA_GB'))
+    condition: and(eq(variables.OS_SKU, 'Ubuntu'), in(variables.OS_VERSION, '22.04', '24.04'), in(variables.FEATURE_FLAGS, 'None', 'cvm', 'NVIDIA_GB', 'NVIDIA_CUDA_PREBAKE'))
     displayName: Copy CIS Reports
     inputs:
       SourceFolder: '$(System.DefaultWorkingDirectory)'

From 9a42032f9e8ddd17abc64d94d50ede5527527161 Mon Sep 17 00:00:00 2001
From: Ganeshkumar Ashokavardhanan <aganeshkumar@microsoft.com>
Date: Tue, 30 Jun 2026 16:19:39 -0700
Subject: [PATCH 2/2] docs(gpu): clarify prebake comment is a capability gated
 on the consume path

Address review feedback: the inline rationale said GPU nodes "skip the DKMS
compile at boot", but this PR only enables the bake -- the boot-time skip
requires the configGPUDrivers skip-build path (PR #8787), which is not yet in
main. Reword to "can later skip ... via the configGPUDrivers skip-build path"
so pipeline maintainers aren't misled, and note the teardown covers non-GPU and
--gpu-driver None nodes.

Signed-off-by: Ganeshkumar Ashokavardhanan <aganeshkumar@microsoft.com>
---
 .pipelines/.vsts-vhd-builder-release.yaml | 6 ++++--
 .pipelines/.vsts-vhd-builder.yaml         | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/.pipelines/.vsts-vhd-builder-release.yaml b/.pipelines/.vsts-vhd-builder-release.yaml
index dd6a6ca201a..ae7819d6f19 100644
--- a/.pipelines/.vsts-vhd-builder-release.yaml
+++ b/.pipelines/.vsts-vhd-builder-release.yaml
@@ -612,7 +612,8 @@ stages:
               echo '##vso[task.setvariable variable=HYPERV_GENERATION]V2'
               echo '##vso[task.setvariable variable=AZURE_VM_SIZE]Standard_D16ds_v5'
               # NVIDIA_CUDA_PREBAKE bakes the CUDA driver kernel module into this shared x86 gen2 image so GPU
-              # nodes skip the ~80-150s DKMS compile at boot; non-GPU nodes tear it down during provisioning.
+              # nodes can later skip the ~80-150s in-CSE DKMS compile via the configGPUDrivers skip-build path
+              # (PR #8787); non-GPU and --gpu-driver None nodes tear the module down during provisioning.
               echo '##vso[task.setvariable variable=FEATURE_FLAGS]NVIDIA_CUDA_PREBAKE'
               echo '##vso[task.setvariable variable=ARCHITECTURE]X86_64'
               echo '##vso[task.setvariable variable=ENABLE_FIPS]False'
@@ -686,7 +687,8 @@ stages:
               echo '##vso[task.setvariable variable=HYPERV_GENERATION]V2'
               echo '##vso[task.setvariable variable=AZURE_VM_SIZE]Standard_D16ds_v5'
               # NVIDIA_CUDA_PREBAKE bakes the CUDA driver kernel module into this shared x86 gen2 image so GPU
-              # nodes skip the ~80-150s DKMS compile at boot; non-GPU nodes tear it down during provisioning.
+              # nodes can later skip the ~80-150s in-CSE DKMS compile via the configGPUDrivers skip-build path
+              # (PR #8787); non-GPU and --gpu-driver None nodes tear the module down during provisioning.
               echo '##vso[task.setvariable variable=FEATURE_FLAGS]NVIDIA_CUDA_PREBAKE'
               echo '##vso[task.setvariable variable=ARCHITECTURE]X86_64'
               echo '##vso[task.setvariable variable=ENABLE_FIPS]False'
diff --git a/.pipelines/.vsts-vhd-builder.yaml b/.pipelines/.vsts-vhd-builder.yaml
index 0383d9adadd..e4ecb102a1f 100644
--- a/.pipelines/.vsts-vhd-builder.yaml
+++ b/.pipelines/.vsts-vhd-builder.yaml
@@ -84,7 +84,8 @@ stages:
             echo '##vso[task.setvariable variable=HYPERV_GENERATION]V2'
             echo '##vso[task.setvariable variable=AZURE_VM_SIZE]Standard_D16ds_v5'
             # NVIDIA_CUDA_PREBAKE bakes the CUDA driver kernel module into this shared x86 gen2 image so GPU
-            # nodes skip the ~80-150s DKMS compile at boot; non-GPU nodes tear it down during provisioning.
+            # nodes can later skip the ~80-150s in-CSE DKMS compile via the configGPUDrivers skip-build path
+            # (PR #8787); non-GPU and --gpu-driver None nodes tear the module down during provisioning.
             echo '##vso[task.setvariable variable=FEATURE_FLAGS]NVIDIA_CUDA_PREBAKE'
             echo '##vso[task.setvariable variable=ARCHITECTURE]X86_64'
             echo '##vso[task.setvariable variable=ENABLE_FIPS]False'
@@ -107,7 +108,8 @@ stages:
             echo '##vso[task.setvariable variable=HYPERV_GENERATION]V2'
             echo '##vso[task.setvariable variable=AZURE_VM_SIZE]Standard_D16ds_v5'
             # NVIDIA_CUDA_PREBAKE bakes the CUDA driver kernel module into this shared x86 gen2 image so GPU
-            # nodes skip the ~80-150s DKMS compile at boot; non-GPU nodes tear it down during provisioning.
+            # nodes can later skip the ~80-150s in-CSE DKMS compile via the configGPUDrivers skip-build path
+            # (PR #8787); non-GPU and --gpu-driver None nodes tear the module down during provisioning.
             echo '##vso[task.setvariable variable=FEATURE_FLAGS]NVIDIA_CUDA_PREBAKE'
             echo '##vso[task.setvariable variable=ARCHITECTURE]X86_64'
             echo '##vso[task.setvariable variable=ENABLE_FIPS]false'