diff --git a/DC-AI-deployment b/DC-AI-deployment index 378148f..e4c0174 100644 --- a/DC-AI-deployment +++ b/DC-AI-deployment @@ -7,7 +7,6 @@ ADOC_POST=yes ADOC_TYPE=book ADOC_ATTRIBUTES=" --attribute env-daps=1" ADOC_ATTRIBUTES+=" --attribute PROF_PRODUCT=suseai" -ADOC_ATTRIBUTES+=" --attribute PROF_PRODUCT=suseai" ADOC_ATTRIBUTES+=" --attribute PROF_DEPLOYMENT=standard" STYLEROOT="/usr/share/xml/docbook/stylesheet/suse2022-ns" @@ -17,4 +16,5 @@ FALLBACK_STYLEROOT="/usr/share/xml/docbook/stylesheet/suse-ns" DOCBOOK5_RNG_URI="http://docbook.org/xml/5.2/rng/docbookxi.rng" #XSLTPARAM+=' --param toc.section.depth=2' -#XSLTPARAM+=' --param bubbletoc.section.depth=3 --param bubbletoc.max.depth=3' \ No newline at end of file +#XSLTPARAM+=' --param bubbletoc.section.depth=3 --param bubbletoc.max.depth=3' +#XSLTPARAM+=' --stringparam generate.toc="book title" ' \ No newline at end of file diff --git a/references/ollama-helmchart.adoc b/references/ollama-helmchart.adoc index add47c5..e219e6f 100644 --- a/references/ollama-helmchart.adoc +++ b/references/ollama-helmchart.adoc @@ -36,7 +36,6 @@ global: ingress: enabled: false defaultModel: "gemma:2b" -runtimeClassName: nvidia ollama: models: pull: diff --git a/references/owui-helm-overrides.adoc b/references/owui-helm-overrides.adoc index 842c208..05096e9 100644 --- a/references/owui-helm-overrides.adoc +++ b/references/owui-helm-overrides.adoc @@ -450,7 +450,6 @@ Following is an example of the `open-webui-pipelines-values.yaml` override file. 
[source,yaml] ---- -runtimeClassName: nvidia global: imagePullSecrets: - application-collection diff --git a/references/pytorch-helm-overrides.adoc b/references/pytorch-helm-overrides.adoc index 74713ad..43bb2f6 100644 --- a/references/pytorch-helm-overrides.adoc +++ b/references/pytorch-helm-overrides.adoc @@ -11,7 +11,6 @@ include::../snippets/helm-chart-overrides-intro.adoc[] [source,yaml] ---- # pytorch_custom_overrides.yaml -runtimeClassName: nvidia global: imagePullSecrets: - application-collection <.> @@ -48,7 +47,6 @@ To create a ConfigMap, run the following command: [source,yaml] ---- # pytorch_custom_overrides.yaml -runtimeClassName: nvidia global: imagePullSecrets: - application-collection @@ -82,7 +80,6 @@ Move the `entrypoint.sh` file plus any helper files under the `scripts/` directo [source,yaml] ---- # pytorch_custom_overrides.yaml -runtimeClassName: nvidia global: imagePullSecrets: - application-collection <.> @@ -117,7 +114,6 @@ For production use, we recommend using a storage solution suitable for persisten [source,yaml] ---- # pytorch_custom_overrides.yaml -runtimeClassName: nvidia global: imagePullSecrets: - application-collection <.> @@ -153,7 +149,6 @@ For production use, we recommend using a storage solution suitable for persisten [source,yaml] ---- # pytorch_custom_overrides.yaml -runtimeClassName: nvidia global: imagePullSecrets: - application-collection <.> diff --git a/references/vllm-helm-overrides.adoc b/references/vllm-helm-overrides.adoc index f39f729..e7c8f2f 100644 --- a/references/vllm-helm-overrides.adoc +++ b/references/vllm-helm-overrides.adoc @@ -111,7 +111,6 @@ The following {vllm} override file includes basic configuration options. * Access to a {huggingface} token (`HF_TOKEN`). * The model `meta-llama/Llama-3.1-8B-Instruct` from this example is a gated model that requires you to accept the agreement to access it. For more information, see link:https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct[]. 
-* The `runtimeClassName` specified here is `nvidia`. * Update the `storageClass:` entry for each `modelSpec`. [source,yaml] @@ -121,7 +120,6 @@ global: imagePullSecrets: - application-collection servingEngineSpec: - runtimeClassName: "nvidia" modelSpec: - name: "llama3" <.> registry: "dp.apps.rancher.io" <.> @@ -263,7 +261,6 @@ global: imagePullSecrets: - application-collection servingEngineSpec: - runtimeClassName: "nvidia" modelSpec: - name: "llama3" registry: "dp.apps.rancher.io" @@ -383,7 +380,6 @@ global: imagePullSecrets: - application-collection servingEngineSpec: - runtimeClassName: "nvidia" modelSpec: - name: "mistral" registry: "dp.apps.rancher.io" @@ -432,7 +428,6 @@ global: imagePullSecrets: - application-collection servingEngineSpec: - runtimeClassName: "nvidia" modelSpec: - name: "mistral" registry: "dp.apps.rancher.io" diff --git a/tasks/NVIDIA-Operator-installation.adoc b/tasks/NVIDIA-Operator-installation.adoc index 9955f7f..0115107 100644 --- a/tasks/NVIDIA-Operator-installation.adoc +++ b/tasks/NVIDIA-Operator-installation.adoc @@ -76,7 +76,14 @@ The NVIDIA operator restarts containerd with a hangup call which restarts RKE2. [IMPORTANT] ==== -The envvars `ACCEPT_NVIDIA_VISIBLE_DEVICES_ENVVAR_WHEN_UNPRIVILEGED`, `ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS` and `DEVICE_LIST_STRATEGY` are required to properly isolate GPU resources as explained in https://docs.google.com/document/d/1zy0key-EL6JH50MZgwg96RPYxxXXnVUdxLZwGiyqLd8/edit?tab=t.0[Preventing unprivileged access to GPUs in Kubernetes]. +The envvars `ACCEPT_NVIDIA_VISIBLE_DEVICES_ENVVAR_WHEN_UNPRIVILEGED`, `ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS` and `DEVICE_LIST_STRATEGY` are required to properly isolate GPU resources as explained in link:https://docs.google.com/document/d/1zy0key-EL6JH50MZgwg96RPYxxXXnVUdxLZwGiyqLd8/edit?tab=t.0[Preventing unprivileged access to GPUs in Kubernetes]. 
+====
+
+[IMPORTANT]
+====
+NVIDIA GPU Operator v25.10.x uses the link:https://github.com/cncf-tags/container-device-interface/blob/main/SPEC.md[Container Device Interface (CDI) specification], which simplifies operations.
+We recommend enabling CDI (the default) and the NRI plug-in on RKE2.
+With both features enabled, you no longer need to pass extra environment variables for security requirements or set `runtimeClassName: nvidia` in your pod specifications.
 ====
 
 [,yaml]
@@ -164,7 +171,6 @@ metadata:
   namespace: default
 spec:
   restartPolicy: OnFailure
-  runtimeClassName: nvidia
   containers:
   - name: cuda-container
     image: nvcr.io/nvidia/k8s/cuda-sample:nbody