From a9788dba0ad738df2995a63deb7296f1cc439f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ba=C5=BEant?= Date: Wed, 25 Mar 2026 11:51:20 +0100 Subject: [PATCH 1/3] updated airgap installation for all AI applications --- articles/ai-deployment-airgapped.adoc | 26 ++- concepts/AI-air-gap-stack.adoc | 4 +- references/litellm-helm-overrides.adoc | 10 +- references/ollama-helmchart.adoc | 11 +- references/owui-helm-overrides.adoc | 221 ++---------------- references/pytorch-helm-overrides.adoc | 18 +- references/qdrant-helm-overrides.adoc | 11 +- references/vllm-helm-overrides.adoc | 218 ++++++++++++++++- tasks/AI-deployment-ailibrary-installing.adoc | 10 +- tasks/litellm-installing.adoc | 16 +- tasks/mcpo-installing.adoc | 24 +- tasks/milvus-installing.adoc | 67 +----- tasks/mlflow-installing.adoc | 37 +-- tasks/ollama-installing.adoc | 2 +- tasks/opensearch-installing.adoc | 35 ++- tasks/owui-installing.adoc | 14 +- tasks/pytorch-installing.adoc | 16 +- tasks/qdrant-installing.adoc | 26 +++ tasks/vllm-installing.adoc | 14 +- 19 files changed, 467 insertions(+), 313 deletions(-) diff --git a/articles/ai-deployment-airgapped.adoc b/articles/ai-deployment-airgapped.adoc index aa0e61e..b7a710f 100644 --- a/articles/ai-deployment-airgapped.adoc +++ b/articles/ai-deployment-airgapped.adoc @@ -87,24 +87,26 @@ include::../glues/ai-library-intro.adoc[leveloffset=+1] :override-title: Installation procedure include::../tasks/AI-deployment-ailibrary-installing.adoc[leveloffset=+2] include::../tasks/cert-manager-installing.adoc[leveloffset=+2] -// include::../tasks/opensearch-installing.adoc[leveloffset=+2] +include::../tasks/opensearch-installing.adoc[leveloffset=+2] include::../tasks/milvus-installing.adoc[leveloffset=+2] include::../tasks/ollama-installing.adoc[leveloffset=+2] include::../references/ollama-helmchart.adoc[leveloffset=+3] include::../tasks/owui-installing.adoc[leveloffset=+2] include::../references/owui-helm-overrides.adoc[leveloffset=+3] include::../references/owui-helmchart.adoc[leveloffset=+3] -// include::../tasks/vllm-installing.adoc[leveloffset=+2] -// include::../references/vllm-helm-overrides.adoc[leveloffset=+3] -// include::../tasks/mcpo-installing.adoc[leveloffset=+2] -// include::../tasks/pytorch-installing.adoc[leveloffset=+2] -// include::../references/pytorch-helm-overrides.adoc[leveloffset=+3] -// include::../references/pytorch-helmchart.adoc[leveloffset=+3] -// include::../tasks/mlflow-installing.adoc[leveloffset=+2] -// include::../tasks/ai-deployment-ailibrary-deployer.adoc[leveloffset=+2] -// steps after deployment -// :override-title: Steps after the installation is complete -// include::../tasks/AI-deployment-steps-after.adoc[leveloffset=+1] +include::../tasks/vllm-installing.adoc[leveloffset=+2] +include::../references/vllm-helm-overrides.adoc[leveloffset=+3] +include::../tasks/mcpo-installing.adoc[leveloffset=+2] +include::../tasks/pytorch-installing.adoc[leveloffset=+2] +include::../references/pytorch-helm-overrides.adoc[leveloffset=+3] +include::../references/pytorch-helmchart.adoc[leveloffset=+3] +include::../tasks/qdrant-installing.adoc[leveloffset=+2] +include::../references/qdrant-helm-overrides.adoc[leveloffset=+3] +include::../tasks/litellm-installing.adoc[leveloffset=+2] +include::../references/litellm-helm-overrides.adoc[leveloffset=+3] +include::../references/litellm-helmchart.adoc[leveloffset=+3] +include::../tasks/mlflow-installing.adoc[leveloffset=+2] +//include::../tasks/ai-deployment-ailibrary-deployer.adoc[leveloffset=+2] [appendix] 
include::../references/AI-glossary.adoc[leveloffset=+1]
diff --git a/concepts/AI-air-gap-stack.adoc b/concepts/AI-air-gap-stack.adoc
index f08d3c9..b0aeac9 100644
--- a/concepts/AI-air-gap-stack.adoc
+++ b/concepts/AI-air-gap-stack.adoc
@@ -28,5 +28,5 @@ The following simplified workflow outlines the intended usage:
 Transfer the downloaded content to an air-gapped _local_ host and add it as a {zypper} repository to install {nvidia} drivers on _local_ GPU nodes.
 . Use `SUSE-AI-get-images.sh` on a _remote_ host to download {docker} images of required {productname} components.
 Transfer them to an air-gapped _local_ host.
-. Use `SUSE-AI-load-images.sh` to load the transferred {docker} images of {productname} components into a custom _local_ ${docker} image registry.
-. Install {ailibrary} components on the _local_ {kube} cluster from the _local_ custom {docker} registry.
+. Use `SUSE-AI-load-images.sh` to load the transferred {docker} images of {productname} components into a custom _local_ {docker} image registry.
+. Install {ailibrary} applications on the _local_ {kube} cluster from the _local_ custom {docker} registry.
diff --git a/references/litellm-helm-overrides.adoc b/references/litellm-helm-overrides.adoc
index 6de2a44..6efbd2e 100644
--- a/references/litellm-helm-overrides.adoc
+++ b/references/litellm-helm-overrides.adoc
@@ -18,11 +18,18 @@ endif::[]
 :override-abstract!:
 :override-title!:
-:revdate: 2026-02-10
+:revdate: 2026-03-25
 :page-revdate: {revdate}
 
 include::../snippets/helm-chart-overrides-intro.adoc[]
 
+ifdef::deployment_standard[]
+:imgRegistry: {empty}
+endif::[]
+ifdef::deployment_airgap[]
+:imgRegistry: imageRegistry: :5043
+endif::[]
+
 [#litellm-chart-example-basic]
 .Basic override file with {postgresql} deployment and master key automatically generated.
 ====
@@ -33,6 +40,7 @@ global:
   imagePullSecrets:
   - application-collection <.>
   - suse-ai-registry <.>
+  {imgRegistry}
 postgresql:
   persistence:
     storageClassName: "local-path" <.>
diff --git a/references/ollama-helmchart.adoc b/references/ollama-helmchart.adoc
index add47c5..0b82e37 100644
--- a/references/ollama-helmchart.adoc
+++ b/references/ollama-helmchart.adoc
@@ -1,6 +1,6 @@
 [#ollama-helmchart]
 = Values for the {ollama} {helm} chart
-:revdate: 2025-12-22
+:revdate: 2026-03-24
 :page-revdate: {revdate}
 
 include::../snippets/helm-chart-overrides-intro.adoc[]
@@ -28,11 +28,18 @@ If you do not want to use the {nvidia} GPU, remove the `gpu` section from `ollam
 .Basic override file with GPU and two models pulled at startup
 ====
+ifdef::deployment_standard[]
+:imgRegistry: {empty}
+endif::[]
+ifdef::deployment_airgap[]
+:imgRegistry: imageRegistry: :5043
+endif::[]
 [source,yaml]
 ----
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 ingress:
   enabled: false
 defaultModel: "gemma:2b"
@@ -54,7 +61,7 @@ persistentVolume: <.>
   enabled: true
   storageClass: local-path <.>
 ----
-<.> Without the `persistentVolume` option enabled, changes made to {ollama}--such as downloading other LLM-- are lost when the container is restarted.
+<.> Without the `persistentVolume` option enabled, changes made to {ollama}, such as downloading other LLMs, are lost when the container is restarted.
 <.> Use `local-path` storage only for testing purposes.
 For production use, we recommend using a storage solution suitable for persistent storage, such as {sstorage}.
==== diff --git a/references/owui-helm-overrides.adoc b/references/owui-helm-overrides.adoc index 842c208..30d5d2b 100644 --- a/references/owui-helm-overrides.adoc +++ b/references/owui-helm-overrides.adoc @@ -1,6 +1,6 @@ [#owui-helm-overrides] = Examples of {owui} {helm} chart override files -:revdate: 2025-12-22 +:revdate: 2026-03-24 :page-revdate: {revdate} include::../snippets/helm-chart-overrides-intro.adoc[] @@ -10,11 +10,17 @@ include::../snippets/helm-chart-overrides-intro.adoc[] ==== The following override file installs {ollama} during the {owui} installation. ifeval::["{PROF_DEPLOYMENT}" == "standard"] +:imgRegistry: {empty} +endif::[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] +:imgRegistry: imageRegistry: :5043 +endif::[] [source,yaml] ---- global: imagePullSecrets: - application-collection + {imgRegistry} ollamaUrls: - http://open-webui-ollama..svc.cluster.local:11434 persistence: @@ -102,172 +108,19 @@ You can customize it to match your actual infrastructure experimentally. Refer to link:https://www.nltk.org/index.html[] for licensing information. <.> API key value for communication between {owui} and {owui} Pipelines. The default value is '0p3n-w3bu!'. -endif::[] -ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] -[source,yaml] ----- -global: - imagePullSecrets: - - application-collection - imageRegistry: :5043 -ollamaUrls: -- http://open-webui-ollama..svc.cluster.local:11434 -persistence: - enabled: true - storageClass: local-path <.> -ollama: - enabled: true - ingress: - enabled: false - defaultModel: "gemma:2b" - ollama: - models: <.> - pull: - - "gemma:2b" - - "llama3.1" - gpu: <.> - enabled: true - type: 'nvidia' - number: 1 - persistentVolume: <.> - enabled: true - storageClass: local-path -pipelines: - enabled: true - persistence: - storageClass: local-path - extraEnvVars: <.> - - name: PIPELINES_URLS <.> - value: "https://raw.githubusercontent.com/SUSE/suse-ai-observability-extension/refs/heads/main/integrations/oi-filter/suse_ai_filter.py" - - name: OTEL_SERVICE_NAME <.> - value: "Open WebUI" - - name: OTEL_EXPORTER_HTTP_OTLP_ENDPONT <.> - value: "http://opentelemetry-collector.suse-observability.svc.cluster.local:4318" - - name: PRICING_JSON <.> - value: "https://raw.githubusercontent.com/SUSE/suse-ai-observability-extension/refs/heads/main/integrations/oi-filter/pricing.json" -ingress: - enabled: true - class: "" - annotations: - nginx.ingress.kubernetes.io/ssl-redirect: "true" - nginx.ingress.kubernetes.io/proxy-body-size: "1024m" - host: suse-ollama-webui <.> - tls: true -extraEnvVars: -- name: DEFAULT_MODELS <.> - value: "gemma:2b" -- name: DEFAULT_USER_ROLE - value: "user" -- name: WEBUI_NAME - value: "SUSE AI" -- name: GLOBAL_LOG_LEVEL - value: INFO -- name: RAG_EMBEDDING_MODEL - value: "sentence-transformers/all-MiniLM-L6-v2" -- name: VECTOR_DB - value: "milvus" -- name: MILVUS_URI - value: http://milvus..svc.cluster.local:19530 -- name: INSTALL_NLTK_DATASETS <.> - value: "true" -- name: OMP_NUM_THREADS - value: "1" -- name: OPENAI_API_KEY <.> - value: "0p3n-w3bu!" ----- -<.> Use `local-path` storage only for testing purposes. -For production use, we recommend using a storage solution more suitable for persistent storage. -To use {sstorage}, specify `longhorn`. -<.> Specifies that two large language models (LLM) will be loaded in {ollama} when the container starts. -<.> Enables GPU support for {ollama}. -The `type` must be `nvidia` because {nvidia} GPUs are the only supported devices. 
-`number` must be between 1 and the number of {nvidia} GPUs present on the system. -<.> Without the `persistentVolume` option enabled, changes made to {ollama}--such as downloading other LLM-- are lost when the container is restarted. -<.> The environment variables that you are making available for the pipeline's runtime container. -<.> A list of pipeline URLs to be downloaded and installed by default. -Individual URLs are separated by a semicolon `;`. -For air-gapped deployments, you need to provide the pipelines at URLs that are accessible from the local host, such as an internal GitLab instance. -<.> The service name that appears in traces and topological representations in {sobservability}. -<.> The endpoint for the {otelemetry} collector. -Make sure to use the HTTP port of your collector. -<.> A file for the model multipliers in cost estimation. -You can customize it to match your actual infrastructure experimentally. -For air-gapped deployments, you need to provide the pipelines at URLs that are accessible from the local host, such as an internal GitLab instance. -<.> Specifies the default LLM for {ollama}. -<.> Specifies the host name for the {owui} Web UI. -<.> Installs the _natural language toolkit_ (NLTK) datasets for {ollama}. -Refer to link:https://www.nltk.org/index.html[] for licensing information. -<.> API key value for communication between {owui} and {owui} Pipelines. -The default value is '0p3n-w3bu!'. -endif::[] ==== [#owui-ollama-deploy-separate] .{owui} override file with {ollama} installed separately ==== The following override file installs {ollama} separately from the {owui} installation. -ifeval::["{PROF_DEPLOYMENT}" == "standard"] -[source,yaml] ----- -global: - imagePullSecrets: - - application-collection -ollamaUrls: -- http://ollama..svc.cluster.local:11434 -persistence: - enabled: true - storageClass: local-path <.> -ollama: - enabled: false -pipelines: - enabled: False - persistence: - storageClass: local-path <.> -ingress: - enabled: true - class: "" - annotations: - nginx.ingress.kubernetes.io/ssl-redirect: "true" - host: suse-ollama-webui - tls: true -extraEnvVars: -- name: DEFAULT_MODELS <.> - value: "gemma:2b" -- name: DEFAULT_USER_ROLE - value: "user" -- name: WEBUI_NAME - value: "SUSE AI" -- name: GLOBAL_LOG_LEVEL - value: INFO -- name: RAG_EMBEDDING_MODEL - value: "sentence-transformers/all-MiniLM-L6-v2" -- name: VECTOR_DB - value: "milvus" -- name: MILVUS_URI - value: http://milvus..svc.cluster.local:19530 -- name: ENABLE_OTEL <.> - value: "true" -- name: OTEL_EXPORTER_OTLP_ENDPOINT <.> - value: http://opentelemetry-collector.observability.svc.cluster.local:4317 <.> -- name: OMP_NUM_THREADS - value: "1" ----- -<.> Use `local-path` storage only for testing purposes. -For production use, we recommend using a storage solution suitable for persistent storage, such as {sstorage}. -<.> Use `local-path` storage only for testing purposes. -For production use, we recommend using a storage solution suitable for persistent storage, such as {sstorage}. -<.> Specifies the default LLM for {ollama}. -<.> These values are optional, required only to receive telemetry data from {owui}. -<.> These values are optional, required only to receive telemetry data from {owui}. -<.> The URL of the {otelemetry} Collector installed by the user. 
-endif::[] -ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] + [source,yaml] ---- global: imagePullSecrets: - application-collection - imageRegistry: :5043 + {imgRegistry} ollamaUrls: - http://ollama..svc.cluster.local:11434 persistence: @@ -316,7 +169,6 @@ For production use, we recommend using a storage solution suitable for persisten <.> These values are optional, required only to receive telemetry data from {owui}. <.> These values are optional, required only to receive telemetry data from {owui}. <.> The URL of the {otelemetry} Collector installed by the user. -endif::[] ==== [#owui-ollama-deploy-pipelines] @@ -329,44 +181,13 @@ This simple filter adds a limit to the number of question and answer turns durin ===== Pipelines normally require additional configuration provided either via environment variables or specified in the {owui} Web UI. ===== -ifeval::["{PROF_DEPLOYMENT}" == "standard"] -[source,yaml] ----- -global: - imagePullSecrets: - - application-collection -ollamaUrls: -- http://ollama..svc.cluster.local:11434 -persistence: - enabled: true - storageClass: local-path -ollama: - enabled: false -pipelines: - enabled: true - persistence: - storageClass: local-path - extraEnvVars: - - name: PIPELINES_URLS <.> - value: "https://raw.githubusercontent.com/SUSE/suse-ai-observability-extension/refs/heads/main/integrations/oi-filter/conversation_turn_limit_filter.py" -ingress: - enabled: true - class: "" - annotations: - nginx.ingress.kubernetes.io/ssl-redirect: "true" - host: suse-ollama-webui - tls: true -[...] ----- -<.> A list of pipeline URLs to be downloaded and installed by default. -Individual URLs are separated by a semicolon `;`. -endif::[] -ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] + [source,yaml] ---- global: imagePullSecrets: - application-collection + {imgRegistry} ollamaUrls: - http://ollama..svc.cluster.local:11434 persistence: @@ -392,11 +213,7 @@ ingress: ---- <.> A list of pipeline URLs to be downloaded and installed by default. Individual URLs are separated by a semicolon `;`. -For air-gapped deployments, you need to provide the pipelines at URLs that are accessible from the local host, such as an internal GitLab instance. -endif::[] -==== -ifeval::["{PROF_DEPLOYMENT}" == "standard"] [#owui-ollama-deploy-vllm] .{owui} override file with a connection to {vllm} ==== @@ -438,6 +255,7 @@ extraEnvVars: You can install the `open-webui-pipelines` service as a stand-alone deployment, independent of the {owui} chart. To install open-webui-pipelines as a stand-alone component, use the following command: +ifdef::deployment_standard[] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install open-webui-pipelines \ @@ -445,6 +263,18 @@ To install open-webui-pipelines as a stand-alone component, use the following co -n \ -f open-webui-pipelines-values.yaml ---- +endif::[] +ifdef::deployment_airgap[] +[source,bash,subs="+attributes"] +---- +{prompt_user}helm upgrade --install open-webui-pipelines \ + charts/open-webui-pipelines-.tgz \ +-n \ +--version \ +-f open-webui-pipelines-values.yaml +---- +endif::[] + Following is an example of the `open-webui-pipelines-values.yaml` override file. 
@@ -454,6 +284,7 @@ runtimeClassName: nvidia
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 image:
   registry: dp.apps.rancher.io
   repository: containers/open-webui-pipelines
@@ -464,5 +295,3 @@ persistence:
   storageClass: local-path
   size: 10Gi
 ----
-
-endif::[]
diff --git a/references/pytorch-helm-overrides.adoc b/references/pytorch-helm-overrides.adoc
index 74713ad..b4274b5 100644
--- a/references/pytorch-helm-overrides.adoc
+++ b/references/pytorch-helm-overrides.adoc
@@ -1,10 +1,17 @@
 [#pytorch-helm-overrides]
 = Examples of {pytorch} {helm} chart override files
-:revdate: 2025-12-22
+:revdate: 2026-03-25
 :page-revdate: {revdate}
 
 include::../snippets/helm-chart-overrides-intro.adoc[]
 
+ifdef::deployment_standard[]
+:imgRegistry: {empty}
+endif::[]
+ifdef::deployment_airgap[]
+:imgRegistry: imageRegistry: :5043
+endif::[]
+
 [#pytorch-chart-example-basic]
 .Basic override file with GPU enabled
 ====
@@ -15,6 +22,7 @@ runtimeClassName: nvidia
 global:
   imagePullSecrets:
   - application-collection <.>
+  {imgRegistry}
 image:
   registry: dp.apps.rancher.io
   repository: containers/pytorch
@@ -51,7 +59,8 @@ To create a ConfigMap, run the following command:
 runtimeClassName: nvidia
 global:
   imagePullSecrets:
-  - application-collection
+  - application-collection
+  {imgRegistry}
 image:
   registry: dp.apps.rancher.io <.>
   repository: containers/pytorch
@@ -86,6 +95,7 @@ runtimeClassName: nvidia
 global:
   imagePullSecrets:
   - application-collection <.>
+  {imgRegistry}
 image:
   registry: dp.apps.rancher.io
   repository: containers/pytorch
@@ -121,6 +131,7 @@ runtimeClassName: nvidia
 global:
   imagePullSecrets:
   - application-collection <.>
+  {imgRegistry}
 image:
   registry: dp.apps.rancher.io
   repository: containers/pytorch
@@ -157,6 +168,7 @@ runtimeClassName: nvidia
 global:
   imagePullSecrets:
   - application-collection <.>
+  {imgRegistry}
 image:
   registry: dp.apps.rancher.io
   repository: containers/pytorch
@@ -183,4 +195,4 @@ For production use, we recommend using a storage solution suitable for persisten
 <.> Do not specify the protocol, such as `https://`.
 <.> Specify a branch name, a tag name or a commit.
 <.> Specify a preconfigured secret with username and password (or token).
-====
+====
\ No newline at end of file
diff --git a/references/qdrant-helm-overrides.adoc b/references/qdrant-helm-overrides.adoc
index 798415a..7089098 100644
--- a/references/qdrant-helm-overrides.adoc
+++ b/references/qdrant-helm-overrides.adoc
@@ -19,9 +19,16 @@ endif::[]
 :override-abstract!:
 :override-title!:
-:revdate: 2026-03-09
+:revdate: 2026-03-25
 :page-revdate: {revdate}
 
+ifdef::deployment_standard[]
+:imgRegistry: {empty}
+endif::[]
+ifdef::deployment_airgap[]
+:imgRegistry: imageRegistry: :5043
+endif::[]
+
 [#qdrant-chart-example-basic]
 .Basic override file when the cluster has no default storage class set.
==== @@ -31,6 +38,7 @@ endif::[] global: imagePullSecrets: - suse-ai-registry <.> + {imgRegistry} persistence: accessModes: ["ReadWriteOnce"] size: 10Gi @@ -56,6 +64,7 @@ For production use, we recommend using a storage solution suitable for persisten global: imagePullSecrets: - suse-ai-registry <.> + {imgRegistry} env: - name: QDRANT__GPU__INDEXING value: "1" diff --git a/references/vllm-helm-overrides.adoc b/references/vllm-helm-overrides.adoc index f39f729..c2b559c 100644 --- a/references/vllm-helm-overrides.adoc +++ b/references/vllm-helm-overrides.adoc @@ -1,6 +1,6 @@ [#vllm-helm-overrides] = Examples of {vllm} {helm} chart override files -:revdate: 2025-12-22 +:revdate: 2026-03-24 :page-revdate: {revdate} include::../snippets/helm-chart-overrides-intro.adoc[] @@ -10,6 +10,7 @@ include::../snippets/helm-chart-overrides-intro.adoc[] ==== The following override file installs {vllm} using a model that is publicly available. +ifdef::deployment_standard[] [source,yaml] ---- global: @@ -28,6 +29,28 @@ servingEngineSpec: requestMemory: "16Gi" requestGPU: 1 ---- +endif::[] +ifdef::deployment_airgap[] +[source,yaml] +---- +global: + imagePullSecrets: + - application-collection + imageRegistry: :5043 +servingEngineSpec: + modelSpec: + - name: "phi3-mini-4k" + registry: "dp.apps.rancher.io" + repository: "containers/vllm-openai" + tag: "0.9.1" + imagePullPolicy: "IfNotPresent" + modelURL: "microsoft/Phi-3-mini-4k-instruct" + replicaCount: 1 + requestCPU: 6 + requestMemory: "16Gi" + requestGPU: 1 +---- +endif::[] .Validating the installation . Pulling the images can take a long time. @@ -114,6 +137,7 @@ For more information, see link:https://huggingface.co/meta-llama/Llama-3.1-8B-In * The `runtimeClassName` specified here is `nvidia`. * Update the `storageClass:` entry for each `modelSpec`. +ifdef::deployment_standard[] [source,yaml] ---- # vllm_custom_overrides.yaml @@ -166,6 +190,62 @@ Sufficient memory is required to load the model. <.> Your {huggingface} token for accessing gated models. Replace `HF_TOKEN` with your actual token. ==== +endif::[] +ifdef::deployment_airgap[] +[source,yaml] +---- +# vllm_custom_overrides.yaml +global: + imagePullSecrets: + - application-collection + imageRegistry: :5043 +servingEngineSpec: + runtimeClassName: "nvidia" + modelSpec: + - name: "llama3" <.> + registry: "dp.apps.rancher.io" <.> + repository: "containers/vllm-openai" <.> + tag: "0.9.1" <.> + imagePullPolicy: "IfNotPresent" + modelURL: "meta-llama/Llama-3.1-8B-Instruct" <.> + replicaCount: 1 <.> + requestCPU: 10 <.> + requestMemory: "16Gi" <.> + requestGPU: 1 <.> + storageClass: + pvcStorage: "50Gi" <.> + pvcAccessMode: + - ReadWriteOnce + + vllmConfig: + enableChunkedPrefill: false <.> + enablePrefixCaching: false <.> + maxModelLen: 4096 <.> + dtype: "bfloat16" <.> + extraArgs: ["--disable-log-requests", "--gpu-memory-utilization", "0.8"] <.> + + hf_token: <.> +---- +<.> The unique identifier for your model deployment. +<.> The {docker} image registry containing the model's serving engine image. +<.> The {docker} image repository containing the model's serving engine image. +<.> The version of the model image to use. +<.> The URL pointing to the model on {huggingface} or another hosting service. +<.> The number of replicas for the deployment, which allows scaling for load. +<.> The amount of CPU resources requested per replica. +<.> Memory allocation for the deployment. +Sufficient memory is required to load the model. +<.> The number of GPUs to allocate for the deployment. 
+<.> The Persistent Volume Claim (PVC) size for model storage.
+<.> Optimizes performance by prefetching model chunks.
+<.> Enables caching of prompt prefixes to speed up inference for repeated prompts.
+<.> The maximum sequence length the model can handle.
+<.> The data type for model weights, such as `bfloat16` for mixed-precision inference and faster performance on modern GPUs.
+<.> Additional command-line arguments for {vllm}, such as disabling request logging or setting GPU memory utilization.
+<.> Your {huggingface} token for accessing gated models.
+Replace `HF_TOKEN` with your actual token.
+====
+endif::[]
 
 [#vllm-helm-overrides-prefetched]
 .Loading prefetched models from persistent storage
@@ -256,6 +336,7 @@ Apply the specification with the following commands:
 ----
 . Update the custom {vllm} override file with support for PVC.
 +
+ifdef::deployment_standard[]
 [source,yaml]
 ----
 # vllm_custom_overrides.yaml
@@ -293,6 +374,47 @@ servingEngineSpec:
 ----
 <.> Specify your PVC name.
 <.> The mount path must match the base directory of the `servingEngineSpec.modelSpec.modelURL` value specified above.
+endif::[]
+ifdef::deployment_airgap[]
+[source,yaml]
+----
+# vllm_custom_overrides.yaml
+global:
+  imagePullSecrets:
+  - application-collection
+  imageRegistry: :5043
+servingEngineSpec:
+  runtimeClassName: "nvidia"
+  modelSpec:
+  - name: "llama3"
+    registry: "dp.apps.rancher.io"
+    repository: "containers/vllm-openai"
+    tag: "0.9.1"
+    imagePullPolicy: "IfNotPresent"
+    modelURL: "/models/llama-3.1-8b-it"
+    replicaCount: 1
+
+    requestCPU: 10
+    requestMemory: "16Gi"
+    requestGPU: 1
+
+    extraVolumes:
+      - name: models-pvc
+        persistentVolumeClaim:
+          claimName: models-pvc <.>
+
+    extraVolumeMounts:
+      - name: models-pvc
+        mountPath: /models <.>
+
+    vllmConfig:
+      maxModelLen: 4096
+
+    hf_token:
+----
+<.> Specify your PVC name.
+<.> The mount path must match the base directory of the `servingEngineSpec.modelSpec.modelURL` value specified above.
+endif::[]
+
 Save it as `vllm_custom_overrides.yaml` and apply with `kubectl apply -f vllm_custom_overrides.yaml`.
 . The following example lists mounted PVCs for a pod.
@@ -318,6 +440,7 @@ Ray is currently not supported.
 Therefore, sharding a single large model across multiple GPUs is not supported.
 =====
+ifdef::deployment_standard[]
 [source,yaml]
 ----
 # vllm_custom_overrides.yaml
@@ -358,6 +481,50 @@ servingEngineSpec:
   maxModelLen: 4096
   hf_token:
 ----
+endif::[]
+ifdef::deployment_airgap[]
+[source,yaml]
+----
+# vllm_custom_overrides.yaml
+global:
+  imagePullSecrets:
+  - application-collection
+  imageRegistry: :5043
+servingEngineSpec:
+  modelSpec:
+  - name: "llama3"
+    registry: "dp.apps.rancher.io"
+    repository: "containers/vllm-openai"
+    tag: "0.9.1"
+    imagePullPolicy: "IfNotPresent"
+    modelURL: "meta-llama/Llama-3.1-8B-Instruct"
+    replicaCount: 1
+    requestCPU: 10
+    requestMemory: "16Gi"
+    requestGPU: 1
+    pvcStorage: "50Gi"
+    storageClass:
+    vllmConfig:
+      maxModelLen: 4096
+  hf_token:
+
+  - name: "mistral"
+    registry: "dp.apps.rancher.io"
+    repository: "containers/vllm-openai"
+    tag: "0.9.1"
+    imagePullPolicy: "IfNotPresent"
+    modelURL: "mistralai/Mistral-7B-Instruct-v0.2"
+    replicaCount: 1
+    requestCPU: 10
+    requestMemory: "16Gi"
+    requestGPU: 1
+    pvcStorage: "50Gi"
+    storageClass:
+    vllmConfig:
+      maxModelLen: 4096
+  hf_token:
+----
+endif::[]
 ====
 
 [#vllm-helm-overrides-offloading]
 .Offloading KV cache to CPU and RAM
@@ -376,6 +543,7 @@ These experimental features are only supported on newer GPU generations.
 It is not recommended to enable them without a compelling reason.
 =====
+ifdef::deployment_standard[]
 [source,yaml]
 ----
 # vllm_custom_overrides.yaml
@@ -408,6 +576,42 @@ servingEngineSpec:
   hf_token:
 ----
+endif::[]
+ifdef::deployment_airgap[]
+[source,yaml]
+----
+# vllm_custom_overrides.yaml
+global:
+  imagePullSecrets:
+  - application-collection
+  imageRegistry: :5043
+servingEngineSpec:
+  runtimeClassName: "nvidia"
+  modelSpec:
+  - name: "mistral"
+    registry: "dp.apps.rancher.io"
+    repository: "containers/lmcache-vllm-openai"
+    tag: "0.3.2"
+    imagePullPolicy: "IfNotPresent"
+    modelURL: "mistralai/Mistral-7B-Instruct-v0.2"
+    replicaCount: 1
+    requestCPU: 10
+    requestMemory: "40Gi"
+    requestGPU: 1
+    pvcStorage: "50Gi"
+    storageClass:
+    pvcAccessMode:
+    - ReadWriteOnce
+    vllmConfig:
+      maxModelLen: 32000
+
+    lmcacheConfig:
+      enabled: false
+      cpuOffloadingBufferSize: "20"
+
+    hf_token:
+----
+endif::[]
 ====
 
 [#vllm-helm-overrides-lmcache]
@@ -424,13 +628,19 @@ Setting `lmcacheConfig.enabled` to `true` implicitly enables the `LMCACHE_USE_EX
 These experimental features are only supported on newer GPU generations.
 It is not recommended to enable them without a compelling reason.
 =====
-
-[source,yaml]
+ifdef::deployment_standard[]
+:imgRegistry: {empty}
+endif::[]
+ifdef::deployment_airgap[]
+:imgRegistry: imageRegistry: :5043
+endif::[]
+[source,yaml,subs="+attributes"]
 ----
 # vllm_custom_overrides.yaml
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 servingEngineSpec:
   runtimeClassName: "nvidia"
   modelSpec:
@@ -494,4 +704,4 @@ routerSpec:
   routingLogic: "session"
   sessionKey: "x-user-id"
 ----
-====
+====
\ No newline at end of file
diff --git a/tasks/AI-deployment-ailibrary-installing.adoc b/tasks/AI-deployment-ailibrary-installing.adoc
index 92e26bf..d740ee9 100644
--- a/tasks/AI-deployment-ailibrary-installing.adoc
+++ b/tasks/AI-deployment-ailibrary-installing.adoc
@@ -95,7 +95,8 @@ The username is `regcode` and the password is the {scca} registration code of yo
 -p
 ----
 ifeval::["{PROF_DEPLOYMENT}" == "airgapped"]
-. On the _remote_ host, download the `SUSE-AI-get-images.sh` script from the xref:ai-air-gap-stack[air-gap stack] and run it.
+. On the _remote_ host, download the `SUSE-AI-get-images.sh` script and the `charts.txt` file from the xref:ai-air-gap-stack[air-gap stack].
+Edit the list of workloads in the `charts.txt` file to match your preferences and run the script.
 +
 [source,bash,subs="+attributes"]
 ----
@@ -121,20 +122,13 @@ ifeval::["{PROF_DEPLOYMENT}" == "standard"]
 You can either install each component separately, or use the {saideployer} chart to install the components together as described in xref:ailibrary-installing-deployer[].
 endif::[]
-ifeval::["{PROF_DEPLOYMENT}" == "airgapped"]
-.. Install {milvus} as described in xref:milvus-installing[].
-endif::[]
-ifeval::["{PROF_DEPLOYMENT}" == "standard"]
 .. Install an application with vector database capabilities.
 {owui} supports either xref:opensearch-installing[{opensearch}] or xref:milvus-installing[{milvus}].
-endif::[]
 .. _(Optional)_ Install {ollama} as described in xref:ollama-installing[].
 .. Install {owui} as described in xref:owui-installing[].
-ifeval::["{PROF_DEPLOYMENT}" == "standard"]
 .. Install {vllm} as described in xref:vllm-installing[].
 .. Install {mcpo} as described in xref:mcpo-installing[].
 .. Install {pytorch} as described in xref:pytorch-installing[].
 .. Install {mlflow} as described in xref:mlflow-installing[].
 .. Install {qdrant} as described in xref:qdrant-installing[].
 .. Install {litellm} as described in xref:litellm-installing[].
-endif::[] diff --git a/tasks/litellm-installing.adoc b/tasks/litellm-installing.adoc index f389e96..7863f38 100644 --- a/tasks/litellm-installing.adoc +++ b/tasks/litellm-installing.adoc @@ -19,7 +19,7 @@ endif::[] :override-abstract!: :override-title!: -:revdate: 2026-02-10 +:revdate: 2026-03-25 :page-revdate: {revdate} [NOTE] @@ -57,6 +57,7 @@ include::../snippets/ai-library-requirement.adoc[] Find examples of {litellm} override files in xref:litellm-helm-overrides[] together with a list of all valid options and their values as displayed by the `helm show` commands. . Install the {litellm} {helm} chart using the `litellm_custom_overrides.yaml` file by running the following command. + +ifdef::deployment_standard[] [source,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -64,7 +65,19 @@ include::../snippets/ai-library-requirement.adoc[] -n SUSE_AI_NAMESPACE \ -f litellm_custom_overrides.yaml ---- +endif::[] +ifdef::deployment_airgap[] +[source,subs="+attributes"] +---- +{prompt_user}helm upgrade --install \ + litellm litellm-.tgz \ + -n SUSE_AI_NAMESPACE \ + --version \ + -f litellm_custom_overrides.yaml +---- +endif::[] +ifdef::deployment_standard[] [#litellm-upgrading] == Upgrading {litellm} @@ -83,6 +96,7 @@ You can upgrade {litellm} to a specific version by running the following command ==== If you omit the `--version` option, {litellm} gets upgraded to the latest available version. ==== +endif::[] [#litellm-uninstalling] == Uninstalling {litellm} diff --git a/tasks/mcpo-installing.adoc b/tasks/mcpo-installing.adoc index 75fba71..79c8c43 100644 --- a/tasks/mcpo-installing.adoc +++ b/tasks/mcpo-installing.adoc @@ -1,6 +1,6 @@ [#mcpo-installing] = Installing {mcpo} -:revdate: 2025-12-22 +:revdate: 2026-03-25 :page-revdate: {revdate} {mcp} (Model Context Protocol) is an open source standard for connecting AI applications{mdash}such as {suseai}{mdash}to external systems. @@ -32,12 +32,19 @@ include::../snippets/ai-library-requirement.adoc[] The following file defines multiple {mcp} servers in the `config.mcpServers` section. These servers will be added to the {mcpo} configuration file `config.json`. + +ifdef::deployment_standard[] +:imgRegistry: {empty} +endif::[] +ifdef::deployment_airgap[] +:imgRegistry: imageRegistry: :5043 +endif::[] [source,yaml] ---- # mcpo_custom_overrides.yaml global: imagePullSecrets: - application-collection + {imgRegistry} config: mcpServers: memory: @@ -66,6 +73,7 @@ config: . After saving the override file as `mcpo_custom_overrides.yaml`, apply its configuration with the following command. + +ifdef::deployment_standard[] [source,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -73,7 +81,17 @@ config: -n SUSE_AI_NAMESPACE \ -f mcpo_custom_overrides.yaml ---- - +endif::[] +ifdef::deployment_airgap[] +[source,subs="+attributes"] +---- +{prompt_user}helm upgrade --install \ + mcpo charts/open-webui-mcpo-.tgz \ + -n SUSE_AI_NAMESPACE \ + --version \ + -f mcpo_custom_overrides.yaml +---- +endif::[] [TIP] ==== .Installing {mcp} servers @@ -118,6 +136,7 @@ For more information, see link:https://documentation.suse.com/suse-ai/1.0/html/o To enable selected {mcp} tools by default for a model, refer to link:https://documentation.suse.com/suse-ai/1.0/html/openwebui-configuring/index.html#owui-enabling-default-mcp-services[Enabling default {mcp} services]. 
==== +ifdef::deployment_standard[] [#mcpo-upgrading] == Upgrading {mcpo} @@ -141,6 +160,7 @@ To upgrade {mcpo}, identify the new version number and run the following command ==== If you omit the `--version` option, {mcpo} gets upgraded to the latest available version. ==== +endif::[] [#mcpo-uninstalling] == Uninstalling {mcpo} diff --git a/tasks/milvus-installing.adoc b/tasks/milvus-installing.adoc index a6f6963..d224f5d 100644 --- a/tasks/milvus-installing.adoc +++ b/tasks/milvus-installing.adoc @@ -52,72 +52,17 @@ As a template, you can use the `values.yaml` file that is included in the `chart endif::[] + ifeval::["{PROF_DEPLOYMENT}" == "standard"] -[source,yaml] ----- -global: - imagePullSecrets: - - application-collection -cluster: - enabled: true -standalone: - persistence: - persistentVolumeClaim: - storageClassName: "local-path" -etcd: - replicaCount: 1 - persistence: - storageClassName: "local-path" -minio: - mode: distributed - replicas: 4 - rootUser: "admin" - rootPassword: "adminminio" - persistence: - storageClass: "local-path" - resources: - requests: - memory: 1024Mi -kafka: - enabled: true - name: kafka - replicaCount: 3 - broker: - enabled: true - cluster: - listeners: - client: - protocol: 'PLAINTEXT' - controller: - protocol: 'PLAINTEXT' - persistence: - enabled: true - annotations: {} - labels: {} - existingClaim: "" - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 8Gi - storageClassName: "local-path" -extraConfigFiles: <.> - user.yaml: |+ - trace: - exporter: jaeger - sampleFraction: 1 - jaeger: - url: "http://opentelemetry-collector.observability.svc.cluster.local:14268/api/traces" <.> ----- -<.> The `extraConfigFiles` section is optional, required only to receive telemetry data from {owui}. -<.> The URL of the {otelemetry} Collector installed by the user. +:imgRegistry: {empty} endif::[] ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] +:imgRegistry: imageRegistry: :5043 +endif::[] [source,yaml] ---- global: imagePullSecrets: - application-collection - imageRegistry: :5043 + {imgRegistry} cluster: enabled: true standalone: @@ -171,7 +116,6 @@ extraConfigFiles: <.> ---- <.> The `extraConfigFiles` section is optional, required only to receive telemetry data from {owui}. <.> The URL of the {otelemetry} Collector installed by the user. -endif::[] + [TIP] ==== @@ -195,7 +139,8 @@ ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] {prompt_user}helm upgrade --install \ milvus charts/milvus-.tgz \ -n \ - --version -f + --version \ + -f ---- endif::[] diff --git a/tasks/mlflow-installing.adoc b/tasks/mlflow-installing.adoc index 78d94e2..01076ce 100644 --- a/tasks/mlflow-installing.adoc +++ b/tasks/mlflow-installing.adoc @@ -1,8 +1,15 @@ [#mlflow-installing] = Installing {mlflow} -:revdate: 2025-12-22 +:revdate: 2026-03-25 :page-revdate: {revdate} +ifdef::deployment_standard[] +:imgRegistry: {empty} +endif::[] +ifdef::deployment_airgap[] +:imgRegistry: imageRegistry: :5043 +endif::[] + {mlflow} is an open-source platform for managing the end-to-end machine learning lifecycle. It provides a centralized model registry to track and manage the entire lifecycle of machine learning models. {mlflow} includes tools for experiment tracking, model packaging, versioning and deployment. @@ -10,20 +17,19 @@ This helps streamline the transition from development to production, ensuring re This section describes how to deploy {mlflow} using either {docker} or {helm} on a {kube} cluster. 
-// 2025-11-26 tbazant: commenting for now as we don't have an mlflow chart in appco
-//[#mlflow-installing-app-details]
-//== Details about the {mlflow} application
-//
-//Before deploying {mlflow}, it is important to know more about the supported configurations and documentation.
-//The following command provides the corresponding details:
-//
-//[source]
-//----
-//helm show values oci://dp.apps.rancher.io/charts/mlflow
-//----
-//
-//Alternatively, you can also refer to the {mlflow} {helm} chart page on the {sappco} site at link:https://apps.rancher.io/applications/mlflow[].
-//It contains {mlflow} dependencies, available versions and the link to pull the {mlflow} container image.
+[#mlflow-installing-app-details]
+== Details about the {mlflow} application
+
+Before deploying {mlflow}, review the supported configurations and documentation.
+The following command displays the corresponding details:
+
+[source]
+----
+helm show values oci://dp.apps.rancher.io/charts/mlflow
+----
+
+Alternatively, you can also refer to the {mlflow} {helm} chart page on the {sappco} site at link:https://apps.rancher.io/applications/mlflow[].
+It contains {mlflow} dependencies, available versions and the link to pull the {mlflow} container image.
 
 [#mlflow-installing-kubernetes]
 == Installing {mlflow} using {helm} on a {kube} cluster
@@ -50,6 +56,7 @@ image:
   tag: "CONTAINER_VERSION"
 imagePullSecrets:
   - name: application-collection
+  {imgRegistry}
 nameOverride: ""
 fullnameOverride: ""
 serviceAccount:
diff --git a/tasks/ollama-installing.adoc b/tasks/ollama-installing.adoc
index 53ba53e..c635d83 100644
--- a/tasks/ollama-installing.adoc
+++ b/tasks/ollama-installing.adoc
@@ -6,7 +6,7 @@
 {ollama} is a tool for running and managing language models locally on your computer.
 It offers a simple interface to download, run and interact with models without relying on cloud resources.
 
-[TIP,os=suseai]
+[TIP]
 ====
 When installing {suseai}, {ollama} is installed by the {owui} installation by default.
 If you decide to install {ollama} separately, disable its installation during the installation of {owui} as outlined in xref:owui-ollama-deploy-separate[].
diff --git a/tasks/opensearch-installing.adoc b/tasks/opensearch-installing.adoc
index 2e96772..2724c72 100644
--- a/tasks/opensearch-installing.adoc
+++ b/tasks/opensearch-installing.adoc
@@ -1,8 +1,15 @@
 [#opensearch-installing]
 = Installing {opensearch}
-:revdate: 2025-12-22
+:revdate: 2026-03-24
 :page-revdate: {revdate}
 
+ifdef::deployment_standard[]
+:imgRegistry: {empty}
+endif::[]
+ifdef::deployment_airgap[]
+:imgRegistry: imageRegistry: :5043
+endif::[]
+
 {opensearch} is a community-driven, open source search and analytics suite.
 It is used to search, visualize and analyze data.
 {opensearch} consists of a data store and search engine ({opensearch}), a visualization and user interface ({opensearch} Dashboards), and a server-side data collector (Data Prepper).
@@ -60,6 +67,7 @@ For a single-node cluster, use the following template file:
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 singleNode: true
 replicas: 1
 persistence:
@@ -94,6 +102,7 @@ For a multi-node cluster, use the following template file:
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 singleNode: false
 replicas: 3
 persistence:
@@ -150,6 +159,7 @@ config:
 ----
 . After saving the override file as `opensearch_custom_overrides.yaml`, apply its configuration with the following command.
+
+ifdef::deployment_standard[]
 [source,bash,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
   opensearch oci://dp.apps.rancher.io/charts/opensearch \
   -n \
   -f
 ----
+endif::[]
+ifdef::deployment_airgap[]
+[source,bash,subs="+attributes"]
+----
+{prompt_user}helm upgrade --install \
+  opensearch charts/opensearch-.tgz \
+  -n \
+  --version \
+  -f
+----
+endif::[]
 . Check that the pods and services are running.
 +
 [source,bash,subs="+attributes"]
 ----
 {prompt_user}kubectl get pods -n
@@ -220,6 +241,7 @@ extraEnvVars:
 ----
 . Redeploy {owui}.
 +
+ifdef::deployment_standard[]
 [source,bash,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
   open-webui oci://dp.apps.rancher.io/charts/open-webui \
   -n \
   -f
 ----
+endif::[]
+ifdef::deployment_airgap[]
+[source,bash,subs="+attributes"]
+----
+{prompt_user}helm upgrade --install \
+  open-webui charts/open-webui-.tgz \
+  -n \
+  --version \
+  -f
+----
+endif::[]
 . Verify that `VECTOR_DB` is set to `opensearch`.
 +
 [source,bash,subs="+attributes"]
diff --git a/tasks/owui-installing.adoc b/tasks/owui-installing.adoc
index 2f85864..676c6e8 100644
--- a/tasks/owui-installing.adoc
+++ b/tasks/owui-installing.adoc
@@ -35,13 +35,25 @@ Find more details in xref:owui-helm-overrides[].
 For a list of all installation options with examples, refer to xref:owui-helmchart-values[].
 . Install the {owui} {helm} chart using the `owui_custom_overrides.yaml` override file.
 +
+ifdef::deployment_standard[]
+[source,bash,subs="+attributes"]
+----
+{prompt_user}helm upgrade --install \
+  open-webui oci://dp.apps.rancher.io/charts/open-webui \
+  -n \
+  -f
+----
+endif::[]
+ifdef::deployment_airgap[]
 [source,bash,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
   open-webui charts/open-webui-.tgz \
   -n \
-  --version -f
+  --version \
+  -f
 ----
+endif::[]
 --
 ifeval::["{PROF_DEPLOYMENT}" == "standard"]
diff --git a/tasks/pytorch-installing.adoc b/tasks/pytorch-installing.adoc
index 2f6aa0b..b29791a 100644
--- a/tasks/pytorch-installing.adoc
+++ b/tasks/pytorch-installing.adoc
@@ -29,14 +29,27 @@ include::../snippets/ai-library-requirement.adoc[]
 Find examples of {pytorch} override files in xref:pytorch-helm-overrides[] and a list of all valid options and their values in xref:pytorch-helm-values[].
 . Install the {pytorch} {helm} chart using the `pytorch_custom_overrides.yaml` file by running the following command.
 +
-[source,subs="+attributes"]
+ifdef::deployment_standard[]
+[source,bash,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
   pytorch oci://dp.apps.rancher.io/charts/pytorch \
   -n SUSE_AI_NAMESPACE \
   -f pytorch_custom_overrides.yaml
 ----
+endif::[]
+ifdef::deployment_airgap[]
+[source,bash,subs="+attributes"]
+----
+{prompt_user}helm upgrade --install \
+  pytorch charts/pytorch-.tgz \
+  -n SUSE_AI_NAMESPACE \
+  --version \
+  -f pytorch_custom_overrides.yaml
+----
+endif::[]
 
+ifdef::deployment_standard[]
 [#pytorch-upgrading]
 == Upgrading {pytorch}
@@ -55,6 +68,7 @@ You can upgrade {pytorch} to a specific version by running the following command
 ====
 If you omit the `--version` option, {pytorch} gets upgraded to the latest available version.
 ====
+endif::[]
 
 [#pytorch-uninstalling]
 == Uninstalling {pytorch}
diff --git a/tasks/qdrant-installing.adoc b/tasks/qdrant-installing.adoc
index b0b208b..02d8519 100644
--- a/tasks/qdrant-installing.adoc
+++ b/tasks/qdrant-installing.adoc
@@ -36,6 +36,7 @@ include::../snippets/ai-library-requirement.adoc[]
 Find examples of {qdrant} override files in xref:qdrant-helm-overrides[] and a list of all valid options and their values as displayed by the `helm show values` command.
 . Install the {qdrant} {helm} chart using the `qdrant_custom_overrides.yaml` file by running the following command.
 +
+ifdef::deployment_standard[]
 [source,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
   qdrant oci://dp.apps.rancher.io/charts/qdrant \
   -n SUSE_AI_NAMESPACE \
   -f qdrant_custom_overrides.yaml
 ----
+endif::[]
+ifdef::deployment_airgap[]
+[source,subs="+attributes"]
+----
+{prompt_user}helm upgrade --install \
+  qdrant charts/qdrant-.tgz \
+  -n SUSE_AI_NAMESPACE \
+  --version \
+  -f qdrant_custom_overrides.yaml
+----
+endif::[]
 
 [#qdrant-owui-integration]
 == Integrating {qdrant} with {owui}
@@ -81,6 +93,7 @@ extraEnvVars:
 ----
 . Redeploy {owui}.
 +
+ifdef::deployment_standard[]
 [source,bash,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
   open-webui oci://dp.apps.rancher.io/charts/open-webui \
   -n \
   -f
 ----
+endif::[]
+ifdef::deployment_airgap[]
+[source,bash,subs="+attributes"]
+----
+{prompt_user}helm upgrade --install \
+  open-webui charts/open-webui-.tgz \
+  -n \
+  --version \
+  -f
+----
+endif::[]
 . Verify that `VECTOR_DB` is set to `qdrant`.
 +
 [source,bash,subs="+attributes"]
@@ -99,6 +123,7 @@ Defaulted container "open-webui" out of: open-webui, copy-app-data (init)
 VECTOR_DB=qdrant
 ----
 
+ifdef::deployment_standard[]
 [#qdrant-upgrading]
 == Upgrading {qdrant}
@@ -117,6 +142,7 @@ You can upgrade {qdrant} to a specific version by running the following command:
 ====
 If you omit the `--version` option, {qdrant} gets upgraded to the latest available version.
 ====
+endif::[]
 
 [#qdrant-uninstalling]
 == Uninstalling {qdrant}
diff --git a/tasks/vllm-installing.adoc b/tasks/vllm-installing.adoc
index c9440a7..ede62a8 100644
--- a/tasks/vllm-installing.adoc
+++ b/tasks/vllm-installing.adoc
@@ -1,6 +1,6 @@
 [#vllm-installing]
 = Installing {vllm}
-:revdate: 2025-12-22
+:revdate: 2026-03-24
 :page-revdate: {revdate}
 
 {vllm} is an open-source high-performance inference and serving engine for large language models (LLMs).
@@ -52,6 +52,7 @@ The current release of {productname} {vllm} does not support Ray and LoraControl
 Find examples of override files in xref:vllm-helm-overrides[].
 . After saving the override file as `vllm_custom_overrides.yaml`, apply its configuration with the following command.
 +
+ifdef::deployment_standard[]
 [source,bash,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
   vllm oci://dp.apps.rancher.io/charts/vllm \
-n \ -f ---- +endif::[] +ifdef::deployment_airgap[] +[source,bash,subs="+attributes"] +---- +{prompt_user}helm upgrade --install \ + vllm charts/vllm-.tgz \ + -n \ + --version \ + -f +---- +endif::[] [#vllm-owui-integration] == Integrating {vllm} with {owui} From a2c9e41d303c3871c0863451f2a6a9a4b7142fde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ba=C5=BEant?= Date: Thu, 9 Apr 2026 00:19:49 +0200 Subject: [PATCH 2/3] fixed validation error + updated code block headers --- articles/ai-deployment-airgapped.adoc | 22 ++ references/litellm-helm-overrides.adoc | 4 +- references/litellm-helmchart.adoc | 2 +- references/ollama-helmchart.adoc | 4 +- references/owui-helm-overrides.adoc | 18 +- references/pytorch-helm-overrides.adoc | 6 +- references/qdrant-helm-overrides.adoc | 4 +- references/vllm-helm-overrides.adoc | 234 ++---------------- tasks/AI-deployment-ailibrary-installing.adoc | 1 + tasks/litellm-installing.adoc | 6 +- tasks/mcpo-installing.adoc | 10 +- tasks/milvus-installing.adoc | 1 + tasks/mlflow-installing.adoc | 10 +- tasks/opensearch-installing.adoc | 16 +- tasks/owui-installing.adoc | 4 +- tasks/pytorch-installing.adoc | 6 +- tasks/qdrant-installing.adoc | 10 +- tasks/vllm-installing.adoc | 4 +- 18 files changed, 92 insertions(+), 270 deletions(-) diff --git a/articles/ai-deployment-airgapped.adoc b/articles/ai-deployment-airgapped.adoc index b7a710f..37fe245 100644 --- a/articles/ai-deployment-airgapped.adoc +++ b/articles/ai-deployment-airgapped.adoc @@ -83,28 +83,50 @@ include::../glues/ai-ssecurity-intro.adoc[leveloffset=+2] include::../tasks/Security-installation-airgapped.adoc[leveloffset=+3] include::../tasks/observability-settingup-ai.adoc[leveloffset=+2] // AI library + include::../glues/ai-library-intro.adoc[leveloffset=+1] :override-title: Installation procedure + include::../tasks/AI-deployment-ailibrary-installing.adoc[leveloffset=+2] + include::../tasks/cert-manager-installing.adoc[leveloffset=+2] + include::../tasks/opensearch-installing.adoc[leveloffset=+2] + include::../tasks/milvus-installing.adoc[leveloffset=+2] + include::../tasks/ollama-installing.adoc[leveloffset=+2] + include::../references/ollama-helmchart.adoc[leveloffset=+3] + include::../tasks/owui-installing.adoc[leveloffset=+2] + include::../references/owui-helm-overrides.adoc[leveloffset=+3] + include::../references/owui-helmchart.adoc[leveloffset=+3] + include::../tasks/vllm-installing.adoc[leveloffset=+2] + include::../references/vllm-helm-overrides.adoc[leveloffset=+3] + include::../tasks/mcpo-installing.adoc[leveloffset=+2] + include::../tasks/pytorch-installing.adoc[leveloffset=+2] + include::../references/pytorch-helm-overrides.adoc[leveloffset=+3] + include::../references/pytorch-helmchart.adoc[leveloffset=+3] + include::../tasks/qdrant-installing.adoc[leveloffset=+2] + include::../references/qdrant-helm-overrides.adoc[leveloffset=+3] + include::../tasks/litellm-installing.adoc[leveloffset=+2] + include::../references/litellm-helm-overrides.adoc[leveloffset=+3] + include::../references/litellm-helmchart.adoc[leveloffset=+3] + include::../tasks/mlflow-installing.adoc[leveloffset=+2] //include::../tasks/ai-deployment-ailibrary-deployer.adoc[leveloffset=+2] diff --git a/references/litellm-helm-overrides.adoc b/references/litellm-helm-overrides.adoc index 6efbd2e..d13062f 100644 --- a/references/litellm-helm-overrides.adoc +++ b/references/litellm-helm-overrides.adoc @@ -23,10 +23,10 @@ endif::[] include::../snippets/helm-chart-overrides-intro.adoc[] -ifdef::deployment_standard[] 
+ifeval::["{PROF_DEPLOYMENT}" == "standard"] :imgRegistry: {empty} endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] :imgRegistry: imageRegistry: :5043 endif::[] diff --git a/references/litellm-helmchart.adoc b/references/litellm-helmchart.adoc index f09666c..bf7e878 100644 --- a/references/litellm-helmchart.adoc +++ b/references/litellm-helmchart.adoc @@ -57,4 +57,4 @@ include::../snippets/helm-chart-overrides-intro.adoc[] | `pdb.annotations` | Extra metadata annotations to add to the PDB | `{}` | `pdb.labels` | Extra metadata labels to add to the PDB | `{}` -|=== \ No newline at end of file +|=== diff --git a/references/ollama-helmchart.adoc b/references/ollama-helmchart.adoc index 0b82e37..120965d 100644 --- a/references/ollama-helmchart.adoc +++ b/references/ollama-helmchart.adoc @@ -28,10 +28,10 @@ If you do not want to use the {nvidia} GPU, remove the `gpu` section from `ollam .Basic override file with GPU and two models pulled at startup ==== -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] :imgRegistry: {empty} endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] :imgRegistry: imageRegistry: :5043 endif::[] [source,yaml] diff --git a/references/owui-helm-overrides.adoc b/references/owui-helm-overrides.adoc index 30d5d2b..dfaefae 100644 --- a/references/owui-helm-overrides.adoc +++ b/references/owui-helm-overrides.adoc @@ -213,7 +213,7 @@ ingress: ---- <.> A list of pipeline URLs to be downloaded and installed by default. Individual URLs are separated by a semicolon `;`. - +==== [#owui-ollama-deploy-vllm] .{owui} override file with a connection to {vllm} ==== @@ -251,11 +251,10 @@ extraEnvVars: [#owui-pipelines-standalone] .Stand-alone deployment of open-webui-pipelines - +==== You can install the `open-webui-pipelines` service as a stand-alone deployment, independent of the {owui} chart. To install open-webui-pipelines as a stand-alone component, use the following command: -ifdef::deployment_standard[] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install open-webui-pipelines \ @@ -263,18 +262,6 @@ ifdef::deployment_standard[] -n \ -f open-webui-pipelines-values.yaml ---- -endif::[] -ifdef::deployment_airgap[] -[source,bash,subs="+attributes"] ----- -{prompt_user}helm upgrade --install open-webui-pipelines \ - charts/open-webui-pipelines-.tgz \ --n \ ---version \ --f open-webui-pipelines-values.yaml ----- -endif::[] - Following is an example of the `open-webui-pipelines-values.yaml` override file. @@ -295,3 +282,4 @@ persistence: storageClass: local-path size: 10Gi ---- +==== diff --git a/references/pytorch-helm-overrides.adoc b/references/pytorch-helm-overrides.adoc index b4274b5..1cadb8e 100644 --- a/references/pytorch-helm-overrides.adoc +++ b/references/pytorch-helm-overrides.adoc @@ -5,10 +5,10 @@ include::../snippets/helm-chart-overrides-intro.adoc[] -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] :imgRegistry: {empty} endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] :imgRegistry: imageRegistry: :5043 endif::[] @@ -195,4 +195,4 @@ For production use, we recommend using a storage solution suitable for persisten <.> Do not specify the protocol, such as `https://`. <.> Specify a branch name, a tag name or a commit. <.> Specify a preconfigured secret with username and password (or token). 
-====
\ No newline at end of file
+====
diff --git a/references/qdrant-helm-overrides.adoc b/references/qdrant-helm-overrides.adoc
index 7089098..deb3f70 100644
--- a/references/qdrant-helm-overrides.adoc
+++ b/references/qdrant-helm-overrides.adoc
@@ -22,10 +22,10 @@ endif::[]
 :revdate: 2026-03-25
 :page-revdate: {revdate}
 
-ifdef::deployment_standard[]
+ifeval::["{PROF_DEPLOYMENT}" == "standard"]
 :imgRegistry: {empty}
 endif::[]
-ifdef::deployment_airgap[]
+ifeval::["{PROF_DEPLOYMENT}" == "airgapped"]
 :imgRegistry: imageRegistry: :5043
 endif::[]
diff --git a/references/vllm-helm-overrides.adoc b/references/vllm-helm-overrides.adoc
index c2b559c..473e94d 100644
--- a/references/vllm-helm-overrides.adoc
+++ b/references/vllm-helm-overrides.adoc
@@ -3,6 +3,13 @@
 :revdate: 2026-03-24
 :page-revdate: {revdate}
 
+ifeval::["{PROF_DEPLOYMENT}" == "standard"]
+:imgRegistry: {empty}
+endif::[]
+ifeval::["{PROF_DEPLOYMENT}" == "airgapped"]
+:imgRegistry: imageRegistry: :5043
+endif::[]
+
 include::../snippets/helm-chart-overrides-intro.adoc[]
 
 [#vllm-helm-overrides-minimal]
@@ -10,12 +17,12 @@ include::../snippets/helm-chart-overrides-intro.adoc[]
 ====
 The following override file installs {vllm} using a model that is publicly available.
 
-ifdef::deployment_standard[]
 [source,yaml]
 ----
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 servingEngineSpec:
   modelSpec:
   - name: "phi3-mini-4k"
@@ -29,37 +36,17 @@ servingEngineSpec:
   requestMemory: "16Gi"
   requestGPU: 1
 ----
-endif::[]
-ifdef::deployment_airgap[]
-[source,yaml]
-----
-global:
-  imagePullSecrets:
-  - application-collection
-  imageRegistry: :5043
-servingEngineSpec:
-  modelSpec:
-  - name: "phi3-mini-4k"
-    registry: "dp.apps.rancher.io"
-    repository: "containers/vllm-openai"
-    tag: "0.9.1"
-    imagePullPolicy: "IfNotPresent"
-    modelURL: "microsoft/Phi-3-mini-4k-instruct"
-    replicaCount: 1
-    requestCPU: 6
-    requestMemory: "16Gi"
-    requestGPU: 1
-----
-endif::[]
+====
 
 .Validating the installation
+====
 . Pulling the images can take a long time.
 You can monitor the status of the {vllm} installation by running the following command:
 +
 [source,bash,subs="+attributes"]
 ----
 {prompt_user}kubectl get pods -n
-
+
 NAME READY STATUS RESTARTS AGE
 [...]
 vllm-deployment-router-7588bf995c-5jbkf 1/1 Running 0 8m9s
@@ -68,7 +55,6 @@ vllm-phi3-mini-4k-deployment-vllm-79d6fdc-tx7 1/1 Running 0 8m9s
 +
 Pods for the {vllm} deployment should transition to the states `Ready` and `Running`.
 
-[.procedure]
 .Validating the stack
 . Expose the `vllm-router-service` port to the host machine:
 +
@@ -137,13 +123,13 @@ For more information, see link:https://huggingface.co/meta-llama/Llama-3.1-8B-In
 * The `runtimeClassName` specified here is `nvidia`.
 * Update the `storageClass:` entry for each `modelSpec`.
 
-ifdef::deployment_standard[]
 [source,yaml]
 ----
 # vllm_custom_overrides.yaml
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 servingEngineSpec:
   runtimeClassName: "nvidia"
   modelSpec:
@@ -190,62 +176,6 @@ Sufficient memory is required to load the model.
 <.> Your {huggingface} token for accessing gated models.
 Replace `HF_TOKEN` with your actual token.
 ====
-endif::[]
-ifdef::deployment_airgap[]
-[source,yaml]
-----
-# vllm_custom_overrides.yaml
-global:
-  imagePullSecrets:
-  - application-collection
-  imageRegistry: :5043
-servingEngineSpec:
-  runtimeClassName: "nvidia"
-  modelSpec:
-  - name: "llama3" <.>
-    registry: "dp.apps.rancher.io" <.>
-    repository: "containers/vllm-openai" <.>
-    tag: "0.9.1" <.>
-    imagePullPolicy: "IfNotPresent"
-    modelURL: "meta-llama/Llama-3.1-8B-Instruct" <.>
-    replicaCount: 1 <.>
-    requestCPU: 10 <.>
-    requestMemory: "16Gi" <.>
-    requestGPU: 1 <.>
-    storageClass:
-    pvcStorage: "50Gi" <.>
-    pvcAccessMode:
-    - ReadWriteOnce
-
-    vllmConfig:
-      enableChunkedPrefill: false <.>
-      enablePrefixCaching: false <.>
-      maxModelLen: 4096 <.>
-      dtype: "bfloat16" <.>
-      extraArgs: ["--disable-log-requests", "--gpu-memory-utilization", "0.8"] <.>
-
-  hf_token: <.>
-----
-<.> The unique identifier for your model deployment.
-<.> The {docker} image registry containing the model's serving engine image.
-<.> The {docker} image repository containing the model's serving engine image.
-<.> The version of the model image to use.
-<.> The URL pointing to the model on {huggingface} or another hosting service.
-<.> The number of replicas for the deployment, which allows scaling for load.
-<.> The amount of CPU resources requested per replica.
-<.> Memory allocation for the deployment.
-Sufficient memory is required to load the model.
-<.> The number of GPUs to allocate for the deployment.
-<.> The Persistent Volume Claim (PVC) size for model storage.
-<.> Optimizes performance by prefetching model chunks.
-<.> Enables caching of prompt prefixes to speed up inference for repeated prompts.
-<.> The maximum sequence length the model can handle.
-<.> The data type for model weights, such as `bfloat16` for mixed-precision inference and faster performance on modern GPUs.
-<.> Additional command-line arguments for {vllm}, such as disabling request logging or setting GPU memory utilization.
-<.> Your {huggingface} token for accessing gated models.
-Replace `HF_TOKEN` with your actual token.
-====
-endif::[]
 
 [#vllm-helm-overrides-prefetched]
 .Loading prefetched models from persistent storage
@@ -274,6 +204,7 @@ spec:
 ----
 +
 Save it as `pvc-models.yaml` and apply with `kubectl apply -f pvc-models.yaml`.
+
 . Create a secret resource for the {huggingface} token.
 +
 [source,bash,subs="+attributes"]
@@ -282,6 +213,7 @@ Save it as `pvc-models.yaml` and apply with `kubectl apply -f pvc-models.yaml`.
   generic huggingface-credentials \
   --from-literal=HUGGING_FACE_HUB_TOKEN=
 ----
+
 . Create a YAML specification for prefetching the model and save it as `job-prefetch-llama3.1-8b.yaml`.
 +
 [source,yaml]
@@ -334,15 +266,16 @@ Apply the specification with the following commands:
 {prompt_user}kubectl -n \
   wait --for=condition=complete job/prefetch-llama3.1-8b
 ----
+
 . Update the custom {vllm} override file with support for PVC.
 +
-ifdef::deployment_standard[]
 [source,yaml]
 ----
 # vllm_custom_overrides.yaml
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 servingEngineSpec:
   runtimeClassName: "nvidia"
   modelSpec:
@@ -374,49 +307,9 @@ servingEngineSpec:
 ----
 <.> Specify your PVC name.
 <.> The mount path must match the base directory of the `servingEngineSpec.modelSpec.modelURL` value specified above.
-endif::[]
-ifdef::deployment_airgap[]
-[source,yaml]
-----
-# vllm_custom_overrides.yaml
-global:
-  imagePullSecrets:
-  - application-collection
-  imageRegistry: :5043
-servingEngineSpec:
-  runtimeClassName: "nvidia"
-  modelSpec:
-  - name: "llama3"
-    registry: "dp.apps.rancher.io"
-    repository: "containers/vllm-openai"
-    tag: "0.9.1"
-    imagePullPolicy: "IfNotPresent"
-    modelURL: "/models/llama-3.1-8b-it"
-    replicaCount: 1
-
-    requestCPU: 10
-    requestMemory: "16Gi"
-    requestGPU: 1
-
-    extraVolumes:
-    - name: models-pvc
-      persistentVolumeClaim:
-        claimName: models-pvc <.>
-
-    extraVolumeMounts:
-    - name: models-pvc
-      mountPath: /models <.>
-
-    vllmConfig:
-      maxModelLen: 4096
-
-    hf_token: 
-----
-<.> Specify your PVC name.
-<.> The mount path must match the base directory of the `servingEngineSpec.modelSpec.modeURL` value specified above.
-endif::[]
+
-Save it as `vllm_custom_overrides.yaml` and apply with `kubectl apply -f vllm_custom_overrides.yaml`.
+Save it as `vllm_custom_overrides.yaml` and apply it with the `helm upgrade --install` command from the installation procedure, passing `-f vllm_custom_overrides.yaml`.
+
 . The following example lists mounted PVCs for a pod.
+
 [source,bash,subs="+attributes"]
@@ -440,13 +333,13 @@ Ray is currently not supported.
 Therefore, sharding a single large model across multiple GPUs is not supported.
 =====
 
-ifdef::deployment_standard[]
 [source,yaml]
 ----
 # vllm_custom_overrides.yaml
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 servingEngineSpec:
   modelSpec:
   - name: "llama3"
@@ -481,50 +374,6 @@ servingEngineSpec:
     maxModelLen: 4096
   hf_token: 
 ----
-endif::[]
-ifdef::deployment_standard[]
-[source,yaml]
-----
-# vllm_custom_overrides.yaml
-global:
-  imagePullSecrets:
-  - application-collection
-  imageRegistry: :5043
-servingEngineSpec:
-  modelSpec:
-  - name: "llama3"
-    registry: "dp.apps.rancher.io"
-    repository: "containers/vllm-openai"
-    tag: "0.9.1"
-    imagePullPolicy: "IfNotPresent"
-    modelURL: "meta-llama/Llama-3.1-8B-Instruct"
-    replicaCount: 1
-    requestCPU: 10
-    requestMemory: "16Gi"
-    requestGPU: 1
-    pvcStorage: "50Gi"
-    storageClass: 
-    vllmConfig:
-      maxModelLen: 4096
-    hf_token: 
-
-  - name: "mistral"
-    registry: "dp.apps.rancher.io"
-    repository: "containers/vllm-openai"
-    tag: "0.9.1"
-    imagePullPolicy: "IfNotPresent"
-    modelURL: "mistralai/Mistral-7B-Instruct-v0.2"
-    replicaCount: 1
-    requestCPU: 10
-    requestMemory: "16Gi"
-    requestGPU: 1
-    pvcStorage: "50Gi"
-    storageClass: 
-    vllmConfig:
-      maxModelLen: 4096
-    hf_token: 
-----
-endif::[]
 ====
 
 [#vllm-helm-overrides-offloading]
@@ -543,13 +392,13 @@ These experimental features are only supported on newer GPU generations.
 It is not recommended to enable them without a compelling reason.
 =====
 
-ifdef::deployment_standard[]
 [source,yaml]
 ----
 # vllm_custom_overrides.yaml
 global:
   imagePullSecrets:
   - application-collection
+  {imgRegistry}
 servingEngineSpec:
   runtimeClassName: "nvidia"
   modelSpec:
@@ -576,42 +425,6 @@ servingEngineSpec:
   hf_token: 
 ----
-endif::[]
-ifdef::deployment_airgap[]
-[source,yaml]
-----
-# vllm_custom_overrides.yaml
-global:
-  imagePullSecrets:
-  - application-collecti1on
-  imageRegistry: :5043
-servingEngineSpec:
-  runtimeClassName: "nvidia"
-  modelSpec:
-  - name: "mistral"
-    registry: "dp.apps.rancher.io"
-    repository: "containers/lmcache-vllm-openai"
-    tag: "0.3.2"
-    imagePullPolicy: "IfNotPresent"
-    modelURL: "mistralai/Mistral-7B-Instruct-v0.2"
-    replicaCount: 1
-    requestCPU: 10
-    requestMemory: "40Gi"
-    requestGPU: 1
-    pvcStorage: "50Gi"
-    storageClass: 
-    pvcAccessMode:
-    - ReadWriteOnce
-    vllmConfig:
-      maxModelLen: 32000
-
-    lmcacheConfig:
-      enabled: false
-      cpuOffloadingBufferSize: "20"
-
-    hf_token: 
-----
-endif::[]
 ====
 
 [#vllm-helm-overrides-lmcache]
@@ -628,12 +441,7 @@ Setting `lmcacheConfig.enabled` to `true` implicitly enables the `LMCACHE_USE_EX
 These experimental features are only supported on newer GPU generations.
 It is not recommended to enable them without a compelling reason.
 =====
-ifdef::deployment_standard[]
-:imgRegistry: {empty}
-endif::[]
-ifdef::deployment_airgap[]
-:imgRegistry: imageRegistry: :5043
-endif::[]
+
 [source,yaml,subs="+attributes"]
 ----
 # vllm_custom_overrides.yaml
@@ -704,4 +512,4 @@ routerSpec:
   routingLogic: "session"
   sessionKey: "x-user-id"
 ----
-====
\ No newline at end of file
+====
diff --git a/tasks/AI-deployment-ailibrary-installing.adoc b/tasks/AI-deployment-ailibrary-installing.adoc
index d740ee9..01f615f 100644
--- a/tasks/AI-deployment-ailibrary-installing.adoc
+++ b/tasks/AI-deployment-ailibrary-installing.adoc
@@ -132,3 +132,4 @@ endif::[]
 .. Install {mlflow} as described in xref:mlflow-installing[].
 .. Install {qdrant} as described in xref:qdrant-installing[].
 .. Install {litellm} as described in xref:litellm-installing[].
+
diff --git a/tasks/litellm-installing.adoc b/tasks/litellm-installing.adoc
index 7863f38..c93f567 100644
--- a/tasks/litellm-installing.adoc
+++ b/tasks/litellm-installing.adoc
@@ -57,7 +57,7 @@ include::../snippets/ai-library-requirement.adoc[]
 Find examples of {litellm} override files in xref:litellm-helm-overrides[] together with a list of all valid options and their values as displayed by the `helm show` commands.
 . Install the {litellm} {helm} chart using the `litellm_custom_overrides.yaml` file by running the following command.
 +
-ifdef::deployment_standard[]
+ifeval::["{PROF_DEPLOYMENT}" == "standard"]
 [source,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
@@ -66,7 +66,7 @@ ifdef::deployment_standard[]
   -f litellm_custom_overrides.yaml
 ----
 endif::[]
-ifdef::deployment_airgap[]
+ifeval::["{PROF_DEPLOYMENT}" == "airgapped"]
 [source,subs="+attributes"]
 ----
 {prompt_user}helm upgrade --install \
@@ -77,7 +77,7 @@ ifdef::deployment_airgap[]
 ----
 endif::[]
 
-ifdef::deployment_standard[]
+ifeval::["{PROF_DEPLOYMENT}" == "standard"]
 [#litellm-upgrading]
 == Upgrading {litellm}
 
diff --git a/tasks/mcpo-installing.adoc b/tasks/mcpo-installing.adoc
index 79c8c43..0f30f13 100644
--- a/tasks/mcpo-installing.adoc
+++ b/tasks/mcpo-installing.adoc
@@ -32,10 +32,10 @@ include::../snippets/ai-library-requirement.adoc[]
 The following file defines multiple {mcp} servers in the `config.mcpServers` section.
These servers will be added to the {mcpo} configuration file `config.json`. + -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] :imgRegistry: {empty} endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] :imgRegistry: imageRegistry: :5043 endif::[] [source,yaml] @@ -73,7 +73,7 @@ config: . After saving the override file as `mcpo_custom_overrides.yaml`, apply its configuration with the following command. + -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [source,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -82,7 +82,7 @@ ifdef::deployment_standard[] -f mcpo_custom_overrides.yaml ---- endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] [source,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -136,7 +136,7 @@ For more information, see link:https://documentation.suse.com/suse-ai/1.0/html/o To enable selected {mcp} tools by default for a model, refer to link:https://documentation.suse.com/suse-ai/1.0/html/openwebui-configuring/index.html#owui-enabling-default-mcp-services[Enabling default {mcp} services]. ==== -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [#mcpo-upgrading] == Upgrading {mcpo} diff --git a/tasks/milvus-installing.adoc b/tasks/milvus-installing.adoc index d224f5d..4f6f28c 100644 --- a/tasks/milvus-installing.adoc +++ b/tasks/milvus-installing.adoc @@ -227,3 +227,4 @@ To uninstall {milvus}, run the following command: ---- {prompt_user}helm uninstall milvus -n ---- + diff --git a/tasks/mlflow-installing.adoc b/tasks/mlflow-installing.adoc index 01076ce..8808005 100644 --- a/tasks/mlflow-installing.adoc +++ b/tasks/mlflow-installing.adoc @@ -3,10 +3,10 @@ :revdate: 2026-03-25 :page-revdate: {revdate} -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] :imgRegistry: {empty} endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] :imgRegistry: imageRegistry: :5043 endif::[] @@ -132,8 +132,9 @@ volumeMounts: [] # readOnly: true nodeSelector: {} tolerations: [] -affinity: {} +affinity: {} ---- + . Replace `mlflow/template/deployment.yaml` with the following content: + [source,yaml] @@ -214,6 +215,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} ---- + . Install {mlflow} using the following command: + [source,,subs="+attributes"] @@ -320,7 +322,7 @@ services: + [source,subs="+attributes"] ---- -(venv) {exampleuser}@localhost:~[] docker ps +{prompt_user}(venv) {exampleuser}@localhost:~[] docker ps CONTAINER ID IMAGE ... STATUS PORTS NAMES 1e58723cb3d mlflow:3.6.0 Up 23 seconds 0.0.0.0:5000->5000/tcp... 
mlflow ---- diff --git a/tasks/opensearch-installing.adoc b/tasks/opensearch-installing.adoc index 2724c72..9c367f4 100644 --- a/tasks/opensearch-installing.adoc +++ b/tasks/opensearch-installing.adoc @@ -3,10 +3,10 @@ :revdate: 2026-03-24 :page-revdate: {revdate} -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] :imgRegistry: {empty} endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] :imgRegistry: imageRegistry: :5043 endif::[] @@ -61,7 +61,7 @@ include::../snippets/ai-library-requirement.adoc[] + For a single-node cluster, use the following template file: + -[source,yaml] +[source,yaml,subs="+attributes"] ---- # opensearch_custom_overrides.yaml global: @@ -96,7 +96,7 @@ config: + For a multi-node cluster, use the following template file: + -[source,yaml] +[source,yaml,subs="+attributes"] ---- # opensearch_custom_overrides.yaml global: @@ -159,7 +159,7 @@ config: ---- . After saving the override file as `opensearch_custom_overrides.yaml`, apply its configuration with the following command. + -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -168,7 +168,7 @@ ifdef::deployment_standard[] -f ---- endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -241,7 +241,7 @@ extraEnvVars: ---- . Redeploy {owui}. + -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -250,7 +250,7 @@ ifdef::deployment_standard[] -f ---- endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ diff --git a/tasks/owui-installing.adoc b/tasks/owui-installing.adoc index 676c6e8..7c33bf4 100644 --- a/tasks/owui-installing.adoc +++ b/tasks/owui-installing.adoc @@ -35,7 +35,7 @@ Find more details in xref:owui-helm-overrides[]. For a list of all installation options with examples, refer to xref:owui-helmchart-values[]. . Install the {owui} {helm} chart using the `owui_custom_overrides.yaml` override file. + -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -44,7 +44,7 @@ ifdef::deployment_standard[] -f ---- endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ diff --git a/tasks/pytorch-installing.adoc b/tasks/pytorch-installing.adoc index b29791a..3633efc 100644 --- a/tasks/pytorch-installing.adoc +++ b/tasks/pytorch-installing.adoc @@ -29,7 +29,7 @@ include::../snippets/ai-library-requirement.adoc[] Find examples of {pytorch} override files in xref:pytorch-helm-overrides[] and a list of all valid options and their values in xref:pytorch-helm-values[]. . Install the {pytorch} {helm} chart using the `pytorch_custom_overrides.yaml` file using the following command. 
+ -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -38,7 +38,7 @@ ifdef::deployment_standard[] -f pytorch_custom_overrides.yaml ---- endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -49,7 +49,7 @@ ifdef::deployment_airgap[] ---- endif::[] -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [#pytorch-upgrading] == Upgrading {pytorch} diff --git a/tasks/qdrant-installing.adoc b/tasks/qdrant-installing.adoc index 02d8519..02dc686 100644 --- a/tasks/qdrant-installing.adoc +++ b/tasks/qdrant-installing.adoc @@ -36,7 +36,7 @@ include::../snippets/ai-library-requirement.adoc[] Find examples of {qdrant} override files in xref:qdrant-helm-overrides[] and a list of all valid options and their values from running helm show values. . Install the {qdrant} {helm} chart using the `qdrant_custom_overrides.yaml` file using the following command. + -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [source,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -45,7 +45,7 @@ ifdef::deployment_standard[] -f qdrant_custom_overrides.yaml ---- endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] [source,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -93,7 +93,7 @@ extraEnvVars: ---- . Redeploy {owui}. + -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -102,7 +102,7 @@ ifdef::deployment_standard[] -f ---- endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -123,7 +123,7 @@ Defaulted container "open-webui" out of: open-webui, copy-app-data (init) VECTOR_DB=qdrant ---- -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [#qdrant-upgrading] == Upgrading {qdrant} diff --git a/tasks/vllm-installing.adoc b/tasks/vllm-installing.adoc index ede62a8..28577a4 100644 --- a/tasks/vllm-installing.adoc +++ b/tasks/vllm-installing.adoc @@ -52,7 +52,7 @@ The current release of {productname} {vllm} does not support Ray and LoraControl Find examples of override files in xref:vllm-helm-overrides[]. . After saving the override file as `vllm_custom_overrides.yaml`, apply its configuration with the following command. 
+ -ifdef::deployment_standard[] +ifeval::["{PROF_DEPLOYMENT}" == "standard"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ @@ -61,7 +61,7 @@ ifdef::deployment_standard[] -f ---- endif::[] -ifdef::deployment_airgap[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] [source,bash,subs="+attributes"] ---- {prompt_user}helm upgrade --install \ From c5e0c827b750938fe8bdbda8f834b48ff23386e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ba=C5=BEant?= Date: Thu, 9 Apr 2026 00:37:20 +0200 Subject: [PATCH 3/3] fixed attributes in code blocks --- references/litellm-helm-overrides.adoc | 2 +- references/ollama-helmchart.adoc | 15 ++++++++------- references/owui-helm-overrides.adoc | 22 ++++++++++++---------- references/pytorch-helm-overrides.adoc | 10 +++++----- references/qdrant-helm-overrides.adoc | 4 ++-- references/vllm-helm-overrides.adoc | 10 +++++----- tasks/cert-manager-installing.adoc | 4 ++-- tasks/mcpo-installing.adoc | 15 ++++++++------- tasks/milvus-installing.adoc | 17 +++++++++-------- tasks/mlflow-installing.adoc | 2 +- tasks/ollama-installing.adoc | 4 ++-- tasks/opensearch-installing.adoc | 4 ++-- tasks/owui-installing.adoc | 4 ++-- tasks/vllm-installing.adoc | 4 ++-- 14 files changed, 61 insertions(+), 56 deletions(-) diff --git a/references/litellm-helm-overrides.adoc b/references/litellm-helm-overrides.adoc index d13062f..3c8ee02 100644 --- a/references/litellm-helm-overrides.adoc +++ b/references/litellm-helm-overrides.adoc @@ -33,7 +33,7 @@ endif::[] [#litellm-chart-example-basic] .Basic override file with {postgresql} deployment and master key automatically generated. ==== -[source,yaml] +[source,yaml,subs="+attributes"] ---- # litellm_custom_overrides.yaml global: diff --git a/references/ollama-helmchart.adoc b/references/ollama-helmchart.adoc index 120965d..e93b4c5 100644 --- a/references/ollama-helmchart.adoc +++ b/references/ollama-helmchart.adoc @@ -3,6 +3,13 @@ :revdate: 2026-03-24 :page-revdate: {revdate} +ifeval::["{PROF_DEPLOYMENT}" == "standard"] +:imgRegistry: {empty} +endif::[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] +:imgRegistry: imageRegistry: :5043 +endif::[] + include::../snippets/helm-chart-overrides-intro.adoc[] [IMPORTANT] @@ -28,13 +35,7 @@ If you do not want to use the {nvidia} GPU, remove the `gpu` section from `ollam .Basic override file with GPU and two models pulled at startup ==== -ifeval::["{PROF_DEPLOYMENT}" == "standard"] -:imgRegistry: {empty} -endif::[] -ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] -:imgRegistry: imageRegistry: :5043 -endif::[] -[source,yaml] +[source,yaml,subs="+attributes"] ---- global: imagePullSecrets: diff --git a/references/owui-helm-overrides.adoc b/references/owui-helm-overrides.adoc index dfaefae..5502f18 100644 --- a/references/owui-helm-overrides.adoc +++ b/references/owui-helm-overrides.adoc @@ -3,19 +3,21 @@ :revdate: 2026-03-24 :page-revdate: {revdate} -include::../snippets/helm-chart-overrides-intro.adoc[] - -[#owui-helm-overrides-ollama] -.{owui} override file with {ollama} included -==== -The following override file installs {ollama} during the {owui} installation. ifeval::["{PROF_DEPLOYMENT}" == "standard"] :imgRegistry: {empty} endif::[] ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] :imgRegistry: imageRegistry: :5043 endif::[] -[source,yaml] + +include::../snippets/helm-chart-overrides-intro.adoc[] + +[#owui-helm-overrides-ollama] +.{owui} override file with {ollama} included +==== +The following override file installs {ollama} during the {owui} installation. 
+ +[source,yaml,subs="+attributes"] ---- global: imagePullSecrets: @@ -115,7 +117,7 @@ The default value is '0p3n-w3bu!'. ==== The following override file installs {ollama} separately from the {owui} installation. -[source,yaml] +[source,yaml,subs="+attributes"] ---- global: imagePullSecrets: @@ -182,7 +184,7 @@ This simple filter adds a limit to the number of question and answer turns durin Pipelines normally require additional configuration provided either via environment variables or specified in the {owui} Web UI. ===== -[source,yaml] +[source,yaml,subs="+attributes"] ---- global: imagePullSecrets: @@ -265,7 +267,7 @@ To install open-webui-pipelines as a stand-alone component, use the following co Following is an example of the `open-webui-pipelines-values.yaml` override file. -[source,yaml] +[source,yaml,subs="+attributes"] ---- runtimeClassName: nvidia global: diff --git a/references/pytorch-helm-overrides.adoc b/references/pytorch-helm-overrides.adoc index 1cadb8e..f4d2655 100644 --- a/references/pytorch-helm-overrides.adoc +++ b/references/pytorch-helm-overrides.adoc @@ -15,7 +15,7 @@ endif::[] [#pytorch-chart-example-basic] .Basic override file with GPU enabled ==== -[source,yaml] +[source,yaml,subs="+attributes"] ---- # pytorch_custom_overrides.yaml runtimeClassName: nvidia @@ -53,7 +53,7 @@ To create a ConfigMap, run the following command: MY_CONFIG_MAP -n SUSE_AI_NAMESPACE ---- -[source,yaml] +[source,yaml,subs="+attributes"] ---- # pytorch_custom_overrides.yaml runtimeClassName: nvidia @@ -88,7 +88,7 @@ For production use, we recommend using a storage solution suitable for persisten ==== Move the `entrypoint.sh` file plus any helper files under the `scripts/` directory. -[source,yaml] +[source,yaml,subs="+attributes"] ---- # pytorch_custom_overrides.yaml runtimeClassName: nvidia @@ -124,7 +124,7 @@ For production use, we recommend using a storage solution suitable for persisten [#pytorch-chart-example-git-public] .Git repository clone: public with no authentication ==== -[source,yaml] +[source,yaml,subs="+attributes"] ---- # pytorch_custom_overrides.yaml runtimeClassName: nvidia @@ -161,7 +161,7 @@ For production use, we recommend using a storage solution suitable for persisten [#pytorch-chart-example-git-private] .Git repository clone: private with authentication ==== -[source,yaml] +[source,yaml,subs="+attributes"] ---- # pytorch_custom_overrides.yaml runtimeClassName: nvidia diff --git a/references/qdrant-helm-overrides.adoc b/references/qdrant-helm-overrides.adoc index deb3f70..8c1fd1e 100644 --- a/references/qdrant-helm-overrides.adoc +++ b/references/qdrant-helm-overrides.adoc @@ -32,7 +32,7 @@ endif::[]` [#qdrant-chart-example-basic] .Basic override file when the cluster has no default storage class set. ==== -[source,yaml] +[source,yaml,subs="+attributes"] ---- # qdrant_custom_overrides.yaml global: @@ -58,7 +58,7 @@ For production use, we recommend using a storage solution suitable for persisten [#qdrant-chart-example-gpu] .An example where {qdrant} uses GPU capabilities. ==== -[source,yaml] +[source,yaml,subs="+attributes"] ---- # qdrant_custom_overrides.yaml global: diff --git a/references/vllm-helm-overrides.adoc b/references/vllm-helm-overrides.adoc index 473e94d..2c301ee 100644 --- a/references/vllm-helm-overrides.adoc +++ b/references/vllm-helm-overrides.adoc @@ -17,7 +17,7 @@ include::../snippets/helm-chart-overrides-intro.adoc[] ==== The following override file installs {vllm} using a model that is publicly available. 
-[source,yaml] +[source,yaml,subs="+attributes"] ---- global: imagePullSecrets: @@ -123,7 +123,7 @@ For more information, see link:https://huggingface.co/meta-llama/Llama-3.1-8B-In * The `runtimeClassName` specified here is `nvidia`. * Update the `storageClass:` entry for each `modelSpec`. -[source,yaml] +[source,yaml,subs="+attributes"] ---- # vllm_custom_overrides.yaml global: @@ -269,7 +269,7 @@ Apply the specification with the following commands: . Update the custom {vllm} override file with support for PVC. + -[source,yaml] +[source,yaml,subs="+attributes"] ---- # vllm_custom_overrides.yaml global: @@ -333,7 +333,7 @@ Ray is currently not supported. Therefore, sharding a single large model across multiple GPUs is not supported. ===== -[source,yaml] +[source,yaml,subs="+attributes"] ---- # vllm_custom_overrides.yaml global: @@ -392,7 +392,7 @@ These experimental features are only supported on newer GPU generations. It is not recommended to enable them without a compelling reason. ===== -[source,yaml] +[source,yaml,subs="+attributes"] ---- # vllm_custom_overrides.yaml global: diff --git a/tasks/cert-manager-installing.adoc b/tasks/cert-manager-installing.adoc index 1bf6326..0598603 100644 --- a/tasks/cert-manager-installing.adoc +++ b/tasks/cert-manager-installing.adoc @@ -16,9 +16,9 @@ Currently, {certmanager} is no longer part of the {owui} {helm} chart and you ne Before deploying {certmanager}, it is important to know more about the supported configurations and documentation. The following command provides the corresponding details: -[source,bash] +[source,bash,subs="+attributes"] ---- -helm show values oci://dp.apps.rancher.io/charts/cert-manager +{prompt_user}helm show values oci://dp.apps.rancher.io/charts/cert-manager ---- Alternatively, you can also refer to the {certmanager} {helm} chart page on the {sappco} site at link:https://apps.rancher.io/applications/cert-manager[]. diff --git a/tasks/mcpo-installing.adoc b/tasks/mcpo-installing.adoc index 0f30f13..d61ca3d 100644 --- a/tasks/mcpo-installing.adoc +++ b/tasks/mcpo-installing.adoc @@ -3,6 +3,13 @@ :revdate: 2026-03-25 :page-revdate: {revdate} +ifeval::["{PROF_DEPLOYMENT}" == "standard"] +:imgRegistry: {empty} +endif::[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] +:imgRegistry: imageRegistry: :5043 +endif::[] + {mcp} (Model Context Protocol) is an open source standard for connecting AI applications{mdash}such as {suseai}{mdash}to external systems. These external systems can include data sources like databases or local files, or tools like calculators or search engines. @@ -32,13 +39,7 @@ include::../snippets/ai-library-requirement.adoc[] The following file defines multiple {mcp} servers in the `config.mcpServers` section. These servers will be added to the {mcpo} configuration file `config.json`. 
+ -ifeval::["{PROF_DEPLOYMENT}" == "standard"] -:imgRegistry: {empty} -endif::[] -ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] -:imgRegistry: imageRegistry: :5043 -endif::[] -[source,yaml] +[source,yaml,subs="+attributes"] ---- # mcpo_custom_overrides.yaml global: diff --git a/tasks/milvus-installing.adoc b/tasks/milvus-installing.adoc index 4f6f28c..994daa3 100644 --- a/tasks/milvus-installing.adoc +++ b/tasks/milvus-installing.adoc @@ -4,6 +4,13 @@ :revdate: 2025-12-22 :page-revdate: {revdate} +ifeval::["{PROF_DEPLOYMENT}" == "standard"] +:imgRegistry: {empty} +endif::[] +ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] +:imgRegistry: imageRegistry: :5043 +endif::[] + {milvus} is a scalable, high-performance vector database designed for AI applications. It enables efficient organization and searching of massive unstructured datasets, including text, images and multi-modal content. This procedure walks you through the installation of {milvus} and its dependencies. @@ -51,13 +58,7 @@ As a template, you can use the `values.yaml` file that is included in the `chart ==== endif::[] + -ifeval::["{PROF_DEPLOYMENT}" == "standard"] -:imgRegistry: {empty} -endif::[] -ifeval::["{PROF_DEPLOYMENT}" == "airgapped"] -:imgRegistry: imageRegistry: :5043 -endif::[] -[source,yaml] +[source,yaml,subs="+attributes"] ---- global: imagePullSecrets: @@ -214,7 +215,7 @@ To upgrade {milvus}, identify the new version number and run the following comma milvus oci://dp.apps.rancher.io/charts/milvus \ -n \ --version \ - -f + -f milvus_custom_overrides.yaml ---- endif::[] diff --git a/tasks/mlflow-installing.adoc b/tasks/mlflow-installing.adoc index 8808005..17e85fa 100644 --- a/tasks/mlflow-installing.adoc +++ b/tasks/mlflow-installing.adoc @@ -46,7 +46,7 @@ The command creates an `mlflow` directory with the basic file structure for a ch . Replace `mlflow/values.yaml` with the following content. Replace `CONTAINER_VERSION` with the current chart version. + -[source,yaml] +[source,yaml,subs="+attributes"] ---- # values.yaml replicaCount: 1 diff --git a/tasks/ollama-installing.adoc b/tasks/ollama-installing.adoc index c635d83..aa7d372 100644 --- a/tasks/ollama-installing.adoc +++ b/tasks/ollama-installing.adoc @@ -18,9 +18,9 @@ If you decide to install {ollama} separately, disable its installation during th Before deploying {ollama}, it is important to know more about the supported configurations and documentation. The following command provides the corresponding details: -[source,bash] +[source,bash,subs="+attributes"] ---- -helm show values oci://dp.apps.rancher.io/charts/ollama +{prompt_user}helm show values oci://dp.apps.rancher.io/charts/ollama ---- Alternatively, you can also refer to the {ollama} {helm} chart page on the {sappco} site at link:https://apps.rancher.io/applications/ollama[]. diff --git a/tasks/opensearch-installing.adoc b/tasks/opensearch-installing.adoc index 9c367f4..436279a 100644 --- a/tasks/opensearch-installing.adoc +++ b/tasks/opensearch-installing.adoc @@ -21,9 +21,9 @@ Its functionality can be extended by plug-ins that enhance features like search, Before deploying {opensearch}, it is important to know more about the supported configurations and documentation. 
The following command provides the corresponding details: -[source,bash] +[source,bash,subs="+attributes"] ---- -helm show values oci://dp.apps.rancher.io/charts/opensearch +{prompt_user}helm show values oci://dp.apps.rancher.io/charts/opensearch ---- Alternatively, you can also refer to the {opensearch} {helm} chart page on the {sappco} site at link:https://apps.rancher.io/applications/opensearch[]. diff --git a/tasks/owui-installing.adoc b/tasks/owui-installing.adoc index 7c33bf4..383ee43 100644 --- a/tasks/owui-installing.adoc +++ b/tasks/owui-installing.adoc @@ -11,9 +11,9 @@ include::../snippets/openwebui-intro.adoc[] Before deploying {owui}, it is important to know more about the supported configurations and documentation. The following command provides the corresponding details: -[source,bash] +[source,bash,subs="+attributes"] ---- -helm show values oci://dp.apps.rancher.io/charts/open-webui +{prompt_user}helm show values oci://dp.apps.rancher.io/charts/open-webui ---- Alternatively, you can also refer to the {owui} {helm} chart page on the {sappco} site at link:https://apps.rancher.io/applications/open-webui[]. diff --git a/tasks/vllm-installing.adoc b/tasks/vllm-installing.adoc index 28577a4..3ba5ea7 100644 --- a/tasks/vllm-installing.adoc +++ b/tasks/vllm-installing.adoc @@ -23,9 +23,9 @@ It consists of the following components: Before deploying {vllm}, it is important to know more about the supported configurations and documentation. The following command provides the corresponding details: -[source,bash] +[source,bash,subs="+attributes"] ---- -helm show values oci://dp.apps.rancher.io/charts/vllm +{prompt_user}helm show values oci://dp.apps.rancher.io/charts/vllm ---- Alternatively, you can also refer to the {vllm} {helm} chart page on the {sappco} site at link:https://apps.rancher.io/applications/vllm[].
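
A note on the pattern this series applies throughout: the `ifeval::` blocks set a single `imgRegistry` attribute per deployment profile, and `subs="+attributes"` on the `[source]` blocks is what makes `{imgRegistry}` expand inside the listings, which is why PATCH 3/3 adds it to every block that received `{imgRegistry}` in PATCH 1/3. The following is a minimal sketch of how the two pieces fit together; the `registry.example.com` host is a hypothetical placeholder, since the real override files leave the registry host out.

// Set the attribute once, per deployment profile.
ifeval::["{PROF_DEPLOYMENT}" == "standard"]
// Standard deployments pull from the default registry, so the attribute is empty.
:imgRegistry: {empty}
endif::[]
ifeval::["{PROF_DEPLOYMENT}" == "airgapped"]
// Air-gapped deployments inject a local mirror; the host below is hypothetical.
:imgRegistry: imageRegistry: registry.example.com:5043
endif::[]

// subs="+attributes" enables attribute substitution inside the listing,
// so {imgRegistry} is resolved before the YAML reaches the reader.
[source,yaml,subs="+attributes"]
----
global:
  imagePullSecrets:
  - application-collection
  {imgRegistry}
----

With `PROF_DEPLOYMENT=airgapped`, the rendered listing gains `imageRegistry: registry.example.com:5043` under `global:`; with `PROF_DEPLOYMENT=standard`, `{imgRegistry}` expands to the empty string and only a blank line remains, which YAML ignores. Without `subs="+attributes"`, the literal text `{imgRegistry}` would appear in the rendered documentation.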