From 6257b1640c9619067314a488f3ac10ab6010ee5f Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski <dariusz.trawinski@intel.com>
Date: Tue, 26 May 2026 00:56:46 +0200
Subject: [PATCH 1/4] integration with OpenVINO Model Server

---
 README.md                                     |  69 ++++---
 main.go                                       |   8 +
 pkg/distribution/builder/from_directory.go    |  15 +-
 .../builder/from_directory_test.go            |  39 ++++
 pkg/distribution/huggingface/model.go         |  38 ++--
 pkg/distribution/huggingface/repository.go    |  34 ++++
 .../huggingface/repository_test.go            |  50 +++++
 pkg/envconfig/envconfig.go                    |   6 +
 pkg/inference/backends/ovms/ovms.go           | 171 ++++++++++++++++++
 pkg/inference/backends/ovms/ovms_test.go      |  49 +++++
 pkg/inference/backends/runner.go              |   2 +-
 pkg/inference/scheduling/http_handler.go      |   5 +
 pkg/inference/scheduling/runner.go            |  13 +-
 13 files changed, 449 insertions(+), 50 deletions(-)
 create mode 100644 pkg/inference/backends/ovms/ovms.go
 create mode 100644 pkg/inference/backends/ovms/ovms_test.go

diff --git a/README.md b/README.md
index 56b428503..46a7689bc 100644
--- a/README.md
+++ b/README.md
@@ -300,6 +300,34 @@ docker buildx build \
 
 The vLLM wheels are sourced from the official vLLM GitHub Releases at `https://github.com/vllm-project/vllm/releases`, which provides prebuilt wheels for each release version.
 
+### OVMS integration
+
+Docker Model Runner can also run an OVMS backend.
+
+- Default OVMS binary path: `./ovms/bin/ovms`
+- Override binary path with: `OVMS_SERVER_PATH`
+
+When the runner starts, it will try to initialize OVMS as an available backend. If you are running from source and want to use a custom OVMS binary, set:
+
+```sh
+OVMS_SERVER_PATH=/absolute/path/to/ovms ./model-runner
+```
+
+You can target OVMS explicitly through the backend-prefixed OpenAI-compatible routes:
+
+```sh
+# List models exposed via OVMS backend routing
+curl http://localhost:13434/engines/ovms/v1/models
+
+# Example chat/completions call through OVMS backend routing
+curl http://localhost:13434/engines/ovms/v1/chat/completions -X POST -d '{
+  "model": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov",
+  "messages": [
+    {"role": "user", "content": "Hello from OVMS"}
+  ]
+}'
+```
+
 ## API Examples
 
 The Model Runner exposes a REST API that can be accessed via TCP port. You can interact with it using curl commands.
@@ -310,17 +338,17 @@ When running with `docker-run`, you can use regular HTTP requests:
 
 ```sh
 # List all available models
-curl http://localhost:8080/models
+curl http://localhost:13434/models
 
 # Create a new model
-curl http://localhost:8080/models/create -X POST -d '{"from": "ai/smollm2"}'
+curl http://localhost:13434/models/create -X POST -d '{"from": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov"}'
 
 # Get information about a specific model
-curl http://localhost:8080/models/ai/smollm2
+curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov
 
 # Chat with a model
-curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
-  "model": "ai/smollm2",
+curl http://localhost:13434/engines/ovms/v1/chat/completions -X POST -d '{
+  "model": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov",
   "messages": [
     {"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "Hello, how are you?"}
@@ -328,37 +356,8 @@ curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
 }'
 
 # Delete a model
-curl http://localhost:8080/models/ai/smollm2 -X DELETE
+curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov -X DELETE
 
-# Get metrics
-curl http://localhost:8080/metrics
-```
-
-The response will contain the model's reply:
-
-```json
-{
-  "id": "chat-12345",
-  "object": "chat.completion",
-  "created": 1682456789,
-  "model": "ai/smollm2",
-  "choices": [
-    {
-      "index": 0,
-      "message": {
-        "role": "assistant",
-        "content": "I'm doing well, thank you for asking! How can I assist you today?"
-      },
-      "finish_reason": "stop"
-    }
-  ],
-  "usage": {
-    "prompt_tokens": 24,
-    "completion_tokens": 16,
-    "total_tokens": 40
-  }
-}
-```
 
 ### Features
 
diff --git a/main.go b/main.go
index 8034d9553..1e9cfd2ff 100644
--- a/main.go
+++ b/main.go
@@ -19,6 +19,7 @@ import (
 	"github.com/docker/model-runner/pkg/inference"
 	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
+	"github.com/docker/model-runner/pkg/inference/backends/ovms"
 	"github.com/docker/model-runner/pkg/inference/backends/sglang"
 	"github.com/docker/model-runner/pkg/inference/config"
 	"github.com/docker/model-runner/pkg/inference/models"
@@ -64,6 +65,7 @@ func main() {
 	sglangServerPath := envconfig.SGLangServerPath()
 	mlxServerPath := envconfig.MLXServerPath()
 	diffusersServerPath := envconfig.DiffusersServerPath()
+	ovmsServerPath := envconfig.OVMSServerPath()
 	vllmMetalServerPath := envconfig.VLLMMetalServerPath()
 
 	// Create a proxy-aware HTTP transport
@@ -92,6 +94,9 @@ func main() {
 	if vllmMetalServerPath != "" {
 		log.Info("VLLM_METAL_SERVER_PATH", "path", vllmMetalServerPath)
 	}
+	if ovmsServerPath != "" {
+		log.Info("OVMS_SERVER_PATH", "path", ovmsServerPath)
+	}
 
 	// Create llama.cpp configuration from environment variables
 	llamaCppConfig, err := createLlamaCppConfigFromEnv()
@@ -129,6 +134,9 @@ func main() {
 				IncludeDiffusers:     true,
 				DiffusersPath:        diffusersServerPath,
 			}),
+			routing.BackendDef{Name: ovms.Name, Init: func(mm *models.Manager) (inference.Backend, error) {
+				return ovms.New(log, mm, log.With("component", ovms.Name), ovmsServerPath)
+			}},
 			routing.BackendDef{Name: sglang.Name, Init: func(mm *models.Manager) (inference.Backend, error) {
 				return sglang.New(log, mm, log.With("component", sglang.Name), nil, sglangServerPath)
 			}},
diff --git a/pkg/distribution/builder/from_directory.go b/pkg/distribution/builder/from_directory.go
index fdef17114..1362f6fe4 100644
--- a/pkg/distribution/builder/from_directory.go
+++ b/pkg/distribution/builder/from_directory.go
@@ -32,6 +32,10 @@ type DirectoryOptions struct {
 	// When set, it overrides the default behavior of using time.Now().
 	// This is useful for producing deterministic OCI digests.
 	Created *time.Time
+
+	// AllowNoWeightFiles allows packaging a directory even when it contains no
+	// GGUF/SafeTensors/DDUF weight files.
+	AllowNoWeightFiles bool
 }
 
 // DirectoryOption is a functional option for configuring FromDirectory.
@@ -62,6 +66,15 @@ func WithCreatedTime(t time.Time) DirectoryOption {
 	}
 }
 
+// WithAllowNoWeightFiles allows FromDirectory to succeed even when no standard
+// model weight files are present. This is used for formats such as OpenVINO IR
+// where model files are represented differently (for example .xml + .bin pairs).
+func WithAllowNoWeightFiles() DirectoryOption {
+	return func(opts *DirectoryOptions) {
+		opts.AllowNoWeightFiles = true
+	}
+}
+
 // FromDirectory creates a Builder from a directory containing model files.
 // It recursively scans the directory and adds each non-hidden file as a separate layer.
 // Each layer's filepath annotation preserves the relative path from the directory root.
@@ -195,7 +208,7 @@ func FromDirectory(dirPath string, opts ...DirectoryOption) (*Builder, error) {
 		return nil, fmt.Errorf("no files found in directory: %s", dirPath)
 	}
 
-	if len(weightFiles) == 0 {
+	if len(weightFiles) == 0 && !options.AllowNoWeightFiles {
 		return nil, fmt.Errorf("no weight files (safetensors, GGUF, or DDUF) found in directory: %s", dirPath)
 	}
 
diff --git a/pkg/distribution/builder/from_directory_test.go b/pkg/distribution/builder/from_directory_test.go
index 1fc1145aa..7dca73a99 100644
--- a/pkg/distribution/builder/from_directory_test.go
+++ b/pkg/distribution/builder/from_directory_test.go
@@ -3,6 +3,7 @@ package builder
 import (
 	"os"
 	"path/filepath"
+	"strings"
 	"testing"
 	"time"
 )
@@ -170,6 +171,44 @@ func TestFromDirectoryWithExclusions(t *testing.T) {
 	}
 }
 
+func TestFromDirectoryNoStandardWeights(t *testing.T) {
+	tmpDir := t.TempDir()
+	createTestFile(t, tmpDir, "openvino/model.xml", "<net></net>")
+	createTestFile(t, tmpDir, "openvino/model.bin", "weights")
+	createTestFile(t, tmpDir, "openvino/config.json", "{}")
+
+	_, err := FromDirectory(tmpDir)
+	if err == nil {
+		t.Fatal("expected error when directory has no GGUF/SafeTensors/DDUF weights")
+	}
+
+	if got := err.Error(); got == "" || !strings.Contains(got, "no weight files") {
+		t.Fatalf("expected no weight files error, got %q", got)
+	}
+}
+
+func TestFromDirectoryAllowNoWeightFiles(t *testing.T) {
+	tmpDir := t.TempDir()
+	createTestFile(t, tmpDir, "openvino/model.xml", "<net></net>")
+	createTestFile(t, tmpDir, "openvino/model.bin", "weights")
+	createTestFile(t, tmpDir, "openvino/config.json", "{}")
+
+	b, err := FromDirectory(tmpDir, WithAllowNoWeightFiles())
+	if err != nil {
+		t.Fatalf("FromDirectory with WithAllowNoWeightFiles failed: %v", err)
+	}
+
+	mdl := b.Model()
+	layers, err := mdl.Layers()
+	if err != nil {
+		t.Fatalf("Failed to get layers: %v", err)
+	}
+
+	if len(layers) != 3 {
+		t.Errorf("Expected 3 layers, got %d", len(layers))
+	}
+}
+
 func TestShouldExclude(t *testing.T) {
 	tests := []struct {
 		name       string
diff --git a/pkg/distribution/huggingface/model.go b/pkg/distribution/huggingface/model.go
index c714bfd67..75562f8f0 100644
--- a/pkg/distribution/huggingface/model.go
+++ b/pkg/distribution/huggingface/model.go
@@ -31,8 +31,9 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
 
 	// Filter to model files (weights + configs)
 	weightFiles, configFiles := FilterModelFiles(files)
+	isOpenVINORepo := IsOpenVINOModel(files)
 
-	if len(weightFiles) == 0 {
+	if len(weightFiles) == 0 && !isOpenVINORepo {
 		return nil, fmt.Errorf("no model weight files (GGUF or SafeTensors) found in repository %s", repo)
 	}
 
@@ -54,10 +55,20 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
 		}
 	}
 
-	// Combine all files to download
-	allFiles := append(weightFiles, configFiles...)
-	if mmprojFile != nil {
-		allFiles = append(allFiles, *mmprojFile)
+	// Combine all files to download.
+	// For OpenVINO repositories, pull all repository files so the full IR layout is preserved.
+	var allFiles []RepoFile
+	if isOpenVINORepo {
+		for _, f := range files {
+			if f.Type == "file" {
+				allFiles = append(allFiles, f)
+			}
+		}
+	} else {
+		allFiles = append(weightFiles, configFiles...)
+		if mmprojFile != nil {
+			allFiles = append(allFiles, *mmprojFile)
+		}
 	}
 
 	if progressWriter != nil {
@@ -90,7 +101,7 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
 		_ = progress.WriteProgress(progressWriter, "Building model artifact...", 0, 0, 0, "", "pull")
 	}
 
-	model, err := buildModelFromFiles(result.LocalPaths, weightFiles, configFiles, tempDir, createdTime)
+	model, err := buildModelFromFiles(result.LocalPaths, weightFiles, configFiles, tempDir, createdTime, isOpenVINORepo)
 	if err != nil {
 		return nil, fmt.Errorf("build model: %w", err)
 	}
@@ -103,26 +114,29 @@ func BuildModel(ctx context.Context, client *Client, repo, revision, tag string,
 // which preserves directory structure and adds each file as an individual layer with
 // filepath annotations. For GGUF models, it uses the V0.1 packaging (FromPaths)
 // for backward compatibility.
-func buildModelFromFiles(localPaths map[string]string, weightFiles, configFiles []RepoFile, tempDir string, createdTime *time.Time) (types.ModelArtifact, error) {
-	// Check if this is a safetensors model - use V0.2 packaging
-	if isSafetensorsModel(weightFiles) {
-		return buildSafetensorsModelV02(tempDir, createdTime)
+func buildModelFromFiles(localPaths map[string]string, weightFiles, configFiles []RepoFile, tempDir string, createdTime *time.Time, allowNoStandardWeights bool) (types.ModelArtifact, error) {
+	// Safetensors and OpenVINO repos are packaged with V0.2 layer-per-file packaging.
+	if isSafetensorsModel(weightFiles) || allowNoStandardWeights {
+		return buildDirectoryModelV02(tempDir, createdTime, allowNoStandardWeights)
 	}
 
 	// For GGUF models, use V0.1 packaging (backward compatible)
 	return buildGGUFModelV01(localPaths, weightFiles, configFiles, createdTime)
 }
 
-// buildSafetensorsModelV02 builds a safetensors model using V0.2 layer-per-file packaging.
+// buildDirectoryModelV02 builds a model using V0.2 layer-per-file packaging.
 // It uses builder.FromDirectory which recursively scans the tempDir and creates one layer
 // per file, preserving nested directory structure with filepath annotations.
 // If createdTime is non-nil, it is used as the creation timestamp for the OCI config
 // to produce deterministic digests. Otherwise time.Now() is used.
-func buildSafetensorsModelV02(tempDir string, createdTime *time.Time) (types.ModelArtifact, error) {
+func buildDirectoryModelV02(tempDir string, createdTime *time.Time, allowNoStandardWeights bool) (types.ModelArtifact, error) {
 	var dirOpts []builder.DirectoryOption
 	if createdTime != nil {
 		dirOpts = append(dirOpts, builder.WithCreatedTime(*createdTime))
 	}
+	if allowNoStandardWeights {
+		dirOpts = append(dirOpts, builder.WithAllowNoWeightFiles())
+	}
 
 	b, err := builder.FromDirectory(tempDir, dirOpts...)
 	if err != nil {
diff --git a/pkg/distribution/huggingface/repository.go b/pkg/distribution/huggingface/repository.go
index 1c7bb3f86..cd5bbd85b 100644
--- a/pkg/distribution/huggingface/repository.go
+++ b/pkg/distribution/huggingface/repository.go
@@ -62,6 +62,40 @@ func FilterModelFiles(repoFiles []RepoFile) (weights []RepoFile, configs []RepoF
 	return weights, configs
 }
 
+// IsOpenVINOModel returns true when the repository contains at least one OpenVINO
+// IR weight pair (.xml + .bin with the same stem), including nested paths.
+func IsOpenVINOModel(repoFiles []RepoFile) bool {
+	xmlFiles := make(map[string]struct{})
+	binFiles := make(map[string]struct{})
+
+	for _, f := range repoFiles {
+		if f.Type != "file" {
+			continue
+		}
+
+		ext := strings.ToLower(path.Ext(f.Path))
+		if ext != ".xml" && ext != ".bin" {
+			continue
+		}
+
+		stem := strings.TrimSuffix(f.Path, path.Ext(f.Path))
+		switch ext {
+		case ".xml":
+			xmlFiles[stem] = struct{}{}
+		case ".bin":
+			binFiles[stem] = struct{}{}
+		}
+	}
+
+	for stem := range xmlFiles {
+		if _, ok := binFiles[stem]; ok {
+			return true
+		}
+	}
+
+	return false
+}
+
 // TotalSize calculates the total size of files
 func TotalSize(repoFiles []RepoFile) int64 {
 	var total int64
diff --git a/pkg/distribution/huggingface/repository_test.go b/pkg/distribution/huggingface/repository_test.go
index 16e3a159b..4cbe350ec 100644
--- a/pkg/distribution/huggingface/repository_test.go
+++ b/pkg/distribution/huggingface/repository_test.go
@@ -102,3 +102,53 @@ func TestIsSafetensorsModel(t *testing.T) {
 		})
 	}
 }
+
+func TestIsOpenVINOModel(t *testing.T) {
+	tests := []struct {
+		name  string
+		files []RepoFile
+		want  bool
+	}{
+		{
+			name: "matching xml/bin pair at root",
+			files: []RepoFile{
+				{Type: "file", Path: "openvino_model.xml"},
+				{Type: "file", Path: "openvino_model.bin"},
+			},
+			want: true,
+		},
+		{
+			name: "matching xml/bin pair in subdirectory",
+			files: []RepoFile{
+				{Type: "file", Path: "int4/openvino_model.xml"},
+				{Type: "file", Path: "int4/openvino_model.bin"},
+				{Type: "file", Path: "int4/config.json"},
+			},
+			want: true,
+		},
+		{
+			name: "xml without matching bin",
+			files: []RepoFile{
+				{Type: "file", Path: "openvino_model.xml"},
+				{Type: "file", Path: "other_model.bin"},
+			},
+			want: false,
+		},
+		{
+			name: "no openvino files",
+			files: []RepoFile{
+				{Type: "file", Path: "model.safetensors"},
+				{Type: "file", Path: "config.json"},
+			},
+			want: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := IsOpenVINOModel(tt.files); got != tt.want {
+				t.Errorf("IsOpenVINOModel() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/pkg/envconfig/envconfig.go b/pkg/envconfig/envconfig.go
index 3592b9139..9b7da5e59 100644
--- a/pkg/envconfig/envconfig.go
+++ b/pkg/envconfig/envconfig.go
@@ -159,6 +159,12 @@ func VLLMMetalServerPath() string {
 	return Var("VLLM_METAL_SERVER_PATH")
 }
 
+// OVMSServerPath returns the optional path to the OVMS server binary.
+// Configured via OVMS_SERVER_PATH.
+func OVMSServerPath() string {
+	return Var("OVMS_SERVER_PATH")
+}
+
 // DisableMetrics is true when DISABLE_METRICS is set to a truthy value (e.g. "1").
 var DisableMetrics = Bool("DISABLE_METRICS")
 
diff --git a/pkg/inference/backends/ovms/ovms.go b/pkg/inference/backends/ovms/ovms.go
new file mode 100644
index 000000000..021b1f00f
--- /dev/null
+++ b/pkg/inference/backends/ovms/ovms.go
@@ -0,0 +1,171 @@
+package ovms
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log/slog"
+	"net"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/docker/model-runner/pkg/inference"
+	"github.com/docker/model-runner/pkg/inference/backends"
+	"github.com/docker/model-runner/pkg/inference/models"
+	"github.com/docker/model-runner/pkg/logging"
+)
+
+const (
+	// Name is the backend name.
+	Name = "ovms"
+
+	defaultBinaryPath = "./ovms/bin/ovms"
+)
+
+var ErrOVMSNotFound = errors.New("ovms binary not found")
+
+type ovms struct {
+	log              logging.Logger
+	modelManager     *models.Manager
+	serverLog        logging.Logger
+	status           string
+	customBinaryPath string
+}
+
+func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, customBinaryPath string) (inference.Backend, error) {
+	return &ovms{
+		log:              log,
+		modelManager:     modelManager,
+		serverLog:        serverLog,
+		status:           inference.FormatNotInstalled(""),
+		customBinaryPath: customBinaryPath,
+	}, nil
+}
+
+func (o *ovms) Name() string {
+	return Name
+}
+
+func (o *ovms) UsesExternalModelManagement() bool {
+	return false
+}
+
+func (o *ovms) UsesTCP() bool {
+	return true
+}
+
+func (o *ovms) HealthPath() string {
+	return "/v2/health/ready"
+}
+
+func (o *ovms) RewritePath(path string) string {
+	if len(path) > 3 && path[:4] == "/v1/" {
+		return "/v3/" + path[4:]
+	}
+	return path
+}
+
+func (o *ovms) Install(ctx context.Context, _ *http.Client) error {
+	binary := o.binaryPath()
+	if _, err := os.Stat(binary); err != nil {
+		o.status = inference.FormatNotInstalled("")
+		return ErrOVMSNotFound
+	}
+
+	checkCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+
+	output, err := exec.CommandContext(checkCtx, binary, "--version").Output()
+	if err != nil {
+		o.log.Warn("could not get OVMS version", "error", err)
+		o.status = inference.FormatRunning(inference.DetailVersionUnknown)
+		return nil
+	}
+
+	versionLine := strings.TrimSpace(string(output))
+	if versionLine == "" {
+		o.status = inference.FormatRunning(inference.DetailVersionUnknown)
+		return nil
+	}
+
+	o.status = inference.FormatRunning(versionLine)
+	return nil
+}
+
+func (o *ovms) Run(ctx context.Context, socket, model string, modelRef string, _ inference.BackendMode, _ *inference.BackendConfiguration) error {
+	bundle, err := o.modelManager.GetBundle(model)
+	if err != nil {
+		return fmt.Errorf("failed to get model: %w", err)
+	}
+	modelPath := resolveOVMSModelPath(bundle.RootDir())
+
+	_, port, err := net.SplitHostPort(socket)
+	if err != nil {
+		return fmt.Errorf("invalid backend socket address %q: %w", socket, err)
+	}
+
+	// Use the human-readable model reference for --model_name so that
+	// incoming requests (which carry the original name) match.
+	modelName := modelRef
+	if modelName == "" {
+		modelName = model
+	}
+	logLevel := ovmsLogLevel(o.log)
+
+	args := []string{
+		"--rest_port", port,
+		"--port", "0",
+		"--model_name", modelName,
+		"--model_path", modelPath,
+		"--task", "text_generation",
+		"--log_level", logLevel,
+	}
+
+	return backends.RunBackend(ctx, backends.RunnerConfig{
+		BackendName:     "OVMS",
+		Socket:          socket,
+		BinaryPath:      o.binaryPath(),
+		SandboxPath:     filepath.Dir(o.binaryPath()),
+		SandboxConfig:   "",
+		Args:            args,
+		Logger:          o.log,
+		ServerLogWriter: logging.NewWriter(o.serverLog),
+	})
+}
+
+func (o *ovms) Status() string {
+	return o.status
+}
+
+func (o *ovms) GetDiskUsage() (int64, error) {
+	return 0, nil
+}
+
+func (o *ovms) binaryPath() string {
+	if o.customBinaryPath != "" {
+		return o.customBinaryPath
+	}
+	return defaultBinaryPath
+}
+
+// resolveOVMSModelPath returns the path OVMS should receive via --model_path.
+// Runtime bundles store model files under a dedicated "model" subdirectory.
+// It falls back to the bundle root for backward compatibility if that subdirectory does not exist.
+func resolveOVMSModelPath(bundleRoot string) string {
+	modelDir := filepath.Join(bundleRoot, "model")
+	if info, err := os.Stat(modelDir); err == nil && info.IsDir() {
+		return modelDir
+	}
+	return bundleRoot
+}
+
+func ovmsLogLevel(logger logging.Logger) string {
+	if logger.Enabled(context.Background(), slog.LevelDebug) {
+		return "DEBUG"
+	}
+	return "INFO"
+}
diff --git a/pkg/inference/backends/ovms/ovms_test.go b/pkg/inference/backends/ovms/ovms_test.go
new file mode 100644
index 000000000..77739e817
--- /dev/null
+++ b/pkg/inference/backends/ovms/ovms_test.go
@@ -0,0 +1,49 @@
+package ovms
+
+import (
+	"log/slog"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/docker/model-runner/pkg/logging"
+)
+
+func TestResolveOVMSModelPath(t *testing.T) {
+	t.Run("uses model subdirectory when present", func(t *testing.T) {
+		bundleRoot := t.TempDir()
+		modelDir := filepath.Join(bundleRoot, "model")
+		if err := os.MkdirAll(modelDir, 0755); err != nil {
+			t.Fatalf("mkdir model dir: %v", err)
+		}
+
+		got := resolveOVMSModelPath(bundleRoot)
+		if got != modelDir {
+			t.Fatalf("resolveOVMSModelPath() = %q, want %q", got, modelDir)
+		}
+	})
+
+	t.Run("falls back to bundle root when model subdirectory is missing", func(t *testing.T) {
+		bundleRoot := t.TempDir()
+		got := resolveOVMSModelPath(bundleRoot)
+		if got != bundleRoot {
+			t.Fatalf("resolveOVMSModelPath() = %q, want %q", got, bundleRoot)
+		}
+	})
+}
+
+func TestOVMSLogLevel(t *testing.T) {
+	t.Run("debug logger uses DEBUG", func(t *testing.T) {
+		logger := logging.NewLogger(slog.LevelDebug)
+		if got := ovmsLogLevel(logger); got != "DEBUG" {
+			t.Fatalf("ovmsLogLevel() = %q, want %q", got, "DEBUG")
+		}
+	})
+
+	t.Run("non-debug logger uses INFO", func(t *testing.T) {
+		logger := logging.NewLogger(slog.LevelInfo)
+		if got := ovmsLogLevel(logger); got != "INFO" {
+			t.Fatalf("ovmsLogLevel() = %q, want %q", got, "INFO")
+		}
+	})
+}
diff --git a/pkg/inference/backends/runner.go b/pkg/inference/backends/runner.go
index 9cab22c56..6c32ea643 100644
--- a/pkg/inference/backends/runner.go
+++ b/pkg/inference/backends/runner.go
@@ -23,7 +23,7 @@ type ErrorTransformer func(output string) string
 
 // RunnerConfig holds configuration for a backend runner
 type RunnerConfig struct {
-	// BackendName is the display name of the backend (e.g., "llama.cpp", "vLLM")
+	// BackendName is the display name of the backend (e.g., "llama.cpp", "vLLM", "OVMS")
 	BackendName string
 	// Socket is the unix socket path
 	Socket string
diff --git a/pkg/inference/scheduling/http_handler.go b/pkg/inference/scheduling/http_handler.go
index a9f3077b9..bd36bad34 100644
--- a/pkg/inference/scheduling/http_handler.go
+++ b/pkg/inference/scheduling/http_handler.go
@@ -273,6 +273,11 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
 	// Create a request with the body replaced for forwarding upstream.
 	upstreamRequest := r.Clone(r.Context())
 	upstreamRequest.Body = io.NopCloser(bytes.NewReader(body))
+	// OpenAI-compatible inference endpoints always expect JSON payloads.
+	// Some clients (for example curl without explicit headers) default to
+	// application/x-www-form-urlencoded for -d bodies, which breaks OVMS
+	// routing and causes path-based model resolution. Normalize to JSON.
+	upstreamRequest.Header.Set("Content-Type", "application/json")
 
 	// Perform the request.
 	runner.ServeHTTP(w, upstreamRequest)
diff --git a/pkg/inference/scheduling/runner.go b/pkg/inference/scheduling/runner.go
index ed5ebff9c..64aa9ef82 100644
--- a/pkg/inference/scheduling/runner.go
+++ b/pkg/inference/scheduling/runner.go
@@ -131,6 +131,11 @@ func run(
 			// Remove the prefix up to the OpenAI API root.
 			pr.Out.URL.Path = trimRequestPathToOpenAIRoot(pr.Out.URL.Path)
 			pr.Out.URL.RawPath = trimRequestPathToOpenAIRoot(pr.Out.URL.RawPath)
+			// Allow backends to rewrite the proxied path.
+			if rp, ok := backend.(interface{ RewritePath(string) string }); ok {
+				pr.Out.URL.Path = rp.RewritePath(pr.Out.URL.Path)
+				pr.Out.URL.RawPath = rp.RewritePath(pr.Out.URL.RawPath)
+			}
 		},
 	}
 	proxy.ModifyResponse = func(resp *http.Response) error {
@@ -210,6 +215,12 @@ func run(
 
 // wait waits for the runner to be ready.
 func (r *runner) wait(ctx context.Context) error {
+	// Determine the health endpoint for this backend.
+	healthPath := "/health"
+	if hp, ok := r.backend.(interface{ HealthPath() string }); ok {
+		healthPath = hp.HealthPath()
+	}
+
 	// Loop and poll for readiness.
 	for p := 0; p < maximumReadinessPings; p++ {
 		select {
@@ -222,7 +233,7 @@ func (r *runner) wait(ctx context.Context) error {
 		}
 		// Create and execute a request targeting the health endpoint.
 		// Note: /health returns 503 during model loading, 200 when ready.
-		readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/health", http.NoBody)
+		readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost"+healthPath, http.NoBody)
 		if err != nil {
 			return fmt.Errorf("readiness request creation failed: %w", err)
 		}

From d01862afaca7ed3d469fcf4d4a6bdcb18f83ef62 Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski <dariusz.trawinski@intel.com>
Date: Tue, 26 May 2026 01:24:23 +0200
Subject: [PATCH 2/4] update

---
 README.md | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 92fe817cd..12cddc36d 100644
--- a/README.md
+++ b/README.md
@@ -332,8 +332,13 @@ When the runner starts, it will try to initialize OVMS as an available backend.
 OVMS_SERVER_PATH=/absolute/path/to/ovms ./model-runner
 ```
 
-You can target OVMS explicitly through the backend-prefixed OpenAI-compatible routes:
+Create a new model
+Use models from the Hugging Face Hub by referencing a repository that is in OpenVINO format.
+```sh
+curl http://localhost:13434/models/create -X POST -d '{"from": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov"}'
+```
 
+You can target OVMS explicitly through the backend-prefixed OpenAI-compatible routes:
 ```sh
 # List models exposed via OVMS backend routing
 curl http://localhost:13434/engines/ovms/v1/models
@@ -346,6 +351,10 @@ curl http://localhost:13434/engines/ovms/v1/chat/completions -X POST -d '{
   ]
 }'
 ```
+Delete the model
+```sh
+curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov -X DELETE
+```
 
 ## API Examples
 
@@ -357,17 +366,17 @@ When running with `docker-run`, you can use regular HTTP requests:
 
 ```sh
 # List all available models
-curl http://localhost:13434/models
+curl http://localhost:8080/models
 
 # Create a new model
-curl http://localhost:13434/models/create -X POST -d '{"from": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov"}'
+curl http://localhost:8080/models/create -X POST -d '{"from": "ai/smollm2"}'
 
 # Get information about a specific model
 curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov
 
 # Chat with a model
-curl http://localhost:13434/engines/ovms/v1/chat/completions -X POST -d '{
-  "model": "hf.co/OpenVINO/Qwen3-0.6B-int4-ov",
+curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{
+  "model": "ai/smollm2",
   "messages": [
     {"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "Hello, how are you?"}
@@ -377,6 +386,34 @@ curl http://localhost:13434/engines/ovms/v1/chat/completions -X POST -d '{
 # Delete a model
 curl http://localhost:13434/models/hf.co/OpenVINO/Qwen3-0.6B-int4-ov -X DELETE
 
+# Get metrics
+curl http://localhost:8080/metrics
+```
+The response will contain the model's reply:
+
+```json
+{
+  "id": "chat-12345",
+  "object": "chat.completion",
+  "created": 1682456789,
+  "model": "ai/smollm2",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "I'm doing well, thank you for asking! How can I assist you today?"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 24,
+    "completion_tokens": 16,
+    "total_tokens": 40
+  }
+}
+```
 
 ### Features
 

From 84ebb224545566fdec1c8896f4ff153e4afd33c5 Mon Sep 17 00:00:00 2001
From: "Trawinski, Dariusz" <dariusz.trawinski@intel.com>
Date: Tue, 26 May 2026 01:28:55 +0200
Subject: [PATCH 3/4] Update pkg/inference/backends/ovms/ovms.go

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 pkg/inference/backends/ovms/ovms.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/inference/backends/ovms/ovms.go b/pkg/inference/backends/ovms/ovms.go
index 33664cca1..b35090050 100644
--- a/pkg/inference/backends/ovms/ovms.go
+++ b/pkg/inference/backends/ovms/ovms.go
@@ -71,7 +71,7 @@ func (o *ovms) RewritePath(path string) string {
 
 func (o *ovms) Install(ctx context.Context, _ *http.Client) error {
 	binary := o.binaryPath()
-	if _, err := os.Stat(binary); err != nil {
+	if _, err := exec.LookPath(binary); err != nil {
 		o.status = inference.FormatNotInstalled("")
 		return ErrOVMSNotFound
 	}

From 941c31e99c617103c7005fa45f696666ab81337b Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski <dariusz.trawinski@intel.com>
Date: Tue, 26 May 2026 01:39:49 +0200
Subject: [PATCH 4/4] update readme

---
 README.md                                |  8 +++++---
 pkg/inference/backends/ovms/ovms.go      | 13 +++++++++---
 pkg/inference/backends/ovms/ovms_test.go | 25 ++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 12cddc36d..27f7a1cf3 100644
--- a/README.md
+++ b/README.md
@@ -323,13 +323,15 @@ The vLLM wheels are sourced from the official vLLM GitHub Releases at `https://g
 
 Docker Model Runner can also run an OVMS backend.
 
-- Default OVMS binary path: `./ovms/bin/ovms`
+- Default OVMS binary: resolved from `PATH` (looks up `ovms`)
 - Override binary path with: `OVMS_SERVER_PATH`
 
-When the runner starts, it will try to initialize OVMS as an available backend. If you are running from source and want to use a custom OVMS binary, set:
+OVMS can be installed by following this [guide](https://docs.openvino.ai/2026/model-server/ovms_docs_deploying_server_baremetal.html). The minimum supported version is 2026.2.
+
+When the runner starts, it will try to initialize OVMS as an available backend.
 
 ```sh
-OVMS_SERVER_PATH=/absolute/path/to/ovms ./model-runner
+MODEL_RUNNER_PORT=13434 ./model-runner
 ```
 
 Create a new model
diff --git a/pkg/inference/backends/ovms/ovms.go b/pkg/inference/backends/ovms/ovms.go
index b35090050..7a6f133ae 100644
--- a/pkg/inference/backends/ovms/ovms.go
+++ b/pkg/inference/backends/ovms/ovms.go
@@ -22,8 +22,6 @@ import (
 const (
 	// Name is the backend name.
 	Name = "ovms"
-
-	defaultBinaryPath = "./ovms/bin/ovms"
 )
 
 var ErrOVMSNotFound = errors.New("ovms binary not found")
@@ -71,6 +69,11 @@ func (o *ovms) RewritePath(path string) string {
 
 func (o *ovms) Install(ctx context.Context, _ *http.Client) error {
 	binary := o.binaryPath()
+	if o.customBinaryPath != "" {
+		o.log.Info("OVMS binary configured via OVMS_SERVER_PATH", "path", binary)
+	} else if resolved, err := exec.LookPath(Name); err == nil {
+		o.log.Info("OVMS binary resolved from PATH", "path", resolved)
+	}
 	if _, err := exec.LookPath(binary); err != nil {
 		o.status = inference.FormatNotInstalled("")
 		return ErrOVMSNotFound
@@ -154,7 +157,11 @@ func (o *ovms) binaryPath() string {
 	if o.customBinaryPath != "" {
 		return o.customBinaryPath
 	}
-	return defaultBinaryPath
+	if path, err := exec.LookPath(Name); err == nil {
+		return path
+	}
+	// Keep command name as a last resort so error reporting remains clear.
+	return Name
 }
 
 // resolveOVMSModelPath returns the path OVMS should receive via --model_path.
diff --git a/pkg/inference/backends/ovms/ovms_test.go b/pkg/inference/backends/ovms/ovms_test.go
index 77739e817..3a67e4953 100644
--- a/pkg/inference/backends/ovms/ovms_test.go
+++ b/pkg/inference/backends/ovms/ovms_test.go
@@ -9,6 +9,31 @@ import (
 	"github.com/docker/model-runner/pkg/logging"
 )
 
+func TestBinaryPath(t *testing.T) {
+	t.Run("uses custom binary path when provided", func(t *testing.T) {
+		o := &ovms{customBinaryPath: "/tmp/custom-ovms"}
+		if got := o.binaryPath(); got != "/tmp/custom-ovms" {
+			t.Fatalf("binaryPath() = %q, want %q", got, "/tmp/custom-ovms")
+		}
+	})
+
+	t.Run("uses ovms from PATH when custom path is empty", func(t *testing.T) {
+		binDir := t.TempDir()
+		binary := filepath.Join(binDir, Name)
+		if err := os.WriteFile(binary, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+			t.Fatalf("write fake ovms binary: %v", err)
+		}
+
+		originalPath := os.Getenv("PATH")
+		t.Setenv("PATH", binDir+string(os.PathListSeparator)+originalPath)
+
+		o := &ovms{}
+		if got := o.binaryPath(); got != binary {
+			t.Fatalf("binaryPath() = %q, want %q", got, binary)
+		}
+	})
+}
+
 func TestResolveOVMSModelPath(t *testing.T) {
 	t.Run("uses model subdirectory when present", func(t *testing.T) {
 		bundleRoot := t.TempDir()