Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .devops/openvino.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
ARG OPENVINO_VERSION_MAJOR=2026.2
ARG OPENVINO_VERSION_FULL=2026.2.0.21903.52ddc073857
ARG OPENVINO_VERSION_MAJOR=2026.2.1
ARG OPENVINO_VERSION_FULL=2026.2.1.21919.ede283a88e3
ARG UBUNTU_VERSION=24.04

# Intel GPU driver versions. https://github.com/intel/compute-runtime/releases
ARG IGC_VERSION=v2.34.4
ARG IGC_VERSION_FULL=2_2.34.4+21428
ARG COMPUTE_RUNTIME_VERSION=26.18.38308.1
ARG COMPUTE_RUNTIME_VERSION_FULL=26.18.38308.1-0
ARG IGC_VERSION=v2.36.3
ARG IGC_VERSION_FULL=2_2.36.3+21719
ARG COMPUTE_RUNTIME_VERSION=26.22.38646.4
ARG COMPUTE_RUNTIME_VERSION_FULL=26.22.38646.4-0
ARG IGDGMM_VERSION=22.10.0

# Intel NPU driver versions. https://github.com/intel/linux-npu-driver/releases
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/build-cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ jobs:

env:
# Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.2"
OPENVINO_VERSION_FULL: "2026.2.0.21903.52ddc073857"
OPENVINO_VERSION_MAJOR: "2026.2.1"
OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

steps:
- name: Clone
Expand All @@ -96,8 +96,8 @@ jobs:

env:
# Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.2"
OPENVINO_VERSION_FULL: "2026.2.0.21903.52ddc073857"
OPENVINO_VERSION_MAJOR: "2026.2.1"
OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

steps:
- name: Clone
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/build-openvino.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ jobs:

env:
# Sync versions in build-openvino.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.2"
OPENVINO_VERSION_FULL: "2026.2.0.21903.52ddc073857"
OPENVINO_VERSION_MAJOR: "2026.2.1"
OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

steps:
- name: Clone
Expand Down Expand Up @@ -96,8 +96,8 @@ jobs:

env:
# Sync versions in build-openvino.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.2"
OPENVINO_VERSION_FULL: "2026.2.0.21903.52ddc073857"
OPENVINO_VERSION_MAJOR: "2026.2.1"
OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

steps:
- name: Clone
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build-self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,8 @@ jobs:

env:
# Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.2"
OPENVINO_VERSION_FULL: "2026.2.0.21903.52ddc073857"
OPENVINO_VERSION_MAJOR: "2026.2.1"
OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

steps:
- name: Clone
Expand Down
66 changes: 55 additions & 11 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -446,8 +446,8 @@ jobs:

env:
# Sync versions in build-openvino.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.2"
OPENVINO_VERSION_FULL: "2026.2.0.21903.52ddc073857"
OPENVINO_VERSION_MAJOR: "2026.2.1"
OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

steps:
- name: Set OpenVINO version output
Expand Down Expand Up @@ -506,8 +506,11 @@ jobs:
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }}
cmake --build build/ReleaseOV --config Release -j $(nproc)
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
${{ env.CMAKE_ARGS }}
cmake --build build/ReleaseOV --config Release --parallel
- name: ccache-clear
uses: ./.github/actions/ccache-clear
Expand All @@ -521,8 +524,26 @@ jobs:
- name: Pack artifacts
id: pack_artifacts
run: |
cp LICENSE ./build/ReleaseOV/bin/
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/ReleaseOV/bin .
dest=./build/ReleaseOV/bin/
OPENVINO_ROOT=./openvino_toolkit
ov_lib="$OPENVINO_ROOT/runtime/lib/intel64"
# Bundle OpenVINO runtime libs + TBB. Binaries built with RPATH=$ORIGIN
# load these siblings without setupvars.sh / LD_LIBRARY_PATH.
cp -P "$ov_lib"/libopenvino.so* \
"$ov_lib"/libopenvino_c.so* \
"$ov_lib"/libopenvino_*_plugin.so \
"$ov_lib"/libopenvino_intel_npu_compiler*.so \
"$OPENVINO_ROOT/runtime/3rdparty/tbb/lib"/*.so* \
"$dest"
cp -P /usr/lib/x86_64-linux-gnu/libOpenCL.so.1* "$dest" 2>/dev/null || true
cp "$ov_lib/cache.json" "$dest" 2>/dev/null || true
# OpenVINO licensing
cp -r "$OPENVINO_ROOT/docs/licensing" "$dest/openvino-licensing"
Comment thread
ravi9 marked this conversation as resolved.
Outdated
cp LICENSE "$dest"
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C "$dest" .
- name: Upload artifacts
uses: actions/upload-artifact@v6
Expand All @@ -538,8 +559,8 @@ jobs:

env:
# Sync versions in build-openvino.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.2"
OPENVINO_VERSION_FULL: "2026.2.0.21903.52ddc073857"
OPENVINO_VERSION_MAJOR: "2026.2.1"
OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

steps:
- name: Set OpenVINO version output
Expand Down Expand Up @@ -607,7 +628,9 @@ jobs:
-A x64 ^
-DCMAKE_BUILD_TYPE=Release ^
-DGGML_OPENVINO=ON ^
-DCMAKE_TOOLCHAIN_FILE=C:\vcpkg\scripts\buildsystems\vcpkg.cmake
-DLLAMA_BUILD_BORINGSSL=ON ^
-DCMAKE_TOOLCHAIN_FILE=C:\vcpkg\scripts\buildsystems\vcpkg.cmake ^
${{ env.CMAKE_ARGS }}
cmake --build build\ReleaseOV --config Release -- /m
Expand All @@ -624,8 +647,29 @@ jobs:
id: pack_artifacts
shell: powershell
run: |
Copy-Item LICENSE .\build\ReleaseOV\bin\
7z a -snl llama-${{ steps.tag.outputs.name }}-bin-win-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.zip .\build\ReleaseOV\bin\*
# Locate the extracted OpenVINO toolkit root (same pattern as the Build step).
$OPENVINO_ROOT = (Get-ChildItem -Directory openvino_toolkit | Select-Object -First 1).FullName
if (-not $OPENVINO_ROOT) {
Write-Error "OpenVINO toolkit folder not found under .\openvino_toolkit"
exit 1
}
$dest = ".\build\ReleaseOV\bin\Release"
$ovBin = Join-Path $OPENVINO_ROOT 'runtime\bin\intel64\Release'
Copy-Item -Path (Join-Path $ovBin '*.dll') -Destination $dest -Force
Copy-Item -Path (Join-Path $ovBin 'cache.json') -Destination $dest -Force
$tbbBin = Join-Path $OPENVINO_ROOT 'runtime\3rdparty\tbb\bin'
Copy-Item -Path (Join-Path $tbbBin 'tbb*.dll') -Destination $dest -Force
# OpenVINO licensing
$licensingDest = Join-Path $dest 'openvino-licensing'
New-Item -ItemType Directory -Force -Path $licensingDest | Out-Null
Copy-Item -Path (Join-Path $OPENVINO_ROOT 'docs\licensing\*') -Destination $licensingDest -Recurse -Force
Copy-Item LICENSE $dest
7z a -snl llama-${{ steps.tag.outputs.name }}-bin-win-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.zip $dest\*
- name: Upload artifacts
uses: actions/upload-artifact@v6
Expand Down
12 changes: 6 additions & 6 deletions docs/backend/OPENVINO.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ chmod +x ubuntu-llamacpp-ov-install.sh
# ============================================
set -euo pipefail

OPENVINO_VERSION_MAJOR="2026.2"
OPENVINO_VERSION_FULL="2026.2.0.21903.52ddc073857"
OPENVINO_VERSION_MAJOR="2026.2.1"
OPENVINO_VERSION_FULL="2026.2.1.21919.ede283a88e3"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
OPENVINO_INSTALL_DIR="/opt/intel/openvino_${OPENVINO_VERSION_MAJOR}"
Expand Down Expand Up @@ -334,7 +334,7 @@ echo " ./build/ReleaseOV/bin/llama-cli -m model.gguf"
```

> [!NOTE]
> The script pins OpenVINO `2026.2` via the `OPENVINO_VERSION_MAJOR` / `OPENVINO_VERSION_FULL` variables at the top — edit them to track a different release.
> The script pins OpenVINO `2026.2.1` via the `OPENVINO_VERSION_MAJOR` / `OPENVINO_VERSION_FULL` variables at the top — edit them to track a different release.

</details>

Expand Down Expand Up @@ -364,8 +364,8 @@ REM ============================================
REM llama.cpp OpenVINO Build Script (Ninja)
REM ============================================

set "OPENVINO_VERSION_MAJOR=2026.2"
set "OPENVINO_VERSION_FULL=2026.2.0.21903.52ddc073857"
set "OPENVINO_VERSION_MAJOR=2026.2.1"
set "OPENVINO_VERSION_FULL=2026.2.1.21919.ede283a88e3"

set "SCRIPT_DIR=%~dp0"
set "VCPKG_DIR=C:\vcpkg"
Expand Down Expand Up @@ -547,7 +547,7 @@ endlocal
```

> [!NOTE]
> The script pins OpenVINO `2026.2` via the `OPENVINO_VERSION_MAJOR` / `OPENVINO_VERSION_FULL` variables at the top — edit them to track a different release. From any new shell, source the matching `setupvars` script via the junction — `call "C:\Intel\openvino\setupvars.bat"` from `cmd`, or `& "C:\Intel\openvino\setupvars.ps1"` from PowerShell. If `winget` cannot register Visual Studio Build Tools on first run, install them once manually and re-run the script from an elevated **Developer Command Prompt for VS 2022**.
> The script pins OpenVINO `2026.2.1` via the `OPENVINO_VERSION_MAJOR` / `OPENVINO_VERSION_FULL` variables at the top — edit them to track a different release. From any new shell, source the matching `setupvars` script via the junction — `call "C:\Intel\openvino\setupvars.bat"` from `cmd`, or `& "C:\Intel\openvino\setupvars.ps1"` from PowerShell. If `winget` cannot register Visual Studio Build Tools on first run, install them once manually and re-run the script from an elevated **Developer Command Prompt for VS 2022**.

</details>

Expand Down
69 changes: 3 additions & 66 deletions ggml/src/ggml-openvino/ggml-decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1270,77 +1270,14 @@ void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecode
}

std::string GgmlOvDecoder::compute_op_type(const ggml_tensor * node) {
static const std::map<ggml_op, std::string> ops = {
{GGML_OP_NONE, "GGML_OP_NONE" },
{GGML_OP_ACC, "GGML_OP_ACC" },
{GGML_OP_ADD, "GGML_OP_ADD" },
{GGML_OP_ADD1, "GGML_OP_ADD1" },
{GGML_OP_ADD_ID, "GGML_OP_ADD_ID" },
{GGML_OP_CONCAT, "GGML_OP_CONCAT" },
{GGML_OP_CONT, "GGML_OP_CONT" },
{GGML_OP_DIV, "GGML_OP_DIV" },
{GGML_OP_DUP, "GGML_OP_DUP" },
{GGML_OP_GET_ROWS, "GGML_OP_GET_ROWS" },
{GGML_OP_MUL, "GGML_OP_MUL" },
{GGML_OP_MUL_MAT, "GGML_OP_MUL_MAT" },
{GGML_OP_MUL_MAT_ID, "GGML_OP_MUL_MAT_ID" },
{GGML_OP_PERMUTE, "GGML_OP_PERMUTE" },
{GGML_OP_RESHAPE, "GGML_OP_RESHAPE" },
{GGML_OP_RMS_NORM, "GGML_OP_RMS_NORM" },
{GGML_OP_NORM, "GGML_OP_NORM" },
{GGML_OP_ROPE, "GGML_OP_ROPE" },
{GGML_OP_SCALE, "GGML_OP_SCALE" },
{GGML_OP_SOFT_MAX, "GGML_OP_SOFT_MAX" },
{GGML_OP_SUM_ROWS, "GGML_OP_SUM_ROWS" },
{GGML_OP_SUB, "GGML_OP_SUB" },
{GGML_OP_TRANSPOSE, "GGML_OP_TRANSPOSE" },
{GGML_OP_VIEW, "GGML_OP_VIEW" },
{GGML_OP_SET_ROWS, "GGML_OP_SET_ROWS" },
{GGML_OP_CPY, "GGML_OP_CPY" },
{GGML_OP_FLASH_ATTN_EXT, "GGML_OP_FLASH_ATTN_EXT" },
{GGML_OP_L2_NORM, "GGML_OP_L2_NORM" },
{GGML_OP_CLAMP, "GGML_OP_CLAMP" },
{GGML_OP_PAD, "GGML_OP_PAD" },
{GGML_OP_SSM_CONV, "GGML_OP_SSM_CONV" },
{GGML_OP_GATED_DELTA_NET, "GGML_OP_GATED_DELTA_NET"},
{GGML_OP_ARGSORT, "GGML_OP_ARGSORT" },
{GGML_OP_REPEAT, "GGML_OP_REPEAT" },
{GGML_OP_IM2COL, "GGML_OP_IM2COL" }
};
static const std::map<ggml_unary_op, std::string> unary_ops = {
{GGML_UNARY_OP_ABS, "GGML_UNARY_OP_ABS" },
{GGML_UNARY_OP_SGN, "GGML_UNARY_OP_SGN" },
{GGML_UNARY_OP_NEG, "GGML_UNARY_OP_NEG" },
{GGML_UNARY_OP_STEP, "GGML_UNARY_OP_STEP" },
{GGML_UNARY_OP_TANH, "GGML_UNARY_OP_TANH" },
{GGML_UNARY_OP_ELU, "GGML_UNARY_OP_ELU" },
{GGML_UNARY_OP_RELU, "GGML_UNARY_OP_RELU" },
{GGML_UNARY_OP_SIGMOID, "GGML_UNARY_OP_SIGMOID" },
{GGML_UNARY_OP_GELU, "GGML_UNARY_OP_GELU" },
{GGML_UNARY_OP_GELU_QUICK, "GGML_UNARY_OP_GELU_QUICK" },
{GGML_UNARY_OP_SILU, "GGML_UNARY_OP_SILU" },
{GGML_UNARY_OP_SOFTPLUS, "GGML_UNARY_OP_SOFTPLUS" },
{GGML_UNARY_OP_HARDSWISH, "GGML_UNARY_OP_HARDSWISH" },
{GGML_UNARY_OP_HARDSIGMOID, "GGML_UNARY_OP_HARDSIGMOID"},
{GGML_UNARY_OP_EXP, "GGML_UNARY_OP_EXP" },
{GGML_UNARY_OP_COUNT, "GGML_UNARY_OP_COUNT" }
};
static const std::map<ggml_glu_op, std::string> glu_ops = {
{GGML_GLU_OP_SWIGLU, "GGML_GLU_OP_SWIGLU"},
{GGML_GLU_OP_GEGLU, "GGML_GLU_OP_GEGLU" },
{GGML_GLU_OP_REGLU, "GGML_GLU_OP_REGLU" }
};

switch (node->op) {
case GGML_OP_UNARY:
return unary_ops.at(ggml_get_unary_op(node));
return std::string("GGML_UNARY_OP_") + ggml_unary_op_name(ggml_get_unary_op(node));
case GGML_OP_GLU:
return glu_ops.at(ggml_get_glu_op(node));
return std::string("GGML_GLU_OP_") + ggml_glu_op_name(ggml_get_glu_op(node));
default:
return ops.at(node->op);
return std::string("GGML_OP_") + ggml_op_name(node->op);
}
static const std::string unknown_op = "UNKNOWN_GGML_OP";
return unknown_op;
}

const std::string & GgmlOvDecoder::get_op_type(int node_idx) const {
Expand Down
24 changes: 19 additions & 5 deletions ggml/src/ggml-openvino/openvino/op/add_id.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,22 @@ namespace frontend {
namespace ggml {
namespace op {

static ov::Output<ov::Node> reshape_add_id_input_to_2d(const ov::Output<ov::Node> & input,
const ov::PartialShape & input_shape,
const std::vector<int> & dims) {
const auto actual_shape = input.get_partial_shape();
if (actual_shape.rank().is_static() && actual_shape.rank().get_length() == 2) {
return input;
}

if (input_shape.rank().is_static() && input_shape.rank().get_length() == 2) {
return input;
}

auto shape = std::make_shared<ov::op::v3::ShapeOf>(input, ov::element::i64);
return std::make_shared<ov::op::v1::Reshape>(input, get_dimensions(shape, dims), false);
}

OutputVector translate_add_id(const NodeContext & context) {
num_inputs_check(context, 3, 3);

Expand All @@ -28,11 +44,9 @@ OutputVector translate_add_id(const NodeContext & context) {
// input: [1, n_token, n_used, n_embd]
// bias: [1, 1, n_expert, n_embd]
// ids: [1, 1, n_token, n_used]
auto bias_shape_4d = std::make_shared<ov::op::v3::ShapeOf>(bias, ov::element::i64);
auto ids_shape_4d = std::make_shared<ov::op::v3::ShapeOf>(ids, ov::element::i64);

bias = std::make_shared<ov::op::v1::Reshape>(bias, get_dimensions(bias_shape_4d, {2, 3}), false);
ids = std::make_shared<ov::op::v1::Reshape>(ids, get_dimensions(ids_shape_4d, {2, 3}), false);
// Model bias constants may already be stored as [n_expert, n_embd].
bias = reshape_add_id_input_to_2d(bias, context.get_input_shape(1), {2, 3});
ids = reshape_add_id_input_to_2d(ids, context.get_input_shape(2), {2, 3});

if (ids.get_element_type() != ov::element::i32 && ids.get_element_type() != ov::element::i64) {
ids = std::make_shared<ov::op::v0::Convert>(ids, ov::element::i32);
Expand Down
Loading