Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/downlo
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v3.0.2.zip;a064e663b4d7a337ac291d1bef7337ef4e60a1ae
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b
pytorch_cpuinfo;https://github.com/crvineeth97/cpuinfo/archive/df8c6a8ce5cf12baabe5e7c9213aaeeffb18bd82.zip;34999b2434e49f1a66d50fb62f28663fb8c96881
re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
Expand Down
6 changes: 1 addition & 5 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -369,11 +369,7 @@ if (CPUINFO_SUPPORTED)
URL_HASH SHA1=${DEP_SHA1_pytorch_cpuinfo}
EXCLUDE_FROM_ALL
PATCH_COMMAND
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch &&
# https://github.com/pytorch/cpuinfo/pull/324
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch &&
# https://github.com/pytorch/cpuinfo/pull/348
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch
FIND_PACKAGE_ARGS NAMES cpuinfo
)
else()
Expand Down
4 changes: 2 additions & 2 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1518,8 +1518,8 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)

endif()


if(onnxruntime_USE_QNN)
# Build ep_weight_sharing_ctx_gen for all supported EPs (QNN, TensorRT, OpenVINO, VitisAI)
if(onnxruntime_USE_QNN OR onnxruntime_USE_TENSORRT OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_VITISAI)
#qnn ctx generator
set(ep_weight_sharing_ctx_gen_src_dir ${TEST_SRC_DIR}/ep_weight_sharing_ctx_gen)
set(ep_weight_sharing_ctx_gen_src_patterns
Expand Down
91 changes: 0 additions & 91 deletions cmake/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch

This file was deleted.

19 changes: 0 additions & 19 deletions cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch

This file was deleted.

91 changes: 0 additions & 91 deletions cmake/vcpkg-ports/cpuinfo/patch_vcpkg_arm64ec_support.patch

This file was deleted.

10 changes: 4 additions & 6 deletions cmake/vcpkg-ports/cpuinfo/portfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@ endif()

vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO pytorch/cpuinfo
REF 403d652dca4c1046e8145950b1c0997a9f748b57
SHA512 f7cd6dc44bd1120af610cae1337ed4c0f557ba78d2de9c73fed350fa3dfe9512643a1619ae55f5a540c6316a87d641856cca27297bb8766e48f39b7b7a59da1f
HEAD_REF master
REPO crvineeth97/cpuinfo
REF df8c6a8ce5cf12baabe5e7c9213aaeeffb18bd82
SHA512 0 # TODO: update SHA512 after PR merges to pytorch/cpuinfo
HEAD_REF vchelur/add-cpuinfo-deinitialize
PATCHES
patch_cpuinfo_h_for_arm64ec.patch
patch_vcpkg_arm64ec_support.patch # https://github.com/pytorch/cpuinfo/pull/324
win_arm_fp16_detection_fallback.patch # https://github.com/pytorch/cpuinfo/pull/348
)

vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
Expand Down
19 changes: 0 additions & 19 deletions cmake/vcpkg-ports/cpuinfo/win_arm_fp16_detection_fallback.patch

This file was deleted.

4 changes: 2 additions & 2 deletions onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ class GQAAttentionBase {
const size_t batch_index = i / num_heads_;
const size_t head_index = i % num_heads_;
const size_t total_seqlen = SafeInt<size_t>(seqlens_k[batch_index]) + 1;
const size_t past_seqlen = is_prompt ? 0 : total_seqlen - sequence_length; // Assume no padding sequence length
const size_t past_seqlen = is_prompt ? 0 : (total_seqlen > sequence_length ? total_seqlen - sequence_length : 0); // Assume no padding sequence length
const size_t past_chunk_length = SafeInt<size_t>(past_seqlen) * head_size;

const ptrdiff_t output_offset = SafeInt<ptrdiff_t>(i) * sequence_length * present_buffer_sequence_length;
Expand Down Expand Up @@ -441,7 +441,7 @@ class GQAAttentionBase {
const size_t batch_index = i / num_heads_;
const size_t head_index = i % num_heads_;
const size_t total_seqlen = SafeInt<size_t>(seqlens_k[batch_index]) + 1;
const size_t past_seqlen = is_prompt ? 0 : total_seqlen - sequence_length; // Assume no padding sequence length
const size_t past_seqlen = is_prompt ? 0 : (total_seqlen > sequence_length ? total_seqlen - sequence_length : 0); // Assume no padding sequence length
const size_t past_chunk_length = SafeInt<size_t>(past_seqlen) * head_size;

const T* v;
Expand Down
19 changes: 14 additions & 5 deletions onnxruntime/contrib_ops/cpu/bert/group_query_attention.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,20 @@ Status GroupQueryAttention<T>::Compute(OpKernelContext* context) const {
for (int b = 0; b < batch_size; b++) {
const int total_seqlen = seqlens_k->Data<int32_t>()[b] + 1;
const int past_seqlen = total_seqlen - sequence_length;
for (int s = 0; s < sequence_length; s++) {
if (past_seqlen + s < total_seqlen) {
default_pos_ids[b * sequence_length + s] = static_cast<int64_t>(past_seqlen) + s;
} else {
default_pos_ids[b * sequence_length + s] = static_cast<int64_t>(1);

// Handle inconsistent random data in seqlens_k, when past_seqlen becomes negative
if (past_seqlen < 0) {
// Fallback: generate consecutive position IDs starting from 0
for (int s = 0; s < sequence_length; s++) {
default_pos_ids[b * sequence_length + s] = static_cast<int64_t>(s);
}
} else {
for (int s = 0; s < sequence_length; s++) {
if (past_seqlen + s < total_seqlen) {
default_pos_ids[b * sequence_length + s] = static_cast<int64_t>(past_seqlen) + s;
} else {
default_pos_ids[b * sequence_length + s] = static_cast<int64_t>(1);
}
}
}
}
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/core/common/cpuid_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -378,4 +378,15 @@ CPUIDInfo::CPUIDInfo() {
#endif
#endif // defined(CPUIDINFO_ARCH_ARM)
}

// Tears down the cpuinfo library state owned by this object.
// Calls cpuinfo_deinitialize() at most once, and only when
// pytorch_cpuinfo_init_ is set. Compiles to a no-op when CPUINFO_SUPPORTED
// is not defined.
// NOTE(review): the function-local flag is a plain bool, so this presumably
// expects to be called from a single thread at shutdown - confirm with callers.
void CPUIDInfo::ShutDown() {
#if defined(CPUINFO_SUPPORTED)
  static bool teardown_done = false;

  // Nothing to do if we already tore down, or cpuinfo is not marked initialized.
  if (teardown_done || !pytorch_cpuinfo_init_) {
    return;
  }

  cpuinfo_deinitialize();
  pytorch_cpuinfo_init_ = false;
  teardown_done = true;
#endif
}
} // namespace onnxruntime
23 changes: 21 additions & 2 deletions onnxruntime/core/common/cpuid_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#pragma once

#include <atomic>
#include "core/common/common.h"
#include "core/common/cpuid_arch_definition.h"

Expand All @@ -11,8 +12,7 @@ namespace onnxruntime {
class CPUIDInfo {
public:
static const CPUIDInfo& GetCPUIDInfo() {
static CPUIDInfo cpuid_info;
return cpuid_info;
return Instance();
}

std::string_view GetCPUVendor() const {
Expand Down Expand Up @@ -104,13 +104,32 @@ class CPUIDInfo {
return has_fp16_;
}

// Runs ShutDown() on the CPUIDInfo singleton, but only if that singleton has
// already been created; otherwise does nothing.
static void ShutdownCpuInfo() {
  // Don't create the singleton during DLL unload.
  const bool singleton_exists = InstanceCreated().load(std::memory_order_acquire);
  if (singleton_exists) {
    Instance().ShutDown();
  }
}

private:
// Log function that uses ORT logging if available or writes to stderr.
// This enables us to log even before ORT logging has been initialized.
static void LogEarlyWarning(std::string_view message);

CPUIDInfo();

// Returns the flag that records whether Instance() has been called.
// The flag starts out false and lives in its own function-local static, so
// reading it does not construct the CPUIDInfo singleton.
static std::atomic<bool>& InstanceCreated() {
  static std::atomic<bool> instance_created_flag{false};
  return instance_created_flag;
}

// Returns the mutable CPUIDInfo singleton, constructing it on first use.
// After the singleton exists, the InstanceCreated() flag is set (on every
// call) so that ShutdownCpuInfo() can tell whether there is anything to
// shut down.
static CPUIDInfo& Instance() {
  static CPUIDInfo singleton;
  // Publish only after construction of the singleton has completed.
  InstanceCreated().store(true, std::memory_order_release);
  return singleton;
}

void ShutDown();

void VendorInfoInit();

#if defined(CPUIDINFO_ARCH_X86)
Expand Down
Loading
Loading