diff --git a/cmake/deps.txt b/cmake/deps.txt index 448e6fcb23f2f..cd8553d7ccd4e 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -47,7 +47,7 @@ protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/downlo psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013 pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2 pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v3.0.2.zip;a064e663b4d7a337ac291d1bef7337ef4e60a1ae -pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b +pytorch_cpuinfo;https://github.com/crvineeth97/cpuinfo/archive/df8c6a8ce5cf12baabe5e7c9213aaeeffb18bd82.zip;34999b2434e49f1a66d50fb62f28663fb8c96881 re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381 diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 4afa074a0b254..8b1087ed1048b 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -369,11 +369,7 @@ if (CPUINFO_SUPPORTED) URL_HASH SHA1=${DEP_SHA1_pytorch_cpuinfo} EXCLUDE_FROM_ALL PATCH_COMMAND - ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch && - # https://github.com/pytorch/cpuinfo/pull/324 - ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch && - # https://github.com/pytorch/cpuinfo/pull/348 - ${Patch_EXECUTABLE} -p1 < 
${PROJECT_SOURCE_DIR}/patches/cpuinfo/win_arm_fp16_detection_fallback.patch + ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch FIND_PACKAGE_ARGS NAMES cpuinfo ) else() diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 4e5636572b94a..a431c57b33437 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1518,8 +1518,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() - - if(onnxruntime_USE_QNN) - #qnn ctx generator + # Build ep_weight_sharing_ctx_gen for all supported EPs (QNN, TensorRT, OpenVINO, VitisAI) + if(onnxruntime_USE_QNN OR onnxruntime_USE_TENSORRT OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_VITISAI) set(ep_weight_sharing_ctx_gen_src_dir ${TEST_SRC_DIR}/ep_weight_sharing_ctx_gen) set(ep_weight_sharing_ctx_gen_src_patterns diff --git a/cmake/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch b/cmake/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch deleted file mode 100644 index af0f039b6c2a3..0000000000000 --- a/cmake/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index aedc983..dab589e 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -72,6 +72,17 @@ IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND CPUINFO_TARGET_PROCESSOR STREQUAL "am - ENDIF() - IF(IS_APPLE_OS AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64.*)$") - SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}") -+ELSEIF(MSVC AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.10") -+ # Use CMAKE_C_COMPILER_ARCHITECTURE_ID. MSVC values are documented as available since CMake 3.10. 
-+ IF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "X86") -+ SET(CPUINFO_TARGET_PROCESSOR "x86") -+ ELSEIF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "x64") -+ SET(CPUINFO_TARGET_PROCESSOR "x86_64") -+ ELSEIF(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "^(ARM64|ARM64EC)$") -+ SET(CPUINFO_TARGET_PROCESSOR "arm64") -+ ELSE() -+ MESSAGE(FATAL_ERROR "Unsupported MSVC compiler architecture ID \"${CMAKE_C_COMPILER_ARCHITECTURE_ID}\"") -+ ENDIF() - ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_VS_PLATFORM_NAME) - IF(CMAKE_VS_PLATFORM_NAME STREQUAL "Win32") - SET(CPUINFO_TARGET_PROCESSOR "x86") -@@ -88,7 +99,7 @@ ENDIF() - - # ---[ Build flags - SET(CPUINFO_SUPPORTED_PLATFORM TRUE) --IF(NOT CMAKE_SYSTEM_PROCESSOR) -+IF(NOT CPUINFO_TARGET_PROCESSOR) - IF(NOT IOS) - MESSAGE(WARNING - "Target processor architecture is not specified. " -@@ -201,12 +212,12 @@ IF(CPUINFO_SUPPORTED_PLATFORM) - src/arm/linux/chipset.c - src/arm/linux/midr.c - src/arm/linux/hwcap.c) -- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]") -+ IF(CPUINFO_TARGET_PROCESSOR MATCHES "^armv[5-8]") - LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch32-isa.c) - IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND ANDROID_ABI STREQUAL "armeabi") - SET_SOURCE_FILES_PROPERTIES(src/arm/linux/aarch32-isa.c PROPERTIES COMPILE_FLAGS -marm) - ENDIF() -- ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$") -+ ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(aarch64|arm64)$") - LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c) - ENDIF() - ELSEIF(IS_APPLE_OS AND CPUINFO_TARGET_PROCESSOR MATCHES "arm64.*") -@@ -395,7 +406,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) - TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE _GNU_SOURCE=1) - ENDIF() - -- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a)$") -+ IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(armv5te|armv7-a)$") - ADD_EXECUTABLE(atm7029b-tablet-test test/mock/atm7029b-tablet.cc) 
- TARGET_INCLUDE_DIRECTORIES(atm7029b-tablet-test BEFORE PRIVATE test/mock) - TARGET_LINK_LIBRARIES(atm7029b-tablet-test PRIVATE cpuinfo_mock gtest) -@@ -577,7 +588,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) - ADD_TEST(NAME xperia-sl-test COMMAND xperia-sl-test) - ENDIF() - -- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a|aarch64)$") -+ IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(armv5te|armv7-a|aarch64)$") - ADD_EXECUTABLE(alcatel-revvl-test test/mock/alcatel-revvl.cc) - TARGET_INCLUDE_DIRECTORIES(alcatel-revvl-test BEFORE PRIVATE test/mock) - TARGET_LINK_LIBRARIES(alcatel-revvl-test PRIVATE cpuinfo_mock gtest) -@@ -774,7 +785,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) - ADD_TEST(NAME xperia-c4-dual-test COMMAND xperia-c4-dual-test) - ENDIF() - -- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(i686|x86_64)$") -+ IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(i686|x86_64)$") - ADD_EXECUTABLE(alldocube-iwork8-test test/mock/alldocube-iwork8.cc) - TARGET_INCLUDE_DIRECTORIES(alldocube-iwork8-test BEFORE PRIVATE test/mock) - TARGET_LINK_LIBRARIES(alldocube-iwork8-test PRIVATE cpuinfo_mock gtest) -@@ -831,7 +842,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_UNIT_TESTS) - ADD_TEST(NAME brand-string-test COMMAND brand-string-test) - ENDIF() - -- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") -+ IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") - ADD_LIBRARY(android_properties_interface STATIC test/name/android-properties-interface.c) - CPUINFO_TARGET_ENABLE_C99(android_properties_interface) - CPUINFO_TARGET_RUNTIME_LIBRARY(android_properties_interface) -@@ -879,7 +890,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_TOOLS) - TARGET_LINK_LIBRARIES(cache-info PRIVATE 
cpuinfo) - INSTALL(TARGETS cache-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) - -- IF(CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux)$" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") -+ IF(CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux)$" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") - ADD_EXECUTABLE(auxv-dump tools/auxv-dump.c) - CPUINFO_TARGET_ENABLE_C99(auxv-dump) - CPUINFO_TARGET_RUNTIME_LIBRARY(auxv-dump) diff --git a/cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch b/cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch deleted file mode 100644 index 44ac0f13f5466..0000000000000 --- a/cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch +++ /dev/null @@ -1,19 +0,0 @@ -diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c -index 5c0a5f3..a07fbe4 100644 ---- a/src/arm/windows/init.c -+++ b/src/arm/windows/init.c -@@ -249,6 +249,14 @@ static void set_cpuinfo_isa_fields(void) { - // guarantee that, but it holds in practice. - cpuinfo_isa.rdm = dotprod; - -+ // PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE may not be available in older -+ // Windows versions. If fp16arith was not detected with -+ // IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE), fall -+ // back to using the value of dotprod. -+ if (!cpuinfo_isa.fp16arith) { -+ cpuinfo_isa.fp16arith = dotprod; -+ } -+ - /* Windows API reports all or nothing for cryptographic instructions. 
*/ - const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; - cpuinfo_isa.aes = crypto; diff --git a/cmake/vcpkg-ports/cpuinfo/patch_vcpkg_arm64ec_support.patch b/cmake/vcpkg-ports/cpuinfo/patch_vcpkg_arm64ec_support.patch deleted file mode 100644 index af0f039b6c2a3..0000000000000 --- a/cmake/vcpkg-ports/cpuinfo/patch_vcpkg_arm64ec_support.patch +++ /dev/null @@ -1,91 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index aedc983..dab589e 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -72,6 +72,17 @@ IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND CPUINFO_TARGET_PROCESSOR STREQUAL "am - ENDIF() - IF(IS_APPLE_OS AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64.*)$") - SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}") -+ELSEIF(MSVC AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.10") -+ # Use CMAKE_C_COMPILER_ARCHITECTURE_ID. MSVC values are documented as available since CMake 3.10. -+ IF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "X86") -+ SET(CPUINFO_TARGET_PROCESSOR "x86") -+ ELSEIF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "x64") -+ SET(CPUINFO_TARGET_PROCESSOR "x86_64") -+ ELSEIF(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "^(ARM64|ARM64EC)$") -+ SET(CPUINFO_TARGET_PROCESSOR "arm64") -+ ELSE() -+ MESSAGE(FATAL_ERROR "Unsupported MSVC compiler architecture ID \"${CMAKE_C_COMPILER_ARCHITECTURE_ID}\"") -+ ENDIF() - ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_VS_PLATFORM_NAME) - IF(CMAKE_VS_PLATFORM_NAME STREQUAL "Win32") - SET(CPUINFO_TARGET_PROCESSOR "x86") -@@ -88,7 +99,7 @@ ENDIF() - - # ---[ Build flags - SET(CPUINFO_SUPPORTED_PLATFORM TRUE) --IF(NOT CMAKE_SYSTEM_PROCESSOR) -+IF(NOT CPUINFO_TARGET_PROCESSOR) - IF(NOT IOS) - MESSAGE(WARNING - "Target processor architecture is not specified. 
" -@@ -201,12 +212,12 @@ IF(CPUINFO_SUPPORTED_PLATFORM) - src/arm/linux/chipset.c - src/arm/linux/midr.c - src/arm/linux/hwcap.c) -- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]") -+ IF(CPUINFO_TARGET_PROCESSOR MATCHES "^armv[5-8]") - LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch32-isa.c) - IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND ANDROID_ABI STREQUAL "armeabi") - SET_SOURCE_FILES_PROPERTIES(src/arm/linux/aarch32-isa.c PROPERTIES COMPILE_FLAGS -marm) - ENDIF() -- ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$") -+ ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(aarch64|arm64)$") - LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c) - ENDIF() - ELSEIF(IS_APPLE_OS AND CPUINFO_TARGET_PROCESSOR MATCHES "arm64.*") -@@ -395,7 +406,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) - TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE _GNU_SOURCE=1) - ENDIF() - -- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a)$") -+ IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(armv5te|armv7-a)$") - ADD_EXECUTABLE(atm7029b-tablet-test test/mock/atm7029b-tablet.cc) - TARGET_INCLUDE_DIRECTORIES(atm7029b-tablet-test BEFORE PRIVATE test/mock) - TARGET_LINK_LIBRARIES(atm7029b-tablet-test PRIVATE cpuinfo_mock gtest) -@@ -577,7 +588,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) - ADD_TEST(NAME xperia-sl-test COMMAND xperia-sl-test) - ENDIF() - -- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a|aarch64)$") -+ IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(armv5te|armv7-a|aarch64)$") - ADD_EXECUTABLE(alcatel-revvl-test test/mock/alcatel-revvl.cc) - TARGET_INCLUDE_DIRECTORIES(alcatel-revvl-test BEFORE PRIVATE test/mock) - TARGET_LINK_LIBRARIES(alcatel-revvl-test PRIVATE cpuinfo_mock gtest) -@@ -774,7 +785,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) - ADD_TEST(NAME 
xperia-c4-dual-test COMMAND xperia-c4-dual-test) - ENDIF() - -- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(i686|x86_64)$") -+ IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(i686|x86_64)$") - ADD_EXECUTABLE(alldocube-iwork8-test test/mock/alldocube-iwork8.cc) - TARGET_INCLUDE_DIRECTORIES(alldocube-iwork8-test BEFORE PRIVATE test/mock) - TARGET_LINK_LIBRARIES(alldocube-iwork8-test PRIVATE cpuinfo_mock gtest) -@@ -831,7 +842,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_UNIT_TESTS) - ADD_TEST(NAME brand-string-test COMMAND brand-string-test) - ENDIF() - -- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") -+ IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") - ADD_LIBRARY(android_properties_interface STATIC test/name/android-properties-interface.c) - CPUINFO_TARGET_ENABLE_C99(android_properties_interface) - CPUINFO_TARGET_RUNTIME_LIBRARY(android_properties_interface) -@@ -879,7 +890,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_TOOLS) - TARGET_LINK_LIBRARIES(cache-info PRIVATE cpuinfo) - INSTALL(TARGETS cache-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) - -- IF(CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux)$" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") -+ IF(CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux)$" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") - ADD_EXECUTABLE(auxv-dump tools/auxv-dump.c) - CPUINFO_TARGET_ENABLE_C99(auxv-dump) - CPUINFO_TARGET_RUNTIME_LIBRARY(auxv-dump) diff --git a/cmake/vcpkg-ports/cpuinfo/portfile.cmake b/cmake/vcpkg-ports/cpuinfo/portfile.cmake index 67bd18e61cc28..a1ede19e26ded 100644 --- a/cmake/vcpkg-ports/cpuinfo/portfile.cmake +++ b/cmake/vcpkg-ports/cpuinfo/portfile.cmake @@ -5,14 +5,12 @@ endif() vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH - REPO pytorch/cpuinfo - REF 403d652dca4c1046e8145950b1c0997a9f748b57 - 
SHA512 f7cd6dc44bd1120af610cae1337ed4c0f557ba78d2de9c73fed350fa3dfe9512643a1619ae55f5a540c6316a87d641856cca27297bb8766e48f39b7b7a59da1f - HEAD_REF master + REPO crvineeth97/cpuinfo + REF df8c6a8ce5cf12baabe5e7c9213aaeeffb18bd82 + SHA512 0 # FIXME(review): 0 is a placeholder, not a real digest (presumably fails archive hash verification; confirm). Replace it with the SHA512 of the archive for REF, and point REPO/REF/HEAD_REF back at pytorch/cpuinfo (HEAD_REF master) once the PR merges to pytorch/cpuinfo. + HEAD_REF vchelur/add-cpuinfo-deinitialize PATCHES patch_cpuinfo_h_for_arm64ec.patch - patch_vcpkg_arm64ec_support.patch # https://github.com/pytorch/cpuinfo/pull/324 - win_arm_fp16_detection_fallback.patch # https://github.com/pytorch/cpuinfo/pull/348 ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS diff --git a/cmake/vcpkg-ports/cpuinfo/win_arm_fp16_detection_fallback.patch b/cmake/vcpkg-ports/cpuinfo/win_arm_fp16_detection_fallback.patch deleted file mode 100644 index 44ac0f13f5466..0000000000000 --- a/cmake/vcpkg-ports/cpuinfo/win_arm_fp16_detection_fallback.patch +++ /dev/null @@ -1,19 +0,0 @@ -diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c -index 5c0a5f3..a07fbe4 100644 ---- a/src/arm/windows/init.c -+++ b/src/arm/windows/init.c -@@ -249,6 +249,14 @@ static void set_cpuinfo_isa_fields(void) { - // guarantee that, but it holds in practice. - cpuinfo_isa.rdm = dotprod; - -+ // PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE may not be available in older -+ // Windows versions. If fp16arith was not detected with -+ // IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE), fall -+ // back to using the value of dotprod. -+ if (!cpuinfo_isa.fp16arith) { -+ cpuinfo_isa.fp16arith = dotprod; -+ } -+ - /* Windows API reports all or nothing for cryptographic instructions. 
*/ - const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; - cpuinfo_isa.aes = crypto; diff --git a/onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h b/onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h index adc7b623ec8c4..21feb6e64dd01 100644 --- a/onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h +++ b/onnxruntime/contrib_ops/cpu/bert/gqa_attention_base.h @@ -207,7 +207,7 @@ class GQAAttentionBase { const size_t batch_index = i / num_heads_; const size_t head_index = i % num_heads_; const size_t total_seqlen = SafeInt<size_t>(seqlens_k[batch_index]) + 1; - const size_t past_seqlen = is_prompt ? 0 : total_seqlen - sequence_length; // Assume no padding sequence length + const size_t past_seqlen = is_prompt ? 0 : (total_seqlen > sequence_length ? total_seqlen - sequence_length : 0); // Assume no padding sequence length const size_t past_chunk_length = SafeInt<size_t>(past_seqlen) * head_size; const ptrdiff_t output_offset = SafeInt<ptrdiff_t>(i) * sequence_length * present_buffer_sequence_length; @@ -441,7 +441,7 @@ class GQAAttentionBase { const size_t batch_index = i / num_heads_; const size_t head_index = i % num_heads_; const size_t total_seqlen = SafeInt<size_t>(seqlens_k[batch_index]) + 1; - const size_t past_seqlen = is_prompt ? 0 : total_seqlen - sequence_length; // Assume no padding sequence length + const size_t past_seqlen = is_prompt ? 0 : (total_seqlen > sequence_length ? 
total_seqlen - sequence_length : 0); // Assume no padding sequence length const size_t past_chunk_length = SafeInt<size_t>(past_seqlen) * head_size; const T* v; diff --git a/onnxruntime/contrib_ops/cpu/bert/group_query_attention.cc b/onnxruntime/contrib_ops/cpu/bert/group_query_attention.cc index 5698bcb659f20..cf8e56908b2cf 100644 --- a/onnxruntime/contrib_ops/cpu/bert/group_query_attention.cc +++ b/onnxruntime/contrib_ops/cpu/bert/group_query_attention.cc @@ -173,11 +173,11 @@ Status GroupQueryAttention::Compute(OpKernelContext* context) const { for (int b = 0; b < batch_size; b++) { const int total_seqlen = seqlens_k->Data<int32_t>()[b] + 1; const int past_seqlen = total_seqlen - sequence_length; - for (int s = 0; s < sequence_length; s++) { - if (past_seqlen + s < total_seqlen) { - default_pos_ids[b * sequence_length + s] = static_cast<int64_t>(past_seqlen) + s; - } else { - default_pos_ids[b * sequence_length + s] = static_cast<int64_t>(1); - } + + // seqlens_k can be inconsistent with sequence_length (e.g. random test data), making past_seqlen negative; + // in that case fall back to consecutive position IDs starting from 0. + const int first_pos_id = past_seqlen < 0 ? 0 : past_seqlen; + for (int s = 0; s < sequence_length; s++) { + default_pos_ids[b * sequence_length + s] = static_cast<int64_t>(first_pos_id) + s; } } } diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 5990013c925c5..f28b769886e75 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -378,4 +378,14 @@ CPUIDInfo::CPUIDInfo() { #endif #endif // defined(CPUIDINFO_ARCH_ARM) } + +void CPUIDInfo::ShutDown() { +#if defined(CPUINFO_SUPPORTED) + // pytorch_cpuinfo_init_ is cleared after deinitializing, so repeated calls are no-ops. + if (pytorch_cpuinfo_init_) { + cpuinfo_deinitialize(); + pytorch_cpuinfo_init_ = false; + } +#endif +} } // namespace onnxruntime 
diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index be301019df5c0..f036587a3de55 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -3,6 +3,7 @@ #pragma once +#include <atomic> #include "core/common/common.h" #include "core/common/cpuid_arch_definition.h" @@ -11,8 +12,7 @@ namespace onnxruntime { class CPUIDInfo { public: static const CPUIDInfo& GetCPUIDInfo() { - static CPUIDInfo cpuid_info; - return cpuid_info; + return Instance(); } std::string_view GetCPUVendor() const { @@ -104,6 +104,12 @@ class CPUIDInfo { return has_fp16_; } + static void ShutdownCpuInfo() { + // Don't create the singleton during DLL unload. + if (!InstanceCreated().load(std::memory_order_acquire)) return; + Instance().ShutDown(); + } + private: // Log function that uses ORT logging if available or writes to stderr. // This enables us to log even before ORT logging has been initialized. @@ -111,6 +117,22 @@ class CPUIDInfo { CPUIDInfo(); + static std::atomic<bool>& InstanceCreated() { + static std::atomic<bool> created{false}; + return created; + } + + static CPUIDInfo& Instance() { + static CPUIDInfo cpuid_info; + // Publish the flag only once so later calls do not keep writing to the shared atomic. + if (!InstanceCreated().load(std::memory_order_acquire)) { + InstanceCreated().store(true, std::memory_order_release); + } + return cpuid_info; + } + + void ShutDown(); + void VendorInfoInit(); #if defined(CPUIDINFO_ARCH_X86) diff --git a/onnxruntime/core/dll/dllmain.cc b/onnxruntime/core/dll/dllmain.cc index 9e50c6e07738f..f9f9b1c0237d8 100644 --- a/onnxruntime/core/dll/dllmain.cc +++ b/onnxruntime/core/dll/dllmain.cc @@ -9,6 +9,7 @@ #else #endif #include +#include "core/common/cpuid_info.h" #ifdef __GNUC__ #pragma GCC diagnostic pop #endif @@ -31,13 +32,15 @@ BOOL APIENTRY DllMain(HMODULE /*hModule*/, g_is_shutting_down = true; // do not do cleanup if process termination scenario #if defined(ONNXRUNTIME_ENABLE_MEMLEAK_CHECK) - // In leak-check builds we still want protobuf shutdown to avoid flagged leaks. 
+ // In leak-check builds we still want protobuf and CPUInfo shutdown to avoid flagged leaks. ::google::protobuf::ShutdownProtobufLibrary(); + onnxruntime::CPUIDInfo::ShutdownCpuInfo(); #endif } else { // Cleanup protobuf library. // NOTE: it might be too early to do so, as all function local statics and global objects are not destroyed yet. ::google::protobuf::ShutdownProtobufLibrary(); + onnxruntime::CPUIDInfo::ShutdownCpuInfo(); } break; } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 4a6692778da0b..e2682817145d0 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -1870,13 +1870,13 @@ Status QnnBackendManager::ExtractBackendProfilingInfo(qnn::profile::ProfilingInf // ETW disabled previously, but enabled now if (ProfilingLevel::INVALID == profiling_level_etw_ && tracelogging_provider_ep_enabled) { - LOGS(*logger_, ERROR) << "ETW disabled previously, but enabled now. Can't do the switch! Won't output any profiling."; + LOGS(*logger_, VERBOSE) << "ETW disabled previously, but enabled now. Can't do the switch! Won't output any profiling."; return Status::OK(); } // ETW enabled previously, but disabled now if (ProfilingLevel::INVALID != profiling_level_etw_ && !tracelogging_provider_ep_enabled) { - LOGS(*logger_, ERROR) << "ETW enabled previously, but disabled now. Can't do the switch! Won't output any profiling."; + LOGS(*logger_, VERBOSE) << "ETW enabled previously, but disabled now. Can't do the switch! Won't output any profiling."; return Status::OK(); }