From df554de824fd1824a9b671a3663c30d7c549ce80 Mon Sep 17 00:00:00 2001 From: Yonghye Kwon Date: Sat, 28 Mar 2026 08:51:27 +0900 Subject: [PATCH] Add OpenMP compile/link flags to setup.py for source builds Source builds of torchvision do not pass -fopenmp (compile) or -lomp/-lgomp (link) flags when building the _C extension. Since at::parallel_for is a header-only template whose #pragma omp directives are compiled into the calling translation unit (_C.so), the missing flags cause it to silently fall back to sequential execution. This has had no observable effect so far because no existing torchvision C++ kernel directly uses at::parallel_for or #pragma omp. However, upcoming changes (e.g. #9442) introduce at::parallel_for, and without these flags source builds get 0% speedup from parallelization. - macOS: -Xpreprocessor -fopenmp (compile) + -lomp from PyTorch's bundled libomp (link) - Linux: -fopenmp (compile) + -lgomp (link) - Windows: unchanged (uses /openmp via MSVC, already handled separately) Fixes #2783 Signed-off-by: Yonghye Kwon --- setup.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/setup.py b/setup.py index ce6c5f3a395..697973086c0 100644 --- a/setup.py +++ b/setup.py @@ -131,6 +131,12 @@ def get_macros_and_flags(): if sysconfig.get_config_var("Py_GIL_DISABLED"): extra_compile_args["cxx"].append("-DPy_GIL_DISABLED") + if sys.platform == "darwin": + extra_compile_args["cxx"].append("-Xpreprocessor") + extra_compile_args["cxx"].append("-fopenmp") + elif sys.platform != "win32": + extra_compile_args["cxx"].append("-fopenmp") + if DEBUG: extra_compile_args["cxx"].append("-g") extra_compile_args["cxx"].append("-O0") @@ -182,12 +188,22 @@ def make_C_extension(): sources += mps_sources define_macros, extra_compile_args = get_macros_and_flags() + + extra_link_args = [] + if sys.platform == "darwin": + # Link against libomp shipped with PyTorch for at::parallel_for support + torch_lib_dir = os.path.join(os.path.dirname(torch.__file__), "lib") + extra_link_args = [f"-L{torch_lib_dir}", "-lomp"] + elif sys.platform != "win32": + extra_link_args = ["-lgomp"] + return Extension( name="torchvision._C", sources=sorted(str(s) for s in sources), include_dirs=[CSRS_DIR], define_macros=define_macros, extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, )