diff --git a/easybuild/easyconfigs/c/cuDNN/cuDNN-9.10.2.21-CUDA-12.6.0.eb b/easybuild/easyconfigs/c/cuDNN/cuDNN-9.10.2.21-CUDA-12.6.0.eb new file mode 100644 index 000000000000..427e5309a941 --- /dev/null +++ b/easybuild/easyconfigs/c/cuDNN/cuDNN-9.10.2.21-CUDA-12.6.0.eb @@ -0,0 +1,34 @@ +name = 'cuDNN' +version = '9.10.2.21' +versionsuffix = '-CUDA-%(cudaver)s' +homepage = 'https://developer.nvidia.com/cudnn' +description = """The NVIDIA CUDA Deep Neural Network library (cuDNN) is +a GPU-accelerated library of primitives for deep neural networks.""" + +toolchain = SYSTEM + +source_urls = [ + 'https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-%(cudnnarch)s/' +] +# note: cuDNN is tied to specific CUDA versions, +# see also https://docs.nvidia.com/deeplearning/cudnn/support-matrix/index.html#cudnn-cuda-hardware-versions +sources = ['%(namelower)s-linux-%(cudnnarch)s-%(version)s_cuda%(cudamajver)s-archive.tar.xz'] +checksums = [{ + '%(namelower)s-linux-sbsa-%(version)s_cuda%(cudamajver)s-archive.tar.xz': + '4d57dceba3be27a68b078ce8630525bf40ab7f1b546eb45d0b363c3eeb55f8fa', + '%(namelower)s-linux-x86_64-%(version)s_cuda%(cudamajver)s-archive.tar.xz': + 'd0defcbc4c6dad711ff4cb66d254036a300c9071b07c7b64199aacab534313c1', +}] + +dependencies = [('CUDA', '12.6.0')] + +sanity_check_paths = { + 'files': [ + 'include/cudnn.h', 'lib64/libcudnn_adv_static.a', 'lib64/libcudnn_cnn_static.a', + 'lib64/libcudnn_engines_precompiled_static.a', 'lib64/libcudnn_engines_runtime_compiled_static.a', + 'lib64/libcudnn_graph_static.a', 'lib64/libcudnn_heuristic_static.a', 'lib64/libcudnn_ops_static.a', + ], + 'dirs': ['include', 'lib64'], +} + +moduleclass = 'numlib' diff --git a/easybuild/easyconfigs/n/NCCL/NCCL-2.27.5-GCCcore-13.3.0-CUDA-12.6.0.eb b/easybuild/easyconfigs/n/NCCL/NCCL-2.27.5-GCCcore-13.3.0-CUDA-12.6.0.eb new file mode 100644 index 000000000000..4b1bd8f94a17 --- /dev/null +++ b/easybuild/easyconfigs/n/NCCL/NCCL-2.27.5-GCCcore-13.3.0-CUDA-12.6.0.eb @@ 
-0,0 +1,26 @@ +name = 'NCCL' +version = '2.27.5' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://developer.nvidia.com/nccl' +description = """The NVIDIA Collective Communications Library (NCCL) implements multi-GPU and multi-node collective +communication primitives that are performance optimized for NVIDIA GPUs.""" + +toolchain = {'name': 'GCCcore', 'version': '13.3.0'} + +github_account = 'NVIDIA' +source_urls = [GITHUB_SOURCE] +sources = ['v%(version)s-1.tar.gz'] +checksums = ['e8a8972fc7f7517703510ef23608d41f6484db5331fca37827b4af3f66995344'] + +builddependencies = [('binutils', '2.42')] + +dependencies = [ + ('CUDA', '12.6.0', '', SYSTEM), + ('UCX-CUDA', '1.16.0', versionsuffix), +] + +# default CUDA compute capabilities to use (override via --cuda-compute-capabilities) +cuda_compute_capabilities = ['5.0', '6.0', '7.0', '7.5', '8.0', '8.6', '9.0'] + +moduleclass = 'lib' diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_readd-support-for-nvidia-cutlass-python-package.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_readd-support-for-nvidia-cutlass-python-package.patch new file mode 100644 index 000000000000..0e2848280d19 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.0_readd-support-for-nvidia-cutlass-python-package.patch @@ -0,0 +1,124 @@ +Allow use of the NVIDIA CUTLASS Python package if installed. 
+See https://github.com/pytorch/pytorch/pull/160180 + +Author: Alexander Grund (TU Dresden) + +diff -ur a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py +--- a/torch/_inductor/codecache.py 2025-10-15 19:15:08.000000000 +0200 ++++ b/torch/_inductor/codecache.py 2025-10-24 18:07:49.519431015 +0200 +@@ -3628,13 +3628,15 @@ + return "nvcc" + + +-def _cutlass_path() -> str: ++def _cutlass_path() -> Optional[str]: + if config.is_fbcode(): + from libfb.py import parutil + + return parutil.get_dir_path("cutlass-4-headers") + else: +- return config.cuda.cutlass_dir ++ from torch._inductor.codegen.cuda.cutlass_utils import try_import_cutlass ++ ++ return config.cuda.cutlass_dir if try_import_cutlass() else None + + + def _cutlass_paths() -> list[str]: +@@ -3649,6 +3651,8 @@ + def _clone_cutlass_paths(build_root: str) -> list[str]: + paths = _cutlass_paths() + cutlass_root = _cutlass_path() ++ if cutlass_root is None: ++ return [] + for path in _cutlass_paths(): + old_path = os.path.join(cutlass_root, path) + new_path = os.path.join(build_root, path) +@@ -3657,10 +3661,12 @@ + + + def _cutlass_include_paths() -> list[str]: +- cutlass_path = _cutlass_path() ++ cutlass_root = _cutlass_path() ++ if cutlass_root is None: ++ return [] + return [ + # Use realpath to get canonical absolute paths, in order not to mess up cache keys +- os.path.realpath(os.path.join(cutlass_path, path)) ++ os.path.realpath(os.path.join(cutlass_root, path)) + for path in _cutlass_paths() + ] + +diff -ur a/torch/_inductor/codegen/cuda/cutlass_utils.py b/torch/_inductor/codegen/cuda/cutlass_utils.py +--- a/torch/_inductor/codegen/cuda/cutlass_utils.py 2025-10-15 19:15:08.000000000 +0200 ++++ b/torch/_inductor/codegen/cuda/cutlass_utils.py 2025-10-24 18:07:49.520431003 +0200 +@@ -1,6 +1,7 @@ + # mypy: allow-untyped-defs + import atexit + import functools ++import importlib.metadata + import logging + import os + import shutil +@@ -15,6 +16,7 @@ + import torch + from 
torch._inductor.runtime.runtime_utils import dynamo_timed + from torch._inductor.utils import clear_on_fresh_cache ++from torch._vendor.packaging.version import Version + from torch.utils._ordered_set import OrderedSet + + from ... import config +@@ -73,7 +75,9 @@ + """ + We want to support three ways of passing in CUTLASS: + 1. fbcode, handled by the internal build system. +- 2. User specifies cutlass_dir. The default is ../third_party/cutlass/, ++ 2. pip install nvidia-cutlass, which provides the cutlass_library package ++ and the header files in the cutlass_library/source directory. ++ 3. User specifies cutlass_dir. The default is ../third_party/cutlass/, + which is the directory when developers build from source. + """ + if config.is_fbcode(): +@@ -89,6 +93,34 @@ + + return True + ++ try: ++ cutlass_version = Version(importlib.metadata.version("cutlass")) ++ if cutlass_version < Version("3.7"): ++ log.warning("CUTLASS version < 3.7 is not recommended.") ++ ++ import cutlass_library # type: ignore[import-not-found] # noqa: F811 ++ ++ log.debug( ++ "Found cutlass_library in python search path, overriding config.cuda.cutlass_dir" ++ ) ++ cutlass_library_dir = os.path.dirname(cutlass_library.__file__) ++ assert os.path.isdir(cutlass_library_dir), ( ++ f"{cutlass_library_dir} is not a directory" ++ ) ++ config.cuda.cutlass_dir = os.path.abspath( ++ os.path.join( ++ cutlass_library_dir, ++ "source", ++ ) ++ ) ++ ++ return True ++ except (ModuleNotFoundError, importlib.metadata.PackageNotFoundError): ++ log.debug( ++ "cutlass_library not found in sys.path, trying to import from config.cuda.cutlass_dir", ++ exc_info=True, ++ ) ++ + # Copy CUTLASS python scripts to a temp dir and add the temp dir to Python search path. + # This is a temporary hack to avoid CUTLASS module naming conflicts. + # TODO(ipiszy): remove this hack when CUTLASS solves Python scripts packaging structure issues. 
+@@ -156,7 +188,7 @@ + ) + + try: +- import cutlass # noqa: F401, F811 ++ import cutlass # noqa: F401 + import cutlass_library.generator # noqa: F401 + import cutlass_library.library # noqa: F401 + import cutlass_library.manifest # noqa: F401 diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2024a-CUDA-12.6.0.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2024a-CUDA-12.6.0.eb new file mode 100644 index 000000000000..3ef4f5d0f6b4 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2024a-CUDA-12.6.0.eb @@ -0,0 +1,304 @@ +name = 'PyTorch' +version = '2.9.1' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://pytorch.org/' +description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration. +PyTorch is a deep learning framework that puts Python first.""" + +toolchain = {'name': 'foss', 'version': '2024a'} + +local_six_version = '1.11.0' +# This is specific to a (tagged) release. +# Extract from `get_disabled_tests` in tools/stats/import_test_stats.py +local_disabled_tests_S3_ID = 'UsscdNP.2GMOzUxAvqIx8GAj4MuhX1Xi' +source_urls = [GITHUB_RELEASE] +sources = [ + '%(namelower)s-v%(version)s.tar.gz', + { + 'filename': '%(name)s-%(version)s-disabled-tests.json', + 'download_filename': f'disabled-tests-condensed.json?versionId={local_disabled_tests_S3_ID}', + 'source_urls': ['https://ossci-metrics.s3.amazonaws.com'], + # See `DEFAULT_DISABLED_TESTS_FILE` in torch/testing/_internal/common_utils.py + 'extract_cmd': 'cp %s %(builddir)s/pytorch-v%(version)s/test/.pytorch-disabled-tests.json', + }, + { + # Avoid downloading this during the build, see third_party/NNPACK/cmake/DownloadSix.cmake for the version + 'filename': f'six-{local_six_version}.tar.gz', + 'source_urls': ['https://pypi.python.org/packages/source/s/six'], + } +] +patches = [ + 'PyTorch-1.12.1_add-hypothesis-suppression.patch', + 'PyTorch-1.7.0_disable-dev-shm-test.patch', + 'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch', + 
'PyTorch-2.1.0_remove-test-requiring-online-access.patch', + 'PyTorch-2.6.0_fix-server-in-test_control_plane.patch', + 'PyTorch-2.6.0_show-test-duration.patch', + 'PyTorch-2.6.0_skip-test_segfault.patch', + 'PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch', + 'PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch', + 'PyTorch-2.7.1_skip-test_data_parallel_rnn.patch', + 'PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch', + 'PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch', + 'PyTorch-2.7.1_skip-test_outside_linear_module_free.patch', + 'PyTorch-2.7.1_suport-64bit-BARs.patch', + 'PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch', + 'PyTorch-2.9.0_disable-test_nan_assert.patch', + 'PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch', + 'PyTorch-2.9.0_fix-attention-squeeze.patch', + 'PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch', + 'PyTorch-2.9.0_fix-nccl-test-env.patch', + 'PyTorch-2.9.0_fix-test_exclude_padding.patch', + 'PyTorch-2.9.0_fix-test_version_error.patch', + 'PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch', + 'PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch', + 'PyTorch-2.9.0_remove-faulty-close.patch', + 'PyTorch-2.9.0_revert-pybind11-3-change.patch', + 'PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch', + 'PyTorch-2.9.0_skip-test_convolution1-on-H100.patch', + 'PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch', + 'PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch', + 'PyTorch-2.9.0_skip-test_override-without-CUDA.patch', + 'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch', + 'PyTorch-2.9.0_skip-test_unbacked_reduction.patch', + 'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch', + 'PyTorch-2.9.0_update-exptected-output-for-z3-4.13.0.patch', + 'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch', + 'PyTorch-2.9.1_avoid-using-wrong-libomp.patch', + 'PyTorch-2.9.1_check-device-avail-test_schedule.patch', + 
'PyTorch-2.9.1_disable-slow-tests.patch', + 'PyTorch-2.9.1_dont-print-test-items.patch', + 'PyTorch-2.9.1_fix-hypothesis-deadline.patch', + 'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch', + 'PyTorch-2.9.1_fix-test_dist2-decorators.patch', + 'PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch', + 'PyTorch-2.9.1_GCC14-ARM-workaround.patch', + 'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch', + 'PyTorch-2.9.1_normalize_tree_output.patch', + 'PyTorch-2.9.1_set-test-timeout.patch', + 'PyTorch-2.9.1_skip-cutlass-addmm-test.patch', + 'PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch', + 'PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch', + 'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch', + 'PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch', + 'PyTorch-2.9.1_skip-tests-requiring-SM90.patch', + 'PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch', + 'PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch', + 'PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch', + 'PyTorch-2.9.1_skip-test_optree_graph_break_message.patch', + 'PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch', +] +checksums = [ + {'pytorch-v2.9.1.tar.gz': 'e17504700ebc4c87f9b57059df1c4d790b769458c04db144c7a92aea90f2c92b'}, + {'PyTorch-2.9.1-disabled-tests.json': '471f8aa36e056173d09ffd421ead45539a8d35fec6e61a8a0050d92a5fcd9f04'}, + {'six-1.11.0.tar.gz': '70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9'}, + {'PyTorch-1.12.1_add-hypothesis-suppression.patch': + 'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'}, + {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'}, + {'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch': + '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'}, + {'PyTorch-2.1.0_remove-test-requiring-online-access.patch': + 
'35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'}, + {'PyTorch-2.6.0_fix-server-in-test_control_plane.patch': + '1337689ff28ecaa8d1d0edf60d322bcdd7846fec040925325d357b19eb6e4342'}, + {'PyTorch-2.6.0_show-test-duration.patch': '5508f2f9619204d9f3c356dbd4000a00d58f452ab2d64ae920eb8bc8b5484d75'}, + {'PyTorch-2.6.0_skip-test_segfault.patch': '26806bd62e6b61b56ebaa52d68ca44c415a28124f684bd2fb373557ada68ef52'}, + {'PyTorch-2.7.0_avoid_caffe2_test_cpp_jit.patch': + '2f3255e067f5c6f0d78b4fbce94784c41bddf3d01bab9673856b0d0bbc4e3fec'}, + {'PyTorch-2.7.1_avoid-caffe2-sandcastle-test-lib.patch': + 'aaf22cb431357dc78e4db895d64febf1c7ee187e8ad27bd13544d011127354d4'}, + {'PyTorch-2.7.1_skip-test_data_parallel_rnn.patch': + 'aa85b678e89db4bb41d2c5f4990f0d05959be92e61918291cb5609685b7f1841'}, + {'PyTorch-2.7.1_skip-test_gds_fails_in_ci.patch': + '503030c3591196510a3c2d95db30b28a0b396adb8b50ff0d221f6bdb1f939935'}, + {'PyTorch-2.7.1_skip-test_mixed_mm_exhaustive_dtypes.patch': + '709288abc802c9eb687c15f2677ebaf408d8325a4cb470d23cb72447ee0b8e13'}, + {'PyTorch-2.7.1_skip-test_outside_linear_module_free.patch': + '4916a256b2b9914e4fdb930681b80df93ea561ddee2fc9978c4973a5650be5e9'}, + {'PyTorch-2.7.1_suport-64bit-BARs.patch': '317c3d220aa87426d86e137a6c1a8f910adf9580ca0848371e0f6800c05dbde1'}, + {'PyTorch-2.7.1_tolerance-test_partial_flat_weights.patch': + 'f304440a57e00b8052a5ffbf285adad8d0fdc5a812a659420b59a20deb5a9942'}, + {'PyTorch-2.9.0_disable-test_nan_assert.patch': '98e9f98ce8fb89ae368739bc039be69040ed446a1c74ee5c2a1ef8ba60986c7d'}, + {'PyTorch-2.9.0_enable-symbolizer-in-test_workspace_allocation_error.patch': + 'ba4032b967c0393c916a26fb2b117ba40670ae8e809cb34399a6379b4e523d72'}, + {'PyTorch-2.9.0_fix-attention-squeeze.patch': '8f040e74780cab391bb4c84f86390a13230e1a309ddf65db9900d9a1c66e1288'}, + {'PyTorch-2.9.0_fix-FP16-CPU-tests-in-test_torchinductor_opinfo.patch': + 'b696d7be8c55ff1ccf8731dccf119b8792cd9593eaff457f37e76114e52346d2'}, + 
{'PyTorch-2.9.0_fix-nccl-test-env.patch': '9326223c400262788734ec608f6134c5d240f4d5315a8d294179a28f885d6845'}, + {'PyTorch-2.9.0_fix-test_exclude_padding.patch': + '349850874fb75d57a24437d871a4994a773e501632ce66a2adca613380a152dc'}, + {'PyTorch-2.9.0_fix-test_version_error.patch': 'b10bb10d0a353e4ba7dbef28ca5fef03a8ba552896e1982708aa90ab6f24f34f'}, + {'PyTorch-2.9.0_honor-XDG_CACHE_HOME.patch': '239631258431174e4aed8947ae6096e003a3213bfbfa112cd0cdebae89469164'}, + {'PyTorch-2.9.0_increase-tolerance-in-test_transformers.patch': + 'c27ab34900835c2a15edc26d481343a16433bfa52f635a80cbab252c1320a545'}, + {'PyTorch-2.9.0_remove-faulty-close.patch': '32ca744d68dcfa669e46ced9d2776af3dcc380dd9c3458ba7c1c432e5c5295b3'}, + {'PyTorch-2.9.0_revert-pybind11-3-change.patch': + '5289894011fefc67482b1e19c9d1c502e94a943fc7a2d5ed5a6a1eaf444570a0'}, + {'PyTorch-2.9.0_skip-test_benchmark_on_non_zero_device.patch': + '85e236431d1a5da3fb7fccc2554640898c29f5fab46a41d15b3ab61dd1f924fc'}, + {'PyTorch-2.9.0_skip-test_convolution1-on-H100.patch': + '704750c7cc08b58779907d608cd4b7505043e394fb27530b16d72a0dc27c277e'}, + {'PyTorch-2.9.0_skip-test_inductor_all_gather_into_tensor_coalesced.patch': + '644153d4c1d8267c0631df2902a6dfe8ec2a197f3374f2a2f5654e6bd0edc05e'}, + {'PyTorch-2.9.0_skip-test_original_aten_preserved_pad_mm.patch': + 'ac9e05d296cd5ff938a44662cd022efcc8133c744ca82b045c6a15bc64f67cf4'}, + {'PyTorch-2.9.0_skip-test_override-without-CUDA.patch': + '967512d1487bf1ad06982cc5b976c0b38ba062c3f3473cb4542c4b9ac0740662'}, + {'PyTorch-2.9.0_skip-tests-requiring-CUDA-12.8.patch': + '6d79aff5291627b86d8fea025bf2379e4065c7d9cbef5cf83452c35922848728'}, + {'PyTorch-2.9.0_skip-test_unbacked_reduction.patch': + 'b51dd5d7c9cfeed946cbc5c7fc22f2e78e1fa52dda55569b957c20ca4ed01fe8'}, + {'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch': + '2e73f71ea0f09e613cc4a108893e7948b6daf239e3fe42fd2d3ae5d43c3cf9de'}, + {'PyTorch-2.9.0_update-exptected-output-for-z3-4.13.0.patch': + 
'5c68e0de73212ed266879f4528a6041ef7ab2f1ac83c6cf7142c4baa78e7664c'}, + {'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch': + '86ce380e69b3b20e010d817889cb1b825b05b4054a045b00f2ac12161b77d7e4'}, + {'PyTorch-2.9.1_avoid-using-wrong-libomp.patch': + '2fc2bb82cce87ba0ce73718b0502735ecdf360ca6bfac4482396f7f1c51c1866'}, + {'PyTorch-2.9.1_check-device-avail-test_schedule.patch': + '64c28d38ce69147565509add36d310473ce46f14a0a876d38b5049cb7fce9817'}, + {'PyTorch-2.9.1_disable-slow-tests.patch': '6b365a3531b0ac5446b5f0e8ab924b5e5742cd0331e6d9ec979118a3ef0ffc09'}, + {'PyTorch-2.9.1_dont-print-test-items.patch': '2b524cf3d557c0672feefc3a7165e5555e549b0720647a84d546f769cea0be07'}, + {'PyTorch-2.9.1_fix-hypothesis-deadline.patch': 'f7a130669eee9924a303df9e2bd5743ff023a7d994b7a3e43c86dcccf0206c49'}, + {'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch': + 'ab408275ec66e836112a50054acc4e789ef38196efeb6137c6061d60d9ac9ead'}, + {'PyTorch-2.9.1_fix-test_dist2-decorators.patch': + 'bf4ed805f00775ed33351de7bce40ebf4eac16aff6c61d2e91790982bc43d73b'}, + {'PyTorch-2.9.1_fix-TestExportOpInfoCPU-with-single-GPU.patch': + 'bdddf5a9ba47d57ec96f4bbefc3b85c4904e44de93dc5c7a65bc03e343035ae9'}, + {'PyTorch-2.9.1_GCC14-ARM-workaround.patch': 'ea8a8662e20fae2fb3a74c7f8bf390aba80a598ab37f9131c720d25ebb14965d'}, + {'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch': + 'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'}, + {'PyTorch-2.9.1_normalize_tree_output.patch': '7d5994580339b73c28de595d9e5a0448db97b7d284f17efd18909e4613d170df'}, + {'PyTorch-2.9.1_set-test-timeout.patch': '15fa1149c250b1333b0bc491f659aaf89d5d6eaf6df5ebc81eea545478c1239c'}, + {'PyTorch-2.9.1_skip-cutlass-addmm-test.patch': + '1f81a8a9eea8eda51fc93dff84cd994772febf4fd05d77efbf21b8440dadfd4e'}, + {'PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch': + 'd8489c192da549083569e09e5f94d2a83c9e41e111b1322f86512a9c5a58c0d9'}, + 
{'PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch': + 'e544f765beac7bdb3fc0ada98a3f92fd7e511ed8874de085aa2f213cca769d40'}, + {'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch': + '3cf0b11136fb18c45072687eafd3024d91b504d231a4fa40e04bc62d8d6019c7'}, + {'PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch': + 'e57486cc42f3dbcae29753168febc251d070a283229e2d76ccbdf19fee53f06e'}, + {'PyTorch-2.9.1_skip-tests-requiring-SM90.patch': + '7db02152db2ae70c0fd4c4602fe381e26a74b8e4f7b16b1a3554b2353d761b10'}, + {'PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch': + 'dd82203ce3b6262255aba6b59fb3b547c4c17875d5711f6d3d489aa8f0f59f32'}, + {'PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch': + '99055fde02ca17c1db1cd72f41821387a50901d6cd947161cafa12257b3a1c5a'}, + {'PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch': + '4fc772293047dc737b99e232b8a8db904aa8e88e3c8b2bcc3602fb723941fb89'}, + {'PyTorch-2.9.1_skip-test_optree_graph_break_message.patch': + '2ef1ad424d5f12a4d0ae06938da623819596cee7c0fb4616008f27583c29494d'}, + {'PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch': + '03756a8069bad01018f422f41aa24c7c543519fd857db88a0c6de661976c8859'}, +] + +osdependencies = [OS_PKG_IBVERBS_DEV] + +builddependencies = [ + ('CMake', '3.29.3'), + ('hypothesis', '6.103.1'), + ('setuptools', '80.9.0'), + # For tests + ('parameterized', '0.9.0'), + ('pytest-flakefinder', '1.1.0'), + ('pytest-rerunfailures', '15.0'), + ('pytest-subtests', '0.13.1'), + ('tlparse', '0.4.0'), + ('optree', '0.14.1'), + ('unittest-xml-reporting', '3.1.0'), +] + +dependencies = [ + ('CUDA', '12.6.0', '', SYSTEM), + # PyTorch is very sensitive to the NCCL & cuDNN versions. 
(Maybe the same for cuSPARSELt) + # Prefer those (listed per CUDA version) in + # https://github.com/pytorch/pytorch/blob/main/.github/scripts/generate_binary_build_matrix.py + # or https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_cuda.sh + ('NCCL', '2.27.5', versionsuffix), + ('cuDNN', '9.10.2.21', versionsuffix, SYSTEM), + ('magma', '2.9.0', versionsuffix), + ('cuSPARSELt', '0.6.3.2', versionsuffix, SYSTEM), + # Version from .ci/docker/triton_version.txt + ('Triton', '3.5.0', versionsuffix), + ('Ninja', '1.12.1'), # Required for JIT compilation of C++ extensions + ('Python', '3.12.3'), + ('Python-bundle-PyPI', '2024.06'), + ('expecttest', '0.2.1'), + ('GMP', '6.3.0'), + ('MPFR', '4.2.1'), + ('networkx', '3.4.2'), + ('numactl', '2.0.18'), + ('Pillow', '10.4.0'), + ('protobuf-python', '5.28.0'), + ('protobuf', '28.0'), + ('pybind11', '2.12.0'), + ('PuLP', '2.8.0'), + ('PyYAML', '6.0.2'), + ('pyzstd', '0.16.2'), + ('SciPy-bundle', '2024.05'), + ('sympy', '1.13.3'), + ('Z3', '4.13.0'), +] + +prebuildopts = (f"""sed -i '1i set(PYTHON_SIX_SOURCE_DIR "%(builddir)s/six-{local_six_version}")' """ + "cmake/Dependencies.cmake && ") +buildcmd = '%(python)s setup.py build' # Run the (long) build in the build step + +excluded_tests = { + '': [ + # This test seems to take too long on NVIDIA Ampere at least. + 'distributed/test_distributed_spawn', + # no xdoctest + 'doctests', + # intermittent failures on various systems + # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 + 'distributed/rpc/test_tensorpipe_agent', + # This test is expected to fail when run in their CI, but won't in our case. 
+ # It just checks for a "CI" env variable + 'test_ci_sanity_check_fail', + # Requires pwlf Python package + 'distributed/_tools/test_sac_ilp', 'distributed/_tools/test_sac_estimator', + # 9 failures on H100, 7 are present in the PyPI package, 2 are related to GC in Python < 3.12.4 + 'dynamo/test_dynamic_shapes', + # Broken test: https://github.com/pytorch/pytorch/issues/162179 + 'distributed/_composable/fsdp/test_fully_shard_logging', + # Broken: https://github.com/pytorch/pytorch/issues/137027 + 'inductor/test_extension_backend', + # Requires optional Python packages + 'test_public_bindings', + # 1 Failure and not important + 'dynamo/test_utils', + # Packaging test only, not important for us + 'test_license', + # Occasional segfaults on CPU + 'inductor/test_flex_attention', + 'inductor/test_flex_decoding', + ] +} + +runtest = ( + # Disable symbol resolution in stack traces that can cause hangs and slowdowns + ' TORCH_DISABLE_ADDR2LINE=1' + ' TORCHINDUCTOR_CUTLASS_DIR=%(start_dir)s/third_party/cutlass' + ' PYTEST_ADDOPTS=--full-trace' + ' PYTHONUNBUFFERED=1' + ' %(python)s test/run_test.py' + ' --continue-through-error --pipe-logs --verbose' + ' %(excluded_tests)s' +) + +postinstallcmds = [ + "mkdir %(installdir)s/extra", + "cp -r third_party/cutlass %(installdir)s/extra/", +] + +modextrapaths = {'TORCHINDUCTOR_CUTLASS_DIR': 'extra/cutlass'} + +tests = ['PyTorch-check-cpp-extension.py', 'PyTorch-check-cutlass.py'] + +moduleclass = 'ai'