Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5cf4096
adding easyconfigs: PyTorch-2.9.0-foss-2024a-CUDA-12.6.0.eb and patch…
Flamefire Oct 24, 2025
9beff55
Add dependencies
Flamefire Dec 9, 2025
867d1a2
Use tlparse 0.4.0
Flamefire Dec 15, 2025
4dc0cc6
Update FlexAttention patch
Flamefire Dec 18, 2025
117a394
Skip tests requiring CUDA SM 9.0
Flamefire Jan 7, 2026
5170b50
Add patch avoiding infinite test hang
Flamefire Jan 8, 2026
99bdbbd
Add patch avoiding infinite test hang
Flamefire Jan 15, 2026
f5fbb91
Merge branch 'develop' into 20251024183337_new_pr_PyTorch290
Flamefire Jan 21, 2026
935c57a
Add patch avoiding infinite test hang
Flamefire Jan 22, 2026
874f0c5
More patches
Flamefire Jan 22, 2026
8a7320e
Add comment on TORCH_DISABLE_ADDR2LINE variable
Flamefire Jan 23, 2026
7a46620
Fix patched skip markers
Flamefire Feb 5, 2026
de9d0f4
Add comment for DISABLE_ADDR2LINE
Flamefire Feb 5, 2026
6015d47
Set test timeout
Flamefire Feb 9, 2026
c342afd
Add GCC 14 patch
Flamefire Feb 10, 2026
a02dba5
Add patches for test fixes and skip slow&disabled tests
Flamefire Feb 12, 2026
1a86b08
Remove duplicate source_url
Flamefire Feb 12, 2026
7cc912c
Add PyTorch-2.6.0_fix-server-in-test_control_plane
Flamefire Feb 13, 2026
096f3ad
Merge branch 'easybuilders:develop' into 20251024183337_new_pr_PyTorc…
Flamefire Feb 13, 2026
8a7f62d
Merge branch 'easybuilders:develop' into 20251024183337_new_pr_PyTorc…
Flamefire Feb 16, 2026
e1e381e
Merge branch 'develop' into 20251024183337_new_pr_PyTorch290
Flamefire Feb 24, 2026
b81f710
Fix race condition in checking for disabled tests
Flamefire Mar 3, 2026
ed504e8
Remove pytest-shard
Flamefire Mar 4, 2026
deda8cb
Add more patches
Flamefire Mar 5, 2026
efa1f26
Fix using wrong OpenMP library
Flamefire Mar 11, 2026
66ba96f
Skip segfaulting flex_attention suite
Flamefire Mar 11, 2026
63a8597
Skip some tests failing on ARM
Flamefire Mar 13, 2026
3977123
Merge branch 'easybuilders:develop' into 20251024183337_new_pr_PyTorc…
Flamefire Mar 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions easybuild/easyconfigs/c/cuDNN/cuDNN-9.10.2.21-CUDA-12.6.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Easyconfig describing an installation of cuDNN 9.10.2.21 on top of CUDA 12.6.0.
name = 'cuDNN'
version = '9.10.2.21'
# suffix ties the installation to the CUDA version; presumably %(cudaver)s is filled in from the
# CUDA dependency listed below (12.6.0) - confirm against the EasyBuild template documentation
versionsuffix = '-CUDA-%(cudaver)s'
homepage = 'https://developer.nvidia.com/cudnn'
description = """The NVIDIA CUDA Deep Neural Network library (cuDNN) is
a GPU-accelerated library of primitives for deep neural networks."""

# no compiler toolchain is used for this package
toolchain = SYSTEM

# archives are fetched from an architecture-specific directory on NVIDIA's download server
source_urls = [
'https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-%(cudnnarch)s/'
]
# note: cuDNN is tied to specific CUDA versions,
# see also https://docs.nvidia.com/deeplearning/cudnn/support-matrix/index.html#cudnn-cuda-hardware-versions
sources = ['%(namelower)s-linux-%(cudnnarch)s-%(version)s_cuda%(cudamajver)s-archive.tar.xz']
# one checksum per architecture-specific archive name (sbsa and x86_64), keyed by the source filename;
# presumably %(cudnnarch)s resolves to one of these two values - confirm
checksums = [{
'%(namelower)s-linux-sbsa-%(version)s_cuda%(cudamajver)s-archive.tar.xz':
'4d57dceba3be27a68b078ce8630525bf40ab7f1b546eb45d0b363c3eeb55f8fa',
'%(namelower)s-linux-x86_64-%(version)s_cuda%(cudamajver)s-archive.tar.xz':
'd0defcbc4c6dad711ff4cb66d254036a300c9071b07c7b64199aacab534313c1',
}]

# must match the CUDA version this cuDNN archive was built for (see note above)
dependencies = [('CUDA', '12.6.0')]

# files and directories expected in the installation: the main header plus the static libraries
sanity_check_paths = {
'files': [
'include/cudnn.h', 'lib64/libcudnn_adv_static.a', 'lib64/libcudnn_cnn_static.a',
'lib64/libcudnn_engines_precompiled_static.a', 'lib64/libcudnn_engines_runtime_compiled_static.a',
'lib64/libcudnn_graph_static.a', 'lib64/libcudnn_heuristic_static.a', 'lib64/libcudnn_ops_static.a',
],
'dirs': ['include', 'lib64'],
}

moduleclass = 'numlib'
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Easyconfig describing a build of NCCL 2.27.5 with GCCcore 13.3.0 against CUDA 12.6.0.
name = 'NCCL'
version = '2.27.5'
# suffix ties the installation to the CUDA version; presumably %(cudaver)s is filled in from the
# CUDA dependency listed below (12.6.0) - confirm against the EasyBuild template documentation
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://developer.nvidia.com/nccl'
description = """The NVIDIA Collective Communications Library (NCCL) implements multi-GPU and multi-node collective
communication primitives that are performance optimized for NVIDIA GPUs."""

toolchain = {'name': 'GCCcore', 'version': '13.3.0'}

# sources are downloaded from the NVIDIA account on GitHub
github_account = 'NVIDIA'
source_urls = [GITHUB_SOURCE]
# the archive name carries a '-1' suffix after the version (presumably the upstream tag naming - confirm)
sources = ['v%(version)s-1.tar.gz']
checksums = ['e8a8972fc7f7517703510ef23608d41f6484db5331fca37827b4af3f66995344']

builddependencies = [('binutils', '2.42')]

# CUDA is taken from the SYSTEM toolchain; UCX-CUDA uses the same versionsuffix as this easyconfig
dependencies = [
('CUDA', '12.6.0', '', SYSTEM),
('UCX-CUDA', '1.16.0', versionsuffix),
]

# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
cuda_compute_capabilities = ['5.0', '6.0', '7.0', '7.5', '8.0', '8.6', '9.0']

moduleclass = 'lib'
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
Allow use of the NVIDIA CUTLASS Python package if installed.
See https://github.com/pytorch/pytorch/pull/160180

Author: Alexander Grund (TU Dresden)

diff -ur a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py
--- a/torch/_inductor/codecache.py 2025-10-15 19:15:08.000000000 +0200
+++ b/torch/_inductor/codecache.py 2025-10-24 18:07:49.519431015 +0200
@@ -3628,13 +3628,15 @@
return "nvcc"


-def _cutlass_path() -> str:
+def _cutlass_path() -> Optional[str]:
if config.is_fbcode():
from libfb.py import parutil

return parutil.get_dir_path("cutlass-4-headers")
else:
- return config.cuda.cutlass_dir
+ from torch._inductor.codegen.cuda.cutlass_utils import try_import_cutlass
+
+ return config.cuda.cutlass_dir if try_import_cutlass() else None


def _cutlass_paths() -> list[str]:
@@ -3649,6 +3651,8 @@
def _clone_cutlass_paths(build_root: str) -> list[str]:
paths = _cutlass_paths()
cutlass_root = _cutlass_path()
+ if cutlass_root is None:
+ return []
for path in _cutlass_paths():
old_path = os.path.join(cutlass_root, path)
new_path = os.path.join(build_root, path)
@@ -3657,10 +3661,12 @@


def _cutlass_include_paths() -> list[str]:
- cutlass_path = _cutlass_path()
+ cutlass_root = _cutlass_path()
+ if cutlass_root is None:
+ return []
return [
# Use realpath to get canonical absolute paths, in order not to mess up cache keys
- os.path.realpath(os.path.join(cutlass_path, path))
+ os.path.realpath(os.path.join(cutlass_root, path))
for path in _cutlass_paths()
]

diff -ur a/torch/_inductor/codegen/cuda/cutlass_utils.py b/torch/_inductor/codegen/cuda/cutlass_utils.py
--- a/torch/_inductor/codegen/cuda/cutlass_utils.py 2025-10-15 19:15:08.000000000 +0200
+++ b/torch/_inductor/codegen/cuda/cutlass_utils.py 2025-10-24 18:07:49.520431003 +0200
@@ -1,6 +1,7 @@
# mypy: allow-untyped-defs
import atexit
import functools
+import importlib.metadata
import logging
import os
import shutil
@@ -15,6 +16,7 @@
import torch
from torch._inductor.runtime.runtime_utils import dynamo_timed
from torch._inductor.utils import clear_on_fresh_cache
+from torch._vendor.packaging.version import Version
from torch.utils._ordered_set import OrderedSet

from ... import config
@@ -73,7 +75,9 @@
"""
We want to support three ways of passing in CUTLASS:
1. fbcode, handled by the internal build system.
- 2. User specifies cutlass_dir. The default is ../third_party/cutlass/,
+ 2. pip install nvidia-cutlass, which provides the cutlass_library package
+ and the header files in the cutlass_library/source directory.
+ 3. User specifies cutlass_dir. The default is ../third_party/cutlass/,
which is the directory when developers build from source.
"""
if config.is_fbcode():
@@ -89,6 +93,34 @@

return True

+ try:
+ cutlass_version = Version(importlib.metadata.version("cutlass"))
+ if cutlass_version < Version("3.7"):
+ log.warning("CUTLASS version < 3.7 is not recommended.")
+
+ import cutlass_library # type: ignore[import-not-found] # noqa: F811
+
+ log.debug(
+ "Found cutlass_library in python search path, overriding config.cuda.cutlass_dir"
+ )
+ cutlass_library_dir = os.path.dirname(cutlass_library.__file__)
+ assert os.path.isdir(cutlass_library_dir), (
+ f"{cutlass_library_dir} is not a directory"
+ )
+ config.cuda.cutlass_dir = os.path.abspath(
+ os.path.join(
+ cutlass_library_dir,
+ "source",
+ )
+ )
+
+ return True
+ except (ModuleNotFoundError, importlib.metadata.PackageNotFoundError):
+ log.debug(
+ "cutlass_library not found in sys.path, trying to import from config.cuda.cutlass_dir",
+ exc_info=True,
+ )
+
# Copy CUTLASS python scripts to a temp dir and add the temp dir to Python search path.
# This is a temporary hack to avoid CUTLASS module naming conflicts.
# TODO(ipiszy): remove this hack when CUTLASS solves Python scripts packaging structure issues.
@@ -156,7 +188,7 @@
)

try:
- import cutlass # noqa: F401, F811
+ import cutlass # noqa: F401
import cutlass_library.generator # noqa: F401
import cutlass_library.library # noqa: F401
import cutlass_library.manifest # noqa: F401
Loading
Loading