Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.9.1-foss-2024a.eb
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ patches = [
'PyTorch-2.9.0_skip-unexpected-success-in-test_fake_export.patch',
'PyTorch-2.9.0_update-exptected-output-for-z3-4.13.0.patch',
'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch',
'PyTorch-2.9.1_avoid-using-wrong-libomp.patch',
'PyTorch-2.9.1_check-device-avail-test_schedule.patch',
'PyTorch-2.9.1_disable-slow-tests.patch',
'PyTorch-2.9.1_dont-print-test-items.patch',
'PyTorch-2.9.1_fix-hypothesis-deadline.patch',
'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch',
'PyTorch-2.9.1_fix-test_dist2-decorators.patch',
Expand All @@ -74,11 +76,17 @@ patches = [
'PyTorch-2.9.1_ignore-warning-incompatible-pointer-types.patch',
'PyTorch-2.9.1_normalize_tree_output.patch',
'PyTorch-2.9.1_set-test-timeout.patch',
'PyTorch-2.9.1_skip-cutlass-addmm-test.patch',
'PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch',
'PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch',
'PyTorch-2.9.1_skip-RingFlexAttentionTest.patch',
'PyTorch-2.9.1_skip-test_dtensor_op_db_nn_functional_multi_head_attention_forward_cpu_float32.patch',
'PyTorch-2.9.1_skip-tests-requiring-SM90.patch',
'PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch',
'PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch',
'PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch',
'PyTorch-2.9.1_skip-test_optree_graph_break_message.patch',
'PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch',
]
checksums = [
{'pytorch-v2.9.1.tar.gz': 'e17504700ebc4c87f9b57059df1c4d790b769458c04db144c7a92aea90f2c92b'},
Expand Down Expand Up @@ -146,9 +154,12 @@ checksums = [
'5c68e0de73212ed266879f4528a6041ef7ab2f1ac83c6cf7142c4baa78e7664c'},
{'PyTorch-2.9.1_avoid-multiprocess-tests-hanging-forever.patch':
'86ce380e69b3b20e010d817889cb1b825b05b4054a045b00f2ac12161b77d7e4'},
{'PyTorch-2.9.1_avoid-using-wrong-libomp.patch':
'2fc2bb82cce87ba0ce73718b0502735ecdf360ca6bfac4482396f7f1c51c1866'},
{'PyTorch-2.9.1_check-device-avail-test_schedule.patch':
'64c28d38ce69147565509add36d310473ce46f14a0a876d38b5049cb7fce9817'},
{'PyTorch-2.9.1_disable-slow-tests.patch': '76e6d8f7366b91a0ddc65f73685f2b09988bb5537d10d294f9bb6a48c7fec3d0'},
{'PyTorch-2.9.1_disable-slow-tests.patch': '6b365a3531b0ac5446b5f0e8ab924b5e5742cd0331e6d9ec979118a3ef0ffc09'},
{'PyTorch-2.9.1_dont-print-test-items.patch': '2b524cf3d557c0672feefc3a7165e5555e549b0720647a84d546f769cea0be07'},
{'PyTorch-2.9.1_fix-hypothesis-deadline.patch': 'f7a130669eee9924a303df9e2bd5743ff023a7d994b7a3e43c86dcccf0206c49'},
{'PyTorch-2.9.1_fix-iteration-in-fligh-reporter.patch':
'ab408275ec66e836112a50054acc4e789ef38196efeb6137c6061d60d9ac9ead'},
Expand All @@ -161,6 +172,8 @@ checksums = [
'c4dad43a5d76e292bb0cada56ea05e8cbd522e3e83749cf3b2c15cd1e4ff6d7b'},
{'PyTorch-2.9.1_normalize_tree_output.patch': '7d5994580339b73c28de595d9e5a0448db97b7d284f17efd18909e4613d170df'},
{'PyTorch-2.9.1_set-test-timeout.patch': '15fa1149c250b1333b0bc491f659aaf89d5d6eaf6df5ebc81eea545478c1239c'},
{'PyTorch-2.9.1_skip-cutlass-addmm-test.patch':
'1f81a8a9eea8eda51fc93dff84cd994772febf4fd05d77efbf21b8440dadfd4e'},
{'PyTorch-2.9.1_skip-flex-attention-test_block_mask_non_divisible.patch':
'd8489c192da549083569e09e5f94d2a83c9e41e111b1322f86512a9c5a58c0d9'},
{'PyTorch-2.9.1_skip-flex-attention-tests-on-unsupported-cpus.patch':
Expand All @@ -171,6 +184,16 @@ checksums = [
'e57486cc42f3dbcae29753168febc251d070a283229e2d76ccbdf19fee53f06e'},
{'PyTorch-2.9.1_skip-tests-requiring-SM90.patch':
'7db02152db2ae70c0fd4c4602fe381e26a74b8e4f7b16b1a3554b2353d761b10'},
{'PyTorch-2.9.1_increase-tolerance-TestDecomp-matmul.patch':
'dd82203ce3b6262255aba6b59fb3b547c4c17875d5711f6d3d489aa8f0f59f32'},
{'PyTorch-2.9.1_skip-cpu_repro-tests-failing-on-ARM.patch':
'99055fde02ca17c1db1cd72f41821387a50901d6cd947161cafa12257b3a1c5a'},
{'PyTorch-2.9.1_skip-svd-pca-lowrank-tests-on-cpu.patch':
'4fc772293047dc737b99e232b8a8db904aa8e88e3c8b2bcc3602fb723941fb89'},
{'PyTorch-2.9.1_skip-test_optree_graph_break_message.patch':
'2ef1ad424d5f12a4d0ae06938da623819596cee7c0fb4616008f27583c29494d'},
{'PyTorch-2.9.1_skip-tests-requiring-MKLDNN.patch':
'03756a8069bad01018f422f41aa24c7c543519fd857db88a0c6de661976c8859'},
]

osdependencies = [OS_PKG_IBVERBS_DEV]
Expand All @@ -183,7 +206,6 @@ builddependencies = [
('parameterized', '0.9.0'),
('pytest-flakefinder', '1.1.0'),
('pytest-rerunfailures', '15.0'),
('pytest-shard', '0.1.2'),
('pytest-subtests', '0.13.1'),
('tlparse', '0.4.0'),
('optree', '0.14.1'),
Expand Down Expand Up @@ -243,6 +265,9 @@ excluded_tests = {
'test_license',
# No triton
'distributed/test_nvshmem_triton',
# Occasional segfaults on CPU
'inductor/test_flex_attention',
'inductor/test_flex_decoding',
]
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
When using GCC, `libgomp.so` should be used, which happens automatically with `-fopenmp`.
However, the custom FindOpenMP module searches for `libomp.so` first, which ends up being used if found,
e.g. on the system in /lib64.

See https://github.com/pytorch/pytorch/pull/177126

Author: Alexander Grund (TU Dresden)

diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake
--- a/cmake/Modules/FindOpenMP.cmake
+++ b/cmake/Modules/FindOpenMP.cmake
@@ -289,21 +289,13 @@ function(_OPENMP_GET_FLAGS LANG FLAG_MODE OPENMP_FLAG_VAR OPENMP_LIB_NAMES_VAR)
mark_as_advanced(OpenMP_libomp_LIBRARY)
endif()

- if (NOT OpenMP_libomp_LIBRARY)
- find_library(OpenMP_libomp_LIBRARY
- NAMES omp gomp iomp5
- HINTS ${CMAKE_${LANG}_IMPLICIT_LINK_DIRECTORIES}
- DOC "libomp location for OpenMP"
- )
- mark_as_advanced(OpenMP_libomp_LIBRARY)
- endif()
-
# Use OpenMP_PREFIX if defined
if (NOT OpenMP_libomp_LIBRARY AND NOT "${OpenMP_PREFIX}" STREQUAL "")
find_library(OpenMP_libomp_LIBRARY
NAMES omp gomp iomp5
HINTS "${OpenMP_PREFIX}/lib"
DOC "libomp location for OpenMP"
+ NO_DEFAULT_PATH
)
mark_as_advanced(OpenMP_libomp_LIBRARY)
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@ Enable the default for non-CI environments to cut down testing time.
Don't check for SANDCASTLE when determining whether to skip disabled tests.
However, the disabled-tests JSON file needs to be downloaded from S3 and placed at "tests/.pytorch-disabled-tests.json".

This file may be modified and/or redownloaded in import_test_stats.py
Disable this by just returning its content as if it were always up to date.
If it doesn't exist the failure will be handled by the calling function.
This modification removes the PR number field, so make it optional in the tuple expansion to allow either format.

Author: Alexander Grund (TU Dresden)

diff --git a/test/run_test.py b/test/run_test.py
index 44a15d4ab2c..269d4206f3e 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -531,7 +531,7 @@ def run_test(
Expand All @@ -20,8 +24,19 @@ index 44a15d4ab2c..269d4206f3e 100755
ci_args = ["--import-slow-tests", "--import-disabled-tests"]
if RERUN_DISABLED_TESTS:
ci_args.append("--rerun-disabled-tests")
diff --git a/tools/stats/import_test_stats.py b/tools/stats/import_test_stats.py
--- a/tools/stats/import_test_stats.py
+++ b/tools/stats/import_test_stats.py
@@ -47,6 +47,8 @@ def fetch_and_cache(
Path(dirpath).mkdir(exist_ok=True)

path = os.path.join(dirpath, name)
+ with open(path) as f:
+ return cast(dict[str, Any], json.load(f))
print(f"Downloading {url} to {path}")

def is_cached_file_valid() -> bool:
diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
index bfc568bc146..7ef37cccccb 100644
--- a/torch/testing/_internal/common_utils.py
+++ b/torch/testing/_internal/common_utils.py
@@ -2722,11 +2722,11 @@ def check_if_enable(test: unittest.TestCase):
Expand All @@ -32,9 +47,10 @@ index bfc568bc146..7ef37cccccb 100644
+ if True:
should_skip = False
skip_msg = ""
-
- for disabled_test, (issue_url, platforms) in disabled_tests_dict.items():
+ for disabled_test, (pr_num, issue_url, platforms) in disabled_tests_dict.items():
+ # Allow for a potentially existing PR number
+ for disabled_test, (*pr_num, issue_url, platforms) in disabled_tests_dict.items():
if matches_test(disabled_test):
platform_to_conditional: dict = {
"mac": IS_MACOS,
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Reduce verbosity of the test output by not showing all ~270k test names.

Author: Alexander Grund (TU Dresden)
diff --git a/test/run_test.py b/test/run_test.py
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -623,7 +623,7 @@ def run_test(
test_file,
)
else:
- command.extend([f"--sc={stepcurrent_key}", "--print-items"])
+ command.extend([f"--sc={stepcurrent_key}"])
ret_code, was_rerun = retry_shell(
command,
test_directory,
@@ -725,7 +725,7 @@ def run_test_retries(

num_failures = defaultdict(int)

- print_items = ["--print-items"]
+ print_items = []
sc_command = f"--sc={stepcurrent_key}"
while True:
ret_code, _ = retry_shell(
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

TestDecompCPU.test_comprehensive___rmatmul___cpu_float32, TestDecompCPU.test_comprehensive_matmul_cpu_float32 fail with small tolerance issues:
> Expected 12.534862518310547 but got 12.534895896911621.
> Absolute difference: 3.337860107421875e-05 (up to 1e-05 allowed)
> Relative difference: 2.6628613616990456e-06 (up to 1.3e-06 allowed)

Increase the tolerances slightly to make them pass.

Author: Alexander Grund (TU Dresden)

diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@@ -14286,6 +14286,9 @@ op_db: list[OpInfo] = [
DecorateInfo(toleranceOverride({torch.float32: tol(atol=0, rtol=1e-5)}),
'TestCommon', 'test_noncontiguous_samples',
device_type='cpu'),
+ DecorateInfo(toleranceOverride({torch.float32: tol(atol=4e-5, rtol=3e-6)}),
+ "TestDecomp", "test_comprehensive", device_type="cpu",
+ ),
DecorateInfo(
toleranceOverride({
torch.float32: tol(atol=1e-5, rtol=1e-5),
@@ -17690,6 +17693,8 @@ op_db: list[OpInfo] = [
'TestMathBits', 'test_conj_view'),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-05, rtol=1.2e-03)}),
'TestCommon', 'test_noncontiguous_samples'),
+ DecorateInfo(toleranceOverride({torch.float32: tol(atol=4e-05, rtol=3e-06)}),
+ "TestDecomp", "test_comprehensive", device_type="cpu"),
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1e-05)}),
"TestDecomp", "test_comprehensive", device_type="cuda",
active_if=TEST_WITH_ROCM),
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
These tests fail with precision issues on ARM, which seems to be a known problem:
https://github.com/pytorch/pytorch/pull/171095

Author: Alexander Grund (TU Dresden)

diff --git a/test/inductor/test_cpu_repro.py b/test/inductor/test_cpu_repro.py
--- a/test/inductor/test_cpu_repro.py
+++ b/test/inductor/test_cpu_repro.py
@@ -31,6 +31,7 @@ from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn import functional as F
from torch.testing._internal.common_utils import (
instantiate_parametrized_tests,
+ IS_ARM64,
IS_FBCODE,
IS_MACOS,
parametrize,
@@ -3245,6 +3246,7 @@ class CPUReproTests(TestCase):
3,
)

+ @unittest.skipIf(IS_ARM64, "Fails on ARM")
@config.patch({"fx_graph_cache": False, "fx_graph_remote_cache": False})
def test_two_local_buffers_in_outer_loop_fusion(self):
def fn(x):
@@ -3568,6 +3570,7 @@ class CPUReproTests(TestCase):
self.common(m, (x,))
check_metrics_vec_kernel_count(6)

+ @unittest.skipIf(IS_ARM64, "Fails on ARM")
@requires_vectorization
@config.patch("cpp.enable_tiling_heuristics", False)
def test_transpose_copy(self):
@@ -3812,6 +3815,7 @@ class CPUReproTests(TestCase):
self.common(fn, (x, y))
check_metrics_vec_kernel_count(2)

+ @unittest.skipIf(IS_ARM64, "Fails on ARM")
def test_transpose_mxn_16_16_bf16_fp16(self):
def fn(a, b):
c = a * b
@@ -3885,6 +3889,7 @@ class CPUReproTests(TestCase):
x = torch.rand(4, 5)
self.common(f, (x,))

+ @unittest.skipIf(IS_ARM64, "Fails on ARM")
def test_broadcast_scalar_cpp_tile_2d_kernel(self):
# Based on detectron2_maskrcnn backbone (conv2d -> max_pool2d)
s0 = 12
@@ -4384,6 +4389,7 @@ class CPUReproTests(TestCase):
y = torch.randint(0, 255, (3, 3), dtype=torch.uint8)
self.common(fn, (x, y))

+ @unittest.skipIf(IS_ARM64, "Fails on ARM")
def test_float32_to_uint8(self):
# https://github.com/pytorch/pytorch/issues/156788
@torch.compile
@@ -4868,6 +4874,7 @@ class CPUReproTests(TestCase):
x = torch.randn(1, 4, 2, 2)
self.common(fn, (x,))

+ @unittest.skipIf(IS_ARM64, "Fails on ARM")
@parametrize("is_inference", (True, False))
def test_disabled_amp(self, is_inference):
class M(torch.nn.Module):
@@ -5367,6 +5374,7 @@ class CPUReproTests(TestCase):
code
)

+ @unittest.skipIf(IS_ARM64, "Fails on ARM")
@config.patch(freezing=True)
def test_add_layernorm(self):
class Model(torch.nn.Module):
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
The test fails with accuracy issues on at least H100, possibly on CUDA 12.8 in general.
See https://github.com/pytorch/pytorch/pull/156626

Author: Alexander Grund (TU Dresden)
diff --git a/test/inductor/test_cutlass_backend.py b/test/inductor/test_cutlass_backend.py
--- a/test/inductor/test_cutlass_backend.py
+++ b/test/inductor/test_cutlass_backend.py
@@ -613,7 +613,7 @@ class TestCutlassBackend(TestCase):

torch.testing.assert_close(actual, expected, rtol=1e-2, atol=0.05)

- @unittest.skipIf(not SM90OrLater, "need sm_90")
+ @unittest.skip("Fails on CUDA 12.8+")
@parametrize("dynamic", (False, True))
@parametrize("use_aoti", (False, True))
@parametrize("dtype", (torch.float16, torch.bfloat16))
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
On ARM those tests fail with
> torch._C._LinAlgError: linalg.svd: The algorithm failed to converge because the input matrix contained non-finite values.

Traced to OpenBLAS with a fix in OpenBLAS 0.3.30, see https://github.com/pytorch/pytorch/issues/142131

Author: Alexander Grund (TU Dresden)

diff --git a/test/test_linalg.py b/test/test_linalg.py
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -2674,6 +2674,7 @@ class TestLinalg(TestCase):
self.assertRaisesRegex(RuntimeError, "must be different", torch.norm, x, "nuc", (0, 0))
self.assertRaisesRegex(IndexError, "Dimension out of range", torch.norm, x, "nuc", (0, 2))

+ @onlyCUDA
@skipCUDAIfNoCusolver
@skipCPUIfNoLapack
@dtypes(torch.double, torch.cdouble)
@@ -9383,6 +9384,7 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2:

run_test((1, 1), (1, 1, 1025))

+ @onlyCUDA
@skipCUDAIfNoCusolver
@skipCPUIfNoLapack
def test_pca_lowrank(self, device):
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Test fails with output mismatch:
> - Explanation: Dynamo cannot trace optree C/C++ function optree._C.PyCapsule.flatten.
> + Explanation: Dynamo cannot trace optree C/C++ function optree._C.pybind11_detail_function_record_v1_system_libstdcpp_gxx_abi_1xxx_use_cxx11_abi_1.flatten.
> Hint: Consider using torch.utils._pytree - https://github.com/pytorch/pytorch/blob/main/torch/utils/_pytree.py
>
> - Developer debug context: module: optree._C, qualname: PyCapsule.flatten, skip reason: <missing reason>
> + Developer debug context: module: optree._C, qualname: pybind11_detail_function_record_v1_system_libstdcpp_gxx_abi_1xxx_use_cxx11_abi_1.flatten, skip reason: <missing reason>

Seems to be related to pybind11 version, GCC version, ...

Author: Alexander Grund (TU Dresden)

diff --git a/test/dynamo/test_error_messages.py b/test/dynamo/test_error_messages.py
--- a/test/dynamo/test_error_messages.py
+++ b/test/dynamo/test_error_messages.py
@@ -461,7 +461,7 @@ from user code:
warnings.warn("test")""",
)

- @unittest.skipIf(not python_pytree._cxx_pytree_exists, "missing optree package")
+ @unittest.skip("Failes depending on Pybind11/GCC versions")
def test_optree_graph_break_message(self):
import optree

Loading
Loading