Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions easybuild/easyconfigs/o/optree/optree-0.11.0-GCCcore-13.2.0.eb
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

optree requires typing-extensions/4.10.0-GCCcore-13.2.0

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is that? I installed it just fine:

... python -m pip check completed successfully

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are you getting typing-extensions? It is not part of Python-3.11.5-GCCcore-13.2.0.eb. optree build fails without typing-extensions.

== installing...
== ... (took 29 secs)
== taking care of extensions...
== restore after iterating...
== postprocessing...
== sanity checking...
== ... (took 3 secs)
== FAILED: Installation ended unsuccessfully (build directory: /build/optree/0.11.0/GCCcore-13.2.0): build failed (first 300 chars): `/app/software/Python/3.11.5-GCCcore-13.2.0/bin/python -m pip check` failed:
optree 0.11.0 requires typing-extensions, which is not installed.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like you need to reinstall Python. The current develop version and release 4.9.1 contain it:

('typing_extensions', '4.8.0', {
'checksums': ['df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef'],
}),

However it was a change between 4.8.2 and 4.9.x by #19777

From the looks of that PR this was made because too many other ECs depended on that. And IMO it makes sense to include it in Python by default

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, --rebuild --skip added four packages. This will fix many things for me.

== installing extension tomli 2.0.1 (1/4)...
==      configuring...
==      building...
==      testing...
==      installing...
==      ... (took 11 secs)
== installing extension packaging 23.2 (2/4)...
==      configuring...
==      building...
==      testing...
==      installing...
==      ... (took 2 secs)
== installing extension typing_extensions 4.8.0 (3/4)...
==      configuring...
==      building...
==      testing...
==      installing...
==      ... (took 2 secs)
== installing extension setuptools-scm 8.0.4 (4/4)...

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# EasyBuild easyconfig for optree 0.11.0 with the GCCcore 13.2.0 toolchain.
easyblock = 'PythonPackage'

name = 'optree'
version = '0.11.0'

homepage = 'https://optree.readthedocs.io/en/latest/'
description = "Optimized PyTree Utilities"

toolchain = {'name': 'GCCcore', 'version': '13.2.0'}

# NOTE(review): GITHUB_SOURCE is not defined in this file; presumably an EasyBuild
# template constant that builds the download URL from `github_account` and `name`.
github_account = 'metaopt'
source_urls = [GITHUB_SOURCE]
sources = ['v%(version)s.tar.gz']
checksums = ['34dcf30681b04d705a5d832a996c514e2590aae55cff281fd79039b8b72c31c8']

builddependencies = [
    ('binutils', '2.40'),
    ('CMake', '3.27.6'),
]

# `pip check` reports that optree requires typing-extensions; no separate dependency is
# listed for it here, so it has to be provided by the Python installation itself.
dependencies = [
    ('Python', '3.11.5'),
]

use_pip = True
# The sanity check step runs `python -m pip check`, which fails on unmet requirements.
sanity_pip_check = True
download_dep_fail = True

moduleclass = 'lib'
170 changes: 170 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.3.0-foss-2023b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# EasyBuild easyconfig for PyTorch 2.3.0 with the foss 2023b toolchain.
name = 'PyTorch'
version = '2.3.0'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2023b'}

# NOTE(review): GITHUB_RELEASE is not defined in this file; presumably supplied by EasyBuild.
source_urls = [GITHUB_RELEASE]
# Should resolve to 'pytorch-v2.3.0.tar.gz', the file name used as the first key in `checksums`.
sources = ['%(namelower)s-v%(version)s.tar.gz']
# Patch files used by this easyconfig; `checksums` holds one entry per patch, in the same order.
patches = [
    'PyTorch-1.7.0_disable-dev-shm-test.patch',
    'PyTorch-1.12.1_add-hypothesis-suppression.patch',
    'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
    'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
    'PyTorch-1.12.1_skip-test_round_robin.patch',
    'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
    'PyTorch-1.13.1_fix-protobuf-dependency.patch',
    'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
    'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
    'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
    'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
    'PyTorch-2.0.1_fix-skip-decorators.patch',
    'PyTorch-2.0.1_fix-vsx-loadu.patch',
    'PyTorch-2.0.1_skip-failing-gradtest.patch',
    'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch',
    'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch',
    'PyTorch-2.1.0_fix-vsx-vector-shift-functions.patch',
    'PyTorch-2.1.0_increase-tolerance-functorch-test_vmapvjpvjp.patch',
    'PyTorch-2.1.0_remove-test-requiring-online-access.patch',
    'PyTorch-2.1.0_skip-diff-test-on-ppc.patch',
    'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch',
    'PyTorch-2.1.0_skip-test_jvp_linalg_det_singular.patch',
    'PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch',
    'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch',
    'PyTorch-2.3.0_disable_test_linear_package_if_no_half_types_are_available.patch',
    'PyTorch-2.3.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch',
    'PyTorch-2.3.0_skip-test_init_from_local_shards.patch',
    'PyTorch-2.3.0_no-cuda-stubs-rpath.patch',
    'PyTorch-2.3.0_disable-gcc12-warning.patch',
    'PyTorch-2.3.0_fix-test_extension_backend-without-vectorization.patch',
    'PyTorch-2.3.0_disable_tests_which_need_network_download.patch',
]
# One single-key dict per file (file name -> 64-hex-digit checksum): the source tarball first,
# followed by the patches in the same order as they appear in `patches`.
checksums = [
    {'pytorch-v2.3.0.tar.gz': '69579513b26261bbab32e13b7efc99ad287fcf3103087f2d4fdf1adacd25316f'},
    {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
    {'PyTorch-1.12.1_add-hypothesis-suppression.patch':
     'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
    {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
     '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
    {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
    {'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
    {'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
     '5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
    {'PyTorch-1.13.1_fix-protobuf-dependency.patch':
     '8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
    {'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
     'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
    {'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
     '72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
    {'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
     '481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
    {'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
     '02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
    {'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'},
    {'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'},
    {'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'},
    {'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch':
     '7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'},
    {'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch':
     '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'},
    {'PyTorch-2.1.0_fix-vsx-vector-shift-functions.patch':
     '3793b4b878be1abe7791efcbd534774b87862cfe7dc4774ca8729b6cabb39e7e'},
    {'PyTorch-2.1.0_increase-tolerance-functorch-test_vmapvjpvjp.patch':
     'aef38adf1210d0c5455e91d7c7a9d9e5caad3ae568301e0ba9fc204309438e7b'},
    {'PyTorch-2.1.0_remove-test-requiring-online-access.patch':
     '35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'},
    {'PyTorch-2.1.0_skip-diff-test-on-ppc.patch': '394157dbe565ffcbc1821cd63d05930957412156cc01e949ef3d3524176a1dda'},
    {'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch':
     '6298daf9ddaa8542850eee9ea005f28594ab65b1f87af43d8aeca1579a8c4354'},
    {'PyTorch-2.1.0_skip-test_jvp_linalg_det_singular.patch':
     '5229ca88a71db7667a90ddc0b809b2c817698bd6e9c5aaabd73d3173cf9b99fe'},
    {'PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch':
     '7ace835af60c58d9e0754a34c19d4b9a0c3a531f19e5d0eba8e2e49206eaa7eb'},
    {'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch':
     'fb96eefabf394617bbb3fbd3a7a7c1aa5991b3836edc2e5d2a30e708bfe49ba1'},
    {'PyTorch-2.3.0_disable_test_linear_package_if_no_half_types_are_available.patch':
     '23416f2d9d5226695ec3fbea0671e3650c655c19deefd3f0f8ddab5afa50f485'},
    {'PyTorch-2.3.0_disable_DataType_dependent_test_if_tensorboard_is_not_available.patch':
     '0dcbdfde6752c3ff54c5376f521b4a742167669feb7f0f1d4e1d4d55f72b664f'},
    {'PyTorch-2.3.0_skip-test_init_from_local_shards.patch':
     '90ed9c2870f57ee6dc032d00873a37e2217a2b92a13035ded1c25ad5306455f2'},
    {'PyTorch-2.3.0_no-cuda-stubs-rpath.patch':
     '7ba26824b5def7379cff02ae821a080698e6affea0da45bc846e9ecb89939cb1'},
    {'PyTorch-2.3.0_disable-gcc12-warning.patch':
     'a8a624e1a2a5f4c82610173e50bd0f853e49bd5621b432f5aac689f9f6eb1514'},
    {'PyTorch-2.3.0_fix-test_extension_backend-without-vectorization.patch':
     '36aa2d5ba175be17f4e996f4fb2d544fe477d4a0bd0644cd59a85063779afc8e'},
    {'PyTorch-2.3.0_disable_tests_which_need_network_download.patch':
     'b7fd1a5135dfd4098cdc054182f7bf84a23ac98462a00477712182b5442da855'},
]

# NOTE(review): OS_PKG_IBVERBS_DEV is not defined in this file; presumably an EasyBuild
# constant naming the ibverbs development package of the host OS - confirm.
osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
    ('CMake', '3.27.6'),
    ('hypothesis', '6.90.0'),
    # For tests
    ('pytest-flakefinder', '1.1.0'),
    ('pytest-rerunfailures', '14.0'),
    ('pytest-shard', '0.1.2'),
    ('tlparse', '0.3.5'),
    ('optree', '0.11.0'),
]

# Dependencies of the installed PyTorch module, as (name, version) pairs.
dependencies = [
    ('Ninja', '1.11.1'),  # Required for JIT compilation of C++ extensions
    ('Python', '3.11.5'),
    ('Python-bundle-PyPI', '2023.10'),
    ('protobuf', '25.3'),
    ('protobuf-python', '4.25.3'),
    ('pybind11', '2.11.1'),
    ('SciPy-bundle', '2023.11'),
    ('PyYAML', '6.0.1'),
    ('MPFR', '4.2.1'),
    ('GMP', '6.3.0'),
    ('numactl', '2.0.16'),
    ('FFmpeg', '6.0'),
    ('Pillow', '10.2.0'),
    ('expecttest', '0.2.1'),
    ('networkx', '3.2.1'),
    ('sympy', '1.12'),
    ('Z3', '4.13.0'),
]

use_pip = True
buildcmd = '%(python)s setup.py build'  # Run the (long) build in the build step

# Test files left out of the test run; the empty-string key holds the list used here.
excluded_tests = {
    '': [
        # This test seems to take too long on NVIDIA Ampere at least.
        'distributed/test_distributed_spawn',
        # Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
        'distributions/test_constraints',
        # no xdoctest
        'doctests',
        # failing on broadwell
        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
        'test_native_mha',
        # intermittent failures on various systems
        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
        'distributed/rpc/test_tensorpipe_agent',
        # This test is expected to fail when run in their CI, but won't in our case.
        # It just checks for a "CI" env variable
        'test_ci_sanity_check_fail',
    ]
}

# Test command: runs run_test.py from the `test` directory with unbuffered Python output.
runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'
Comment thread
akesandgren marked this conversation as resolved.
Outdated

# test_quantization in particular has a few corner cases that are triggered by random input values
# and cannot easily be avoided, see https://github.com/pytorch/pytorch/issues/107030
# So allow a small number of tests to fail, as the tests "usually" succeed.
max_failed_tests = 6

# NOTE(review): presumably this script is run by EasyBuild as an extra check of the installation - confirm.
tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
GCC 12 has a false positive warning when compiled for some architectures, e.g. Intel Sapphire Rapids.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112370

Suppress this warning so that the build doesn't fail.

Author: Alexander Grund (TU Dresden)

Ported to 2.3.0, Åke Sandgren
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b74bf4536f4..bb062fa843a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -573,6 +573,7 @@ if(MSVC)
string(APPEND CMAKE_CXX_FLAGS " /FS")
string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /FS")
endif(MSVC)
+append_cxx_flag_if_supported("-Wno-free-nonheap-object" CMAKE_CXX_FLAGS)

string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")

Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
Disable the tests that use DataType when the tensorboard module is not available.
The corresponding import is already protected.

Åke Sandgren, 2024-05-07
diff --git a/test/test_tensorboard.py b/test/test_tensorboard.py
index 3ce2ab2a172..141f09dc220 100644
--- a/test/test_tensorboard.py
+++ b/test/test_tensorboard.py
@@ -829,64 +829,65 @@ class TestTensorBoardNumpy(BaseTestCase):
)
compare_proto(graph, self)

-class TestTensorProtoSummary(BaseTestCase):
- @parametrize(
- "tensor_type,proto_type",
- [
- (torch.float16, DataType.DT_HALF),
- (torch.bfloat16, DataType.DT_BFLOAT16),
- ],
- )
- def test_half_tensor_proto(self, tensor_type, proto_type):
- float_values = [1.0, 2.0, 3.0]
- actual_proto = tensor_proto(
- "dummy",
- torch.tensor(float_values, dtype=tensor_type),
- ).value[0].tensor
- self.assertSequenceEqual(
- [int_to_half(x) for x in actual_proto.half_val],
- float_values,
+if TEST_TENSORBOARD:
+ class TestTensorProtoSummary(BaseTestCase):
+ @parametrize(
+ "tensor_type,proto_type",
+ [
+ (torch.float16, DataType.DT_HALF),
+ (torch.bfloat16, DataType.DT_BFLOAT16),
+ ],
)
- self.assertTrue(actual_proto.dtype == proto_type)
+ def test_half_tensor_proto(self, tensor_type, proto_type):
+ float_values = [1.0, 2.0, 3.0]
+ actual_proto = tensor_proto(
+ "dummy",
+ torch.tensor(float_values, dtype=tensor_type),
+ ).value[0].tensor
+ self.assertSequenceEqual(
+ [int_to_half(x) for x in actual_proto.half_val],
+ float_values,
+ )
+ self.assertTrue(actual_proto.dtype == proto_type)

- def test_float_tensor_proto(self):
- float_values = [1.0, 2.0, 3.0]
- actual_proto = (
- tensor_proto("dummy", torch.tensor(float_values)).value[0].tensor
- )
- self.assertEqual(actual_proto.float_val, float_values)
- self.assertTrue(actual_proto.dtype == DataType.DT_FLOAT)
-
- def test_int_tensor_proto(self):
- int_values = [1, 2, 3]
- actual_proto = (
- tensor_proto("dummy", torch.tensor(int_values, dtype=torch.int32))
- .value[0]
- .tensor
- )
- self.assertEqual(actual_proto.int_val, int_values)
- self.assertTrue(actual_proto.dtype == DataType.DT_INT32)
+ def test_float_tensor_proto(self):
+ float_values = [1.0, 2.0, 3.0]
+ actual_proto = (
+ tensor_proto("dummy", torch.tensor(float_values)).value[0].tensor
+ )
+ self.assertEqual(actual_proto.float_val, float_values)
+ self.assertTrue(actual_proto.dtype == DataType.DT_FLOAT)
+
+ def test_int_tensor_proto(self):
+ int_values = [1, 2, 3]
+ actual_proto = (
+ tensor_proto("dummy", torch.tensor(int_values, dtype=torch.int32))
+ .value[0]
+ .tensor
+ )
+ self.assertEqual(actual_proto.int_val, int_values)
+ self.assertTrue(actual_proto.dtype == DataType.DT_INT32)

- def test_scalar_tensor_proto(self):
- scalar_value = 0.1
- actual_proto = (
- tensor_proto("dummy", torch.tensor(scalar_value)).value[0].tensor
- )
- self.assertAlmostEqual(actual_proto.float_val[0], scalar_value)
+ def test_scalar_tensor_proto(self):
+ scalar_value = 0.1
+ actual_proto = (
+ tensor_proto("dummy", torch.tensor(scalar_value)).value[0].tensor
+ )
+ self.assertAlmostEqual(actual_proto.float_val[0], scalar_value)

- def test_complex_tensor_proto(self):
- real = torch.tensor([1.0, 2.0])
- imag = torch.tensor([3.0, 4.0])
- actual_proto = (
- tensor_proto("dummy", torch.complex(real, imag)).value[0].tensor
- )
- self.assertEqual(actual_proto.scomplex_val, [1.0, 3.0, 2.0, 4.0])
+ def test_complex_tensor_proto(self):
+ real = torch.tensor([1.0, 2.0])
+ imag = torch.tensor([3.0, 4.0])
+ actual_proto = (
+ tensor_proto("dummy", torch.complex(real, imag)).value[0].tensor
+ )
+ self.assertEqual(actual_proto.scomplex_val, [1.0, 3.0, 2.0, 4.0])

- def test_empty_tensor_proto(self):
- actual_proto = tensor_proto("dummy", torch.empty(0)).value[0].tensor
- self.assertEqual(actual_proto.float_val, [])
+ def test_empty_tensor_proto(self):
+ actual_proto = tensor_proto("dummy", torch.empty(0)).value[0].tensor
+ self.assertEqual(actual_proto.float_val, [])

-instantiate_parametrized_tests(TestTensorProtoSummary)
+ instantiate_parametrized_tests(TestTensorProtoSummary)

if __name__ == '__main__':
run_tests()
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Disable test_linear_packed if neither bf16 nor fp16 is available on the CPU

Åke Sandgren, 2024-05-06
diff --git a/test/inductor/test_cpu_repro.py b/test/inductor/test_cpu_repro.py
index dfc453220a9..ae1613ad62e 100644
--- a/test/inductor/test_cpu_repro.py
+++ b/test/inductor/test_cpu_repro.py
@@ -362,6 +362,7 @@ class CPUReproTests(TestCase):
self.common(Model(), example_inputs)

@unittest.skipIf(not torch.backends.mkldnn.is_available(), "MKLDNN is not enabled")
+ @unittest.skipIf(not (torch.ops.mkldnn._is_mkldnn_bf16_supported() or torch.ops.mkldnn._is_mkldnn_fp16_supported()), "MKLDNN neither fp16 nor bf16 are available")
@patch("torch.cuda.is_available", lambda: False)
def test_linear_packed(self):
dtypes = []
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Disable tests that require external downloads

Åke Sandgren, 2024-05-14
diff --git a/test/test_hub.py b/test/test_hub.py
index a0c0c315c4e..6427d02045f 100644
--- a/test/test_hub.py
+++ b/test/test_hub.py
@@ -23,6 +23,7 @@ TORCHHUB_EXAMPLE_RELEASE_URL = 'https://github.com/ailzhang/torchhub_example/rel


@unittest.skipIf(IS_SANDCASTLE, 'Sandcastle cannot ping external')
+@unittest.skipIf(True, 'EasyBuild: do not want tests to depend on downloading')
class TestHub(TestCase):

def setUp(self):
diff --git a/test/test_nn.py b/test/test_nn.py
index 66fc4cdc466..ed5d1ecc765 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -151,6 +151,7 @@ class TestNN(NNTestCase):
for b in m.buffers():
self.assertFalse(b.requires_grad)

+ @unittest.skipIf(True, 'EasyBuild: do not want tests to depend on downloading')
def test_module_backcompat(self):
from torch.serialization import SourceChangeWarning
path = download_file('https://download.pytorch.org/test_data/linear.pt')
Loading