From ca278d848efa390bc1e20f3a922e151e4dfebf08 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Thu, 26 Jun 2025 16:18:57 +0200
Subject: [PATCH 01/20] Use dict.items

---
 test/easyblocks/easyblock_specific.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/easyblocks/easyblock_specific.py b/test/easyblocks/easyblock_specific.py
index e18c7b93ee1..2c2de72ec42 100644
--- a/test/easyblocks/easyblock_specific.py
+++ b/test/easyblocks/easyblock_specific.py
@@ -495,8 +495,8 @@ def test_translate_lammps_version(self):
             '29Aug2024_update2': '2024.08.29',
             '28Oct2024': '2024.10.28',
         }
-        for key in lammps_versions:
-            self.assertEqual(lammps.translate_lammps_version(key), lammps_versions[key])
+        for key, expected_version in lammps_versions.items():
+            self.assertEqual(lammps.translate_lammps_version(key), expected_version)
 
         version_file = os.path.join(self.tmpdir, 'src', 'version.h')
         version_txt = '\n'.join([

From 3ff46c0d4cb350768976b843d4909a3f014b3328 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Thu, 26 Jun 2025 15:21:40 +0200
Subject: [PATCH 02/20] Allow rerun and skipped tests

---
 easybuild/easyblocks/p/pytorch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index fdc787155c6..a53408ace36 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -905,8 +905,8 @@ def parse_test_cases(test_suite_el: ET.Element) -> List[TestCase]:
         num_reruns = len(testcase.findall("rerun"))
 
         if skipped:
-            if num_reruns > 0 or failed or errored:
-                raise ValueError(f"Invalid state for testcase '{test_name}'")
+            if failed or errored:
+                raise ValueError(f"Invalid state for testcase '{test_name}': Both skipped and failed/errored")
             state = TestState.SKIPPED
         else:
             state = TestState.FAILURE if failed else TestState.ERROR if errored else TestState.SUCCESS

From a609de210efbc817b37f6205ce1c94c44f12394a Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Thu, 26 Jun 2025 15:22:26 +0200
Subject: [PATCH 03/20] Fix trimming test case name

The code parses class names if they start with the prefix 'test.'
and then trims a prefix consisting of the common part.
That common part specifically excludes the 'test.' part of the prefix
which hence needs to be re-added to match with `startswith`.
---
 easybuild/easyblocks/p/pytorch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index a53408ace36..93f09dc76a0 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -964,7 +964,7 @@ def extract_path(classname: str) -> str:
         # We can remove possible class names by only using the common part
         suite_name = os.path.commonpath(possible_paths)
         # Strip of common prefix to all classes, but keep the last part for uniqueness
-        non_classname_prefix = os.path.dirname(suite_name).replace(os.path.sep, '.') + '.'
+        non_classname_prefix = 'test.' + os.path.dirname(suite_name).replace(os.path.sep, '.') + '.'
         for testcase in test_cases:
             classname = testcase.attrib["classname"]
             if classname.startswith(non_classname_prefix):

From f3699b48608b2e9f435d68c1ce30bffcb6a607a6 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Thu, 26 Jun 2025 15:54:06 +0200
Subject: [PATCH 04/20] Add test for PyTorch log parsing

Select some test reports from real runs, trim them down a bit and parse
them in the test.
Provide script for automatic cleanup of test reports.
---
 test/easyblocks/easyblock_specific.py         | 155 ++++++++++++++++++
 test/pytorch_test_logs/README.md              |   5 +
 test/pytorch_test_logs/cleanup_files.py       | 125 ++++++++++++++
 .../consistency/test_name/test_name-1.xml     |  15 ++
 .../sync.skip.test_foo-1.xml                  |   7 +
 .../duplicate/test_name/test_name-1.xml       |   7 +
 .../TEST-foo.test_name.TestName-1.xml         |   7 +
 .../multi_file/test_name/TEST-Name-1.xml      |   8 +
 .../no_tests/test_name/test_name-1.xml        |   6 +
 .../root/test_name/test_name-1.xml            |   3 +
 .../skip_and_failed/test_name/test_name-1.xml |   9 +
 ...s.quantization.test_quantization-1d671.xml |   6 +
 ...s.quantization.test_quantization-78879.xml |   6 +
 ...s.quantization.test_quantization-5f224.xml |   6 +
 ...s.quantization.test_quantization-d5cb5.xml |   6 +
 .../backends.xeon.test_launch-1.xml           |  26 +++
 .../backends.xeon.test_launch-2.xml           |   8 +
 .../backends.xeon.test_launch-3.xml           |   6 +
 ...tributed.tensor.test_dtensor_ops-2fe9b.xml |  17 ++
 .../dynamo.test_dynamic_shapes-189f6.xml      |  20 +++
 .../dynamo.test_dynamic_shapes-266ee.xml      |  16 ++
 .../dynamo.test_dynamic_shapes-3f6e0.xml      |  13 ++
 .../dynamo.test_misc-18930.xml                |  11 ++
 .../dynamo.test_misc-86d5b.xml                |  16 ++
 .../dynamo.test_misc-d062d.xml                |  20 +++
 .../python-pytest/run_test/run_test.xml       |  11 ++
 .../test_nestedtensor-671fe.xml               |   8 +
 .../test_nestedtensor-8e17a.xml               |  26 +++
 .../test_quantization-3146b.xml               |   6 +
 .../test_quantization-97a67.xml               |  36 ++++
 .../TEST-jit.test_builtins.TestBuiltins-1.xml |   4 +
 .../test_autoload/TEST-TestBackend-1.xml      |   4 +
 .../test_autoload/TEST-TestBackend-2.xml      |   7 +
 33 files changed, 626 insertions(+)
 create mode 100644 test/pytorch_test_logs/README.md
 create mode 100755 test/pytorch_test_logs/cleanup_files.py
 create mode 100644 test/pytorch_test_logs/faulty-reports/consistency/test_name/test_name-1.xml
 create mode 100644 test/pytorch_test_logs/faulty-reports/different_file_name/sync.skip.test_name/sync.skip.test_foo-1.xml
 create mode 100644 test/pytorch_test_logs/faulty-reports/duplicate/test_name/test_name-1.xml
 create mode 100644 test/pytorch_test_logs/faulty-reports/file_attribute/test_name/TEST-foo.test_name.TestName-1.xml
 create mode 100644 test/pytorch_test_logs/faulty-reports/multi_file/test_name/TEST-Name-1.xml
 create mode 100644 test/pytorch_test_logs/faulty-reports/no_tests/test_name/test_name-1.xml
 create mode 100644 test/pytorch_test_logs/faulty-reports/root/test_name/test_name-1.xml
 create mode 100644 test/pytorch_test_logs/faulty-reports/skip_and_failed/test_name/test_name-1.xml
 create mode 100644 test/pytorch_test_logs/test-reports/dist-gloo-init-env/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-1d671.xml
 create mode 100644 test/pytorch_test_logs/test-reports/dist-gloo-init-file/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-78879.xml
 create mode 100644 test/pytorch_test_logs/test-reports/dist-nccl-init-env/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-5f224.xml
 create mode 100644 test/pytorch_test_logs/test-reports/dist-nccl-init-file/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-d5cb5.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-1.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-2.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-3.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/distributed.tensor.test_dtensor_ops/distributed.tensor.test_dtensor_ops-2fe9b.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-189f6.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-266ee.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-3f6e0.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-18930.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-86d5b.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-d062d.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/run_test/run_test.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/test_nestedtensor/test_nestedtensor-671fe.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/test_nestedtensor/test_nestedtensor-8e17a.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/test_quantization/test_quantization-3146b.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/test_quantization/test_quantization-97a67.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-unittest/jit.test_builtins/TEST-jit.test_builtins.TestBuiltins-1.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-unittest/test_autoload/TEST-TestBackend-1.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-unittest/test_autoload/TEST-TestBackend-2.xml

diff --git a/test/easyblocks/easyblock_specific.py b/test/easyblocks/easyblock_specific.py
index 2c2de72ec42..023087b8499 100644
--- a/test/easyblocks/easyblock_specific.py
+++ b/test/easyblocks/easyblock_specific.py
@@ -35,6 +35,7 @@
 import tempfile
 import textwrap
 from io import StringIO
+from pathlib import Path
 from unittest import TestLoader, TextTestRunner
 from test.easyblocks.module import cleanup
 
@@ -42,6 +43,7 @@
 import easybuild.easyblocks.generic.pythonpackage as pythonpackage
 import easybuild.easyblocks.l.lammps as lammps
 import easybuild.easyblocks.p.python as python
+import easybuild.easyblocks.p.pytorch as pytorch
 from easybuild.base.testing import TestCase
 from easybuild.easyblocks.generic.cmakemake import det_cmake_version
 from easybuild.easyblocks.generic.toolchain import Toolchain
@@ -508,6 +510,159 @@ def test_translate_lammps_version(self):
         self.assertEqual(lammps.translate_lammps_version('d3adb33f', path=self.tmpdir), '2025.04.02')
         self.assertEqual(lammps.translate_lammps_version('devel', path=self.tmpdir), '2025.04.02')
 
+    def test_pytorch_test_log_parsing(self):
+        """Verify parsing of XML files produced by PyTorch tests."""
+        TestState = pytorch.TestState
+
+        test_log_dir = Path(__file__).parent.parent / 'pytorch_test_logs'
+
+        results = pytorch.get_test_results(test_log_dir / 'test-reports')
+        results2 = pytorch.get_test_results(test_log_dir)
+        self.assertEqual(results.keys(), results2.keys())
+        for name, suite in results.items():
+            self.assertEqual((name, suite.summary), (name, results2[name].summary))
+        del results2
+
+        self.assertEqual(len(results), 13)
+
+        # 2 small test suites used as a smoke test using most features
+        self.assertIn('backends/xeon/test_launch', results)
+        suite = results['backends/xeon/test_launch']
+        self.assertEqual((suite.errors, suite.failures, suite.num_tests, suite.skipped), (1, 2, 8, 3))
+        # Failure in one file, success in the other --> Success
+        self.assertEqual(suite['TestTorchrun.test_cpu_info'].state, TestState.SUCCESS)
+        # New in 2nd file
+        self.assertEqual(suite['TestTorchrun.test_multi_threads'].state, TestState.SUCCESS)
+        self.assertEqual(suite['TestTorchrun.test_reshape_cpu_float64'].state, TestState.FAILURE)
+        self.assertEqual(suite['TestTorchrun.test_foo'].state, TestState.SKIPPED)
+        self.assertEqual(suite['TestTorchrun.test_bar'].state, TestState.ERROR)
+        self.assertEqual(suite.get_errored_tests(), ['TestTorchrun.test_bar'])
+        self.assertEqual(suite.get_failed_tests(), ['TestTorchrun.test_reshape_cpu_float64', 'TestTorchrun.test_baz'])
+        self.assertIn('test_autoload', results)
+        suite = results['test_autoload']
+        self.assertEqual((suite.errors, suite.failures, suite.num_tests, suite.skipped), (0, 0, 2, 1))
+        self.assertEqual(suite['TestBackendAutoload.test_autoload'].state, TestState.SUCCESS)
+        self.assertEqual(suite['TestBackendAutoload.test_unload'].state, TestState.SKIPPED)
+
+        # Verify summaries which should be enough to catch most issues
+        report = '\n'.join(sorted(f'{suite.name}: {suite.summary}' for suite in results.values()))
+        self.assertEqual(report, textwrap.dedent("""
+            backends/xeon/test_launch: 2 failed, 2 passed, 3 skipped, 1 errors
+            dist-gloo-init-env/distr/algorithms/quantization/test_quantization: 0 failed, 1 passed, 0 skipped, 0 errors
+            dist-gloo-init-file/distr/algorithms/quantization/test_quantization: 0 failed, 1 passed, 0 skipped, 0 errors
+            dist-nccl-init-env/distr/algorithms/quantization/test_quantization: 0 failed, 1 passed, 0 skipped, 0 errors
+            dist-nccl-init-file/distr/algorithms/quantization/test_quantization: 0 failed, 1 passed, 0 skipped, 0 errors
+            dist/foo/bar: 0 failed, 4 passed, 0 skipped, 0 errors
+            distributed/tensor/test_dtensor_ops: 0 failed, 2 passed, 2 skipped, 0 errors
+            dynamo/test_dynamic_shapes: 3 failed, 14 passed, 0 skipped, 0 errors
+            dynamo/test_misc: 1 failed, 9 passed, 0 skipped, 0 errors
+            jit/test_builtins: 0 failed, 1 passed, 0 skipped, 0 errors
+            test_autoload: 0 failed, 1 passed, 1 skipped, 0 errors
+            test_nestedtensor: 3 failed, 2 passed, 3 skipped, 1 errors
+            test_quantization: 0 failed, 12 passed, 5 skipped, 0 errors
+        """).strip())
+        tests = '\n'.join(sorted(f'{test.name}: {test.state.value}'
+                                 for suite in results.values()
+                                 for test in suite.get_tests()))
+        self.assertEqual(tests, textwrap.dedent("""
+            DistQuantizationTests.test_all_gather_fp16: success
+            DistQuantizationTests.test_all_gather_fp16: success
+            DistQuantizationTests.test_all_gather_fp16: success
+            DistQuantizationTests.test_all_gather_fp16: success
+            DynamicShapesCtxManagerTests.test_autograd_profiler_dynamic_shapes: success
+            DynamicShapesCtxManagerTests.test_generic_context_manager_with_graph_break_dynamic_shapes: success
+            DynamicShapesCtxManagerTests.test_generic_ctx_manager_with_graph_break_dynamic_shapes: success
+            DynamicShapesMiscTests.test_outside_linear_module_free_dynamic_shapes: failure
+            DynamicShapesMiscTests.test_packaging_version_parse_dynamic_shapes: success
+            DynamicShapesMiscTests.test_pair_dynamic_shapes: success
+            DynamicShapesMiscTests.test_param_shape_binops_dynamic_shapes: success
+            DynamicShapesMiscTests.test_parameter_free_dynamic_shapes: failure
+            DynamicShapesMiscTests.test_patched_builtin_functions_dynamic_shapes: success
+            DynamicShapesMiscTests.test_proxy_frozen_dataclass_dynamic_shapes: success
+            DynamicShapesMiscTests.test_pt2_compliant_ops_are_allowed_dynamic_shapes: success
+            DynamicShapesMiscTests.test_pt2_compliant_overload_dynamic_shapes: success
+            DynamicShapesMiscTests.test_pure_python_accumulate_dynamic_shapes: success
+            DynamicShapesMiscTests.test_py_guards_mark_dynamic_dynamic_shapes: success
+            DynamicShapesMiscTests.test_python_slice_dynamic_shapes: success
+            DynamicShapesMiscTests.test_pytree_tree_flatten_unflatten_dynamic_shapes: success
+            DynamicShapesMiscTests.test_pytree_tree_leaves_dynamic_shapes: failure
+            MiscTests.test_packaging_version_parse: success
+            MiscTests.test_pair: success
+            MiscTests.test_param_shape_binops: success
+            MiscTests.test_parameter_free: failure
+            MiscTests.test_pytree_tree_map: success
+            MiscTests.test_shape_env_no_recording: success
+            MiscTests.test_shape_env_recorded_function_fallback: success
+            MiscTests.test_yield_from_in_a_loop: success
+            TestBackendAutoload.test_autoload: success
+            TestBackendAutoload.test_unload: skipped
+            TestBuiltins.test_name: success
+            TestCustomFunction.test_autograd_function_with_matmul_folding_at_output: success
+            TestDTensorOpsCPU.test_dtensor_op_db_H_cpu_float16: success
+            TestDTensorOpsCPU.test_dtensor_op_db_H_cpu_float32: success
+            TestDTensorOpsCPU.test_dtensor_op_db_H_cpu_float64: skipped
+            TestDTensorOpsCPU.test_dtensor_op_db_H_cpu_int8: skipped
+            TestDynamicQuantizedOps.test_qrnncell: success
+            TestFakeQuantizeOps.test_backward_per_channel: skipped
+            TestFakeQuantizeOps.test_backward_per_channel_cachemask_cpu: success
+            TestFakeQuantizeOps.test_backward_per_channel_cachemask_cuda: success
+            TestName.test_bar: success
+            TestNestedTensor.test_bmm_cuda_gpu_float16: failure
+            TestNestedTensor.test_bmm_cuda_gpu_float32: failure
+            TestNestedTensor.test_bmm_cuda_gpu_float64: error
+            TestNestedTensor.test_cat: success
+            TestNestedTensor.test_copy_: success
+            TestNestedTensor.test_reshape_cpu_float16: skipped
+            TestNestedTensor.test_reshape_cpu_float32: skipped
+            TestNestedTensor.test_reshape_cpu_float64: failure
+            TestNestedTensorSubclassCPU.test_linear_backward_memory_usage_cpu_float32: skipped
+            TestNumericDebugger.test_quantize_pt2e_preserve_handle: success
+            TestNumericDebugger.test_re_export_preserve_handle: success
+            TestPadding.test_reflection_pad1d: success
+            TestQuantizedConv.test_conv_reorder_issue_onednn: success
+            TestQuantizedConv.test_conv_transpose_reorder_issue_onednn: success
+            TestQuantizedFunctionalOps.test_relu_api: success
+            TestQuantizedLinear.test_qlinear_cudnn: skipped
+            TestQuantizedLinear.test_qlinear_gelu_pt2e: success
+            TestQuantizedOps.test_adaptive_avg_pool2d_nhwc: success
+            TestQuantizedOps.test_adaptive_avg_pool: skipped
+            TestQuantizedOps.test_qadd_relu_cudnn: skipped
+            TestQuantizedOps.test_qadd_relu_cudnn_nhwc: skipped
+            TestQuantizedOps.test_qadd_relu_different_qparams: success
+            TestTorchrun.test_bar: error
+            TestTorchrun.test_baz: failure
+            TestTorchrun.test_cpu_info: success
+            TestTorchrun.test_foo2: skipped
+            TestTorchrun.test_foo3: skipped
+            TestTorchrun.test_foo: skipped
+            TestTorchrun.test_multi_threads: success
+            TestTorchrun.test_reshape_cpu_float64: failure
+            TestTracer.test_jit_save: success
+            bar.test_2.test_func3: success
+            bar.test_foo.TestBar.test_func2: success
+            bar.test_foo.TestName.test_func1: success
+        """).strip())
+
+        # Some error cases
+        error_log_dir = test_log_dir / 'faulty-reports'
+
+        self.assertErrorRegex(ValueError, "<testsuites> or <testsuite>",
+                              pytorch.get_test_results, error_log_dir / 'root')
+        self.assertErrorRegex(ValueError, "multiple reported files",
+                              pytorch.get_test_results, error_log_dir / 'multi_file')
+        self.assertErrorRegex(ValueError, "Path from folder and filename should be equal",
+                              pytorch.get_test_results, error_log_dir / 'different_file_name')
+        self.assertErrorRegex(ValueError, "Unexpected file attribute",
+                              pytorch.get_test_results, error_log_dir / 'file_attribute')
+        self.assertErrorRegex(ValueError, "Invalid state",
+                              pytorch.get_test_results, error_log_dir / 'skip_and_failed')
+        self.assertErrorRegex(ValueError, "no test",
+                              pytorch.get_test_results, error_log_dir / 'no_tests')
+        self.assertErrorRegex(ValueError, "Invalid test count",
+                              pytorch.get_test_results, error_log_dir / 'consistency')
+        self.assertErrorRegex(ValueError, "Duplicate test",
+                              pytorch.get_test_results, error_log_dir / 'duplicate')
+
 
 def suite(loader):
     """Return all easyblock-specific tests."""
diff --git a/test/pytorch_test_logs/README.md b/test/pytorch_test_logs/README.md
new file mode 100644
index 00000000000..7191e9e8815
--- /dev/null
+++ b/test/pytorch_test_logs/README.md
@@ -0,0 +1,5 @@
+# PyTorch test result files
+
+This folder contains files as written by the PyTorch test step (via `unittest-xml-reporting`) to be used in tests of the parsing in the PyTorch easyblock.
+
+Most files are simplified or constructed in a way to reproduce a specific corner case of the parser or format.
diff --git a/test/pytorch_test_logs/cleanup_files.py b/test/pytorch_test_logs/cleanup_files.py
new file mode 100755
index 00000000000..9578f6f2532
--- /dev/null
+++ b/test/pytorch_test_logs/cleanup_files.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+
+"""This script strips content and filenames of PyTorch test result XML files in a deterministic way and formats them.
+The intent is to keep the general structure of the files but still make them shorter and easier to read.
+
+Usage: Pass the target directory as the single argument or
+run this script without arguments to format the XML files in the "test-reports" directory next to the script.
+"""
+
+import re
+import subprocess
+import sys
+from hashlib import md5
+from pathlib import Path
+
+
+def shorten_filename(path: Path) -> Path:
+    """Shorten the file name by truncating its random part, e.g. test_quantization-d1303cbc2b57cf06.xml"""
+    match = re.search(r'-(?P<hash>[a-z0-9]{6,})\.xml$', path.name)  # "-" + 6 or more lowercase alphanumerics
+    if match:
+        fixed_part: str = path.name[:match.start()]
+        short_hash = match['hash'][:5]  # keep only the first 5 characters
+        new_name: Path = path.with_name(f"{fixed_part}-{short_hash}.xml")
+        path.rename(new_name)
+        return new_name
+    return path  # no such suffix: the file is left untouched
+
+
+def shorten_content(path: Path):
+    """Shorten attribute values and tag content (stdout, stderr, etc.) in the XML file, rewriting it in place."""
+    content: str = path.read_text(encoding='utf-8')
+
+    # Shorten messages in tags: <skipped message="..."> (an empty message="" is left untouched)
+    content = re.sub(r'message="[^"]+"', 'message="..."', content)
+    # Replace every non-empty time by the same fixed value
+    content = re.sub(r'time="[^"]+"', 'time="4.2"', content)
+    # Remove timestamp & hostname attributes
+    content = re.sub(r'timestamp="[^"]+"', '', content)
+    content = re.sub(r'hostname="[^"]+"', '', content)
+    # Remove type attribute from <skipped> tags (only when it directly follows the tag name)
+    content = re.sub(r'(<skipped)\s+type="[^"]+"', r'\1', content)
+
+    # Remove stdout/stderr from about half of the files.
+    # For the other half just shorten it. The choice is deterministic: parity of the MD5 hash of the file name.
+    remove_output: bool = int(md5(str(path.name).encode('utf-8')).hexdigest(), 16) % 2 == 0
+
+    # Shorten output shown between various tags
+    for tag in ["failure", "skipped", "system-out", "system-err", "rerun"]:
+        # Beware of multiline content in tags and empty tags (<tag/> or <tag key="value"/>)
+        pattern = re.compile(
+            rf'(<{tag}([^>/]*?)>)(.*?)</{tag}>',
+            re.DOTALL
+        )
+        if remove_output and tag in ["system-out", "system-err"]:
+            content = pattern.sub('', content)  # drop the whole element
+        else:
+            content = pattern.sub(rf'\1[snip]</{tag}>', content)  # keep the opening tag, replace the content
+
+    # Remove empty lines
+    content = re.sub(r'\n\s*\n', '\n', content)
+    # Combine empty tags: <tag key="value"></tag> becomes <tag key="value"/>
+    content = re.sub(r'(<(\w+) [^>]*)>\s*</\2>', r'\1/>', content)
+
+    path.write_text(content, encoding='utf-8')
+
+
+def format_xml(path: Path) -> bool:  # True if xmllint succeeded, False (after printing its output) otherwise
+    try:
+        subprocess.check_output(
+            ["xmllint", "--format", str(path), "-o", str(path)],  # same path twice: presumably formats in place
+            encoding='utf-8',
+            stderr=subprocess.STDOUT,  # merge stderr into the captured output so `e.output` has the error text
+        )
+    except subprocess.CalledProcessError as e:
+        print(f'\nError formatting {path}: {e.output}', file=sys.stderr)
+        return False
+    return True
+
+
+def remove_if_empty(path: Path) -> bool:  # Returns True if the file was deleted, False if it was kept
+    content = path.read_text(encoding='utf-8')  # NOTE(review): regex below also matches <testsuites>; intended?
+    if not re.search(r'<testsuite[^>]*[^/]>', content) and '<!--' not in content:
+        path.unlink()  # no non-self-closing tag starting with "<testsuite" and no XML comment
+        return True
+    return False
+
+
+def main():
+    """Interactively clean up all XML files below the target directory (1st argument or ./test-reports)."""
+    default_directory = Path(__file__).resolve().parent / "test-reports"
+    if '--help' in sys.argv or '-h' in sys.argv:
+        print("Usage: python cleanup_files.py [target_directory]")
+        print(f"Default target directory {default_directory}.")
+        sys.exit(0)  # Help was requested explicitly, so this is not a failure
+    target_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else default_directory
+    xml_files = list(target_dir.rglob("*.xml"))
+    num_files = len(xml_files)
+    reply = input(f"Process {num_files} XML files in {target_dir}? [y/n] ").strip()
+    if not re.match(r'^[Yy]$', reply):
+        print("Aborting.")
+        sys.exit(1)
+
+    print(f"Processing file 0/{num_files}...", end='', flush=True)
+
+    for i, path in enumerate(xml_files, 1):
+        print(f"\rProcessing file {i}/{num_files}...", end='', flush=True)
+
+        if remove_if_empty(path):
+            continue
+
+        path = shorten_filename(path)
+        shorten_content(path)
+        if not format_xml(path):
+            sys.exit(1)
+
+    # Delete empty directories, longest paths first so nested ones go before their parents
+    for d in sorted(target_dir.rglob("*"), key=lambda p: -len(str(p))):
+        if d.is_dir() and not any(d.iterdir()):
+            d.rmdir()
+
+    print(" done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/pytorch_test_logs/faulty-reports/consistency/test_name/test_name-1.xml b/test/pytorch_test_logs/faulty-reports/consistency/test_name/test_name-1.xml
new file mode 100644
index 00000000000..6ec57f15910
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/consistency/test_name/test_name-1.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<testsuites>
+  <!-- 'tests' attribute is too low -->
+  <testsuite name="pytest" errors="1" failures="1" skipped="1" tests="2" time="4.2">
+    <testcase classname="TestName" name="test1" time="4.2" file="test_name.py">
+      <skipped message=""/>
+    </testcase>
+    <testcase classname="TestName" name="test2" time="4.2" file="test_name.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="TestName" name="test3" time="4.2" file="test_name.py">
+      <error message="...">[snip]</error>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/faulty-reports/different_file_name/sync.skip.test_name/sync.skip.test_foo-1.xml b/test/pytorch_test_logs/faulty-reports/different_file_name/sync.skip.test_name/sync.skip.test_foo-1.xml
new file mode 100644
index 00000000000..67893db4a7e
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/different_file_name/sync.skip.test_name/sync.skip.test_foo-1.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <!-- Correct but wrong file or folder name -->
+    <testcase classname="TestName" name="test_foo" time="4.2" file="sync/skip.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/faulty-reports/duplicate/test_name/test_name-1.xml b/test/pytorch_test_logs/faulty-reports/duplicate/test_name/test_name-1.xml
new file mode 100644
index 00000000000..35f6368ed12
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/duplicate/test_name/test_name-1.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <testcase classname="TestName" name="test2" time="4.2" file="test_name.py"/>
+    <testcase classname="TestName" name="test2" time="4.2" file="test_name.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/faulty-reports/file_attribute/test_name/TEST-foo.test_name.TestName-1.xml b/test/pytorch_test_logs/faulty-reports/file_attribute/test_name/TEST-foo.test_name.TestName-1.xml
new file mode 100644
index 00000000000..bdc796e33e3
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/file_attribute/test_name/TEST-foo.test_name.TestName-1.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <!-- Expected from filename: foo/test_name.py -->
+    <testcase classname="TestName" name="test2" time="4.2" file="foo/test_foo.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/faulty-reports/multi_file/test_name/TEST-Name-1.xml b/test/pytorch_test_logs/faulty-reports/multi_file/test_name/TEST-Name-1.xml
new file mode 100644
index 00000000000..78dd9f5c7fb
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/multi_file/test_name/TEST-Name-1.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="2" time="4.2">
+    <testcase classname="TestName" name="test1" time="4.2" file="test_foo.py"/>
+    <!-- Different filename -->
+    <testcase classname="TestName" name="test2" time="4.2" file="test_bar.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/faulty-reports/no_tests/test_name/test_name-1.xml b/test/pytorch_test_logs/faulty-reports/no_tests/test_name/test_name-1.xml
new file mode 100644
index 00000000000..568b67bb677
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/no_tests/test_name/test_name-1.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <!-- Empty -->
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/faulty-reports/root/test_name/test_name-1.xml b/test/pytorch_test_logs/faulty-reports/root/test_name/test_name-1.xml
new file mode 100644
index 00000000000..a2ecb902aa7
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/root/test_name/test_name-1.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0"?>
+<!-- Missing testsuite/testsuites at root -->
+<testcase classname="MissingRootTag" name="test1" time="3.14" file="test_name.py"/>
diff --git a/test/pytorch_test_logs/faulty-reports/skip_and_failed/test_name/test_name-1.xml b/test/pytorch_test_logs/faulty-reports/skip_and_failed/test_name/test_name-1.xml
new file mode 100644
index 00000000000..fdae2191838
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/skip_and_failed/test_name/test_name-1.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <testcase classname="TestName" name="test2" time="4.2" file="test_name.py">
+      <skipped message=""/>
+      <failure message="...">[snip]</failure>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/dist-gloo-init-env/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-1d671.xml b/test/pytorch_test_logs/test-reports/dist-gloo-init-env/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-1d671.xml
new file mode 100644
index 00000000000..bdf01393666
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/dist-gloo-init-env/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-1d671.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <testcase classname="DistQuantizationTests" name="test_all_gather_fp16" time="4.2" file="distr/algorithms/quantization/test_quantization.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/dist-gloo-init-file/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-78879.xml b/test/pytorch_test_logs/test-reports/dist-gloo-init-file/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-78879.xml
new file mode 100644
index 00000000000..bdf01393666
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/dist-gloo-init-file/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-78879.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <testcase classname="DistQuantizationTests" name="test_all_gather_fp16" time="4.2" file="distr/algorithms/quantization/test_quantization.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/dist-nccl-init-env/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-5f224.xml b/test/pytorch_test_logs/test-reports/dist-nccl-init-env/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-5f224.xml
new file mode 100644
index 00000000000..bdf01393666
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/dist-nccl-init-env/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-5f224.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <testcase classname="DistQuantizationTests" name="test_all_gather_fp16" time="4.2" file="distr/algorithms/quantization/test_quantization.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/dist-nccl-init-file/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-d5cb5.xml b/test/pytorch_test_logs/test-reports/dist-nccl-init-file/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-d5cb5.xml
new file mode 100644
index 00000000000..bdf01393666
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/dist-nccl-init-file/distr.algorithms.quantization.test_quantization/distr.algorithms.quantization.test_quantization-d5cb5.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <testcase classname="DistQuantizationTests" name="test_all_gather_fp16" time="4.2" file="distr/algorithms/quantization/test_quantization.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-1.xml b/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-1.xml
new file mode 100644
index 00000000000..dbc55f47eec
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-1.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="1" failures="3" skipped="3" tests="7" time="4.2">
+    <testcase classname="TestTorchrun" name="test_reshape_cpu_float64" time="4.2" file="test_nestedtensor.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="TestTorchrun" name="test_cpu_info" time="4.2" file="backends/xeon/test_launch.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="TestTorchrun" name="test_foo" time="4.2" file="backends/xeon/test_launch.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestTorchrun" name="test_foo2" time="4.2" file="backends/xeon/test_launch.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestTorchrun" name="test_foo3" time="4.2" file="backends/xeon/test_launch.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestTorchrun" name="test_bar" time="4.2" file="backends/xeon/test_launch.py">
+      <error message="...">[snip]</error>
+    </testcase>
+    <testcase classname="TestTorchrun" name="test_baz" time="4.2" file="backends/xeon/test_launch.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-2.xml b/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-2.xml
new file mode 100644
index 00000000000..c8372ce9101
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-2.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="2" time="4.2">
+    <!-- Now success -->
+    <testcase classname="TestTorchrun" name="test_cpu_info" time="4.2" file="backends/xeon/test_launch.py"/>
+    <testcase classname="TestTorchrun" name="test_multi_threads" time="4.2" file="backends/xeon/test_launch.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-3.xml b/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-3.xml
new file mode 100644
index 00000000000..1c86924bf8f
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-3.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="0" time="4.2">
+    <!-- Empty on purpose -->
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/distributed.tensor.test_dtensor_ops/distributed.tensor.test_dtensor_ops-2fe9b.xml b/test/pytorch_test_logs/test-reports/python-pytest/distributed.tensor.test_dtensor_ops/distributed.tensor.test_dtensor_ops-2fe9b.xml
new file mode 100644
index 00000000000..b1010ead5c7
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/distributed.tensor.test_dtensor_ops/distributed.tensor.test_dtensor_ops-2fe9b.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="406" tests="670" time="4.2">
+    <testcase classname="TestDTensorOpsCPU" name="test_dtensor_op_db_H_cpu_float32" time="4.2" file="distributed/tensor/test_dtensor_ops.py"/>
+    <testcase classname="TestDTensorOpsCPU" name="test_dtensor_op_db_H_cpu_float64" time="4.2" file="distributed/tensor/test_dtensor_ops.py">
+      <skipped message=""/>
+    </testcase>
+    <testcase classname="TestDTensorOpsCPU" name="test_dtensor_op_db_H_cpu_float16" time="4.2" file="distributed/tensor/test_dtensor_ops.py">
+      <rerun message="...">[snip]</rerun>
+    </testcase>
+    <testcase classname="TestDTensorOpsCPU" name="test_dtensor_op_db_H_cpu_int8" time="4.2" file="distributed/tensor/test_dtensor_ops.py">
+      <!-- Both tags: run twice -->
+      <rerun message="...">[snip]</rerun>
+      <skipped message=""/>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-189f6.xml b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-189f6.xml
new file mode 100644
index 00000000000..9b5af2a0487
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-189f6.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="0" tests="4" time="4.2">
+    <testcase classname="DynamicShapesMiscTests" name="test_packaging_version_parse_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py">
+      <system-out>[snip]</system-out>
+    </testcase>
+    <testcase classname="DynamicShapesMiscTests" name="test_pair_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py">
+      <system-out>[snip]</system-out>
+    </testcase>
+    <testcase classname="DynamicShapesMiscTests" name="test_param_shape_binops_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py">
+      <system-out>[snip]</system-out>
+    </testcase>
+    <testcase classname="DynamicShapesMiscTests" name="test_parameter_free_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py">
+      <rerun message="...">[snip]</rerun>
+      <rerun message="...">[snip]</rerun>
+      <failure message="...">[snip]</failure>
+      <system-out>[snip]</system-out>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-266ee.xml b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-266ee.xml
new file mode 100644
index 00000000000..94500d25c6a
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-266ee.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="0" tests="10" time="4.2">
+    <testcase classname="DynamicShapesMiscTests" name="test_patched_builtin_functions_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_proxy_frozen_dataclass_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_pt2_compliant_ops_are_allowed_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_pt2_compliant_overload_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_pure_python_accumulate_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_py_guards_mark_dynamic_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_python_slice_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_pytree_tree_flatten_unflatten_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_pytree_tree_leaves_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-3f6e0.xml b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-3f6e0.xml
new file mode 100644
index 00000000000..c0cbe2faa59
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_dynamic_shapes/dynamo.test_dynamic_shapes-3f6e0.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="7" tests="12" time="4.2">
+    <testcase classname="DynamicShapesCtxManagerTests" name="test_autograd_profiler_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesCtxManagerTests" name="test_generic_context_manager_with_graph_break_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesCtxManagerTests" name="test_generic_ctx_manager_with_graph_break_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py"/>
+    <testcase classname="DynamicShapesMiscTests" name="test_outside_linear_module_free_dynamic_shapes" time="4.2" file="dynamo/test_dynamic_shapes.py">
+      <rerun message="...">[snip]</rerun>
+      <rerun message="...">[snip]</rerun>
+      <failure message="...">[snip]</failure>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-18930.xml b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-18930.xml
new file mode 100644
index 00000000000..dd64b286cbb
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-18930.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="0" tests="1" time="4.2">
+    <testcase classname="MiscTests" name="test_parameter_free" time="4.2" file="dynamo/test_misc.py">
+      <rerun message="...">[snip]</rerun>
+      <rerun message="...">[snip]</rerun>
+      <failure message="...">[snip]</failure>
+      <system-out>[snip]</system-out>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-86d5b.xml b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-86d5b.xml
new file mode 100644
index 00000000000..7969c7d1799
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-86d5b.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="2" tests="34" time="4.2">
+    <testcase classname="MiscTests" name="test_pytree_tree_map" time="4.2" file="dynamo/test_misc.py"/>
+    <testcase classname="MiscTests" name="test_shape_env_no_recording" time="4.2" file="dynamo/test_misc.py"/>
+    <testcase classname="MiscTests" name="test_shape_env_recorded_function_fallback" time="4.2" file="dynamo/test_misc.py"/>
+    <testcase classname="MiscTests" name="test_param_shape_binops" time="4.2" file="dynamo/test_misc.py"/>
+    <testcase classname="MiscTests" name="test_yield_from_in_a_loop" time="4.2" file="dynamo/test_misc.py"/>
+    <testcase classname="MiscTests" name="test_parameter_free" time="4.2" file="dynamo/test_misc.py">
+      <rerun message="...">[snip]</rerun>
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="TestTracer" name="test_jit_save" time="4.2" file="dynamo/test_misc.py"/>
+    <testcase classname="TestCustomFunction" name="test_autograd_function_with_matmul_folding_at_output" time="4.2" file="dynamo/test_misc.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-d062d.xml b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-d062d.xml
new file mode 100644
index 00000000000..74e37835363
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/dynamo.test_misc/dynamo.test_misc-d062d.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="0" tests="4" time="4.2">
+    <testcase classname="MiscTests" name="test_packaging_version_parse" time="4.2" file="dynamo/test_misc.py">
+      <system-out>[snip]</system-out>
+    </testcase>
+    <testcase classname="MiscTests" name="test_pair" time="4.2" file="dynamo/test_misc.py">
+      <system-out>[snip]</system-out>
+    </testcase>
+    <testcase classname="MiscTests" name="test_param_shape_binops" time="4.2" file="dynamo/test_misc.py">
+      <system-out>[snip]</system-out>
+    </testcase>
+    <testcase classname="MiscTests" name="test_parameter_free" time="4.2" file="dynamo/test_misc.py">
+      <rerun message="...">[snip]</rerun>
+      <rerun message="...">[snip]</rerun>
+      <failure message="...">[snip]</failure>
+      <system-out>[snip]</system-out>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/run_test/run_test.xml b/test/pytorch_test_logs/test-reports/python-pytest/run_test/run_test.xml
new file mode 100644
index 00000000000..2964c5a3e6f
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/run_test/run_test.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="4" time="4.2">
+    <!-- Tests in common path test/dist/foo/bar -->
+    <testcase classname="test.dist.foo.bar.test_foo.TestName" name="test_func1" time="4.2" file="test_foo.py"/>
+    <testcase classname="test.dist.foo.bar.test_foo.TestBar" name="test_func2" time="4.2" file="test_foo.py"/>
+    <testcase classname="test.dist.foo.bar.test_2" name="test_func3" time="4.2" file="test_2.py"/>
+    <!-- Extra test directly in run_test.py -->
+    <testcase classname="TestName" name="test_bar" time="4.2" file="run_test.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/test_nestedtensor/test_nestedtensor-671fe.xml b/test/pytorch_test_logs/test-reports/python-pytest/test_nestedtensor/test_nestedtensor-671fe.xml
new file mode 100644
index 00000000000..802b93b6665
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/test_nestedtensor/test_nestedtensor-671fe.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="1" tests="1" time="4.2">
+    <testcase classname="TestNestedTensorSubclassCPU" name="test_linear_backward_memory_usage_cpu_float32" time="4.2" file="test_nestedtensor.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/test_nestedtensor/test_nestedtensor-8e17a.xml b/test/pytorch_test_logs/test-reports/python-pytest/test_nestedtensor/test_nestedtensor-8e17a.xml
new file mode 100644
index 00000000000..c2049f46e24
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/test_nestedtensor/test_nestedtensor-8e17a.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<testsuites>
+  <!-- Shortened version of real case: count attribute values are more than present cases -->
+  <testsuite name="pytest" errors="2" failures="4" skipped="3" tests="9" time="4.2">
+    <testcase classname="TestNestedTensor" name="test_cat" time="4.2" file="test_nestedtensor.py"/>
+    <testcase classname="TestNestedTensor" name="test_copy_" time="4.2" file="test_nestedtensor.py"/>
+    <testcase classname="TestNestedTensor" name="test_reshape_cpu_float16" time="4.2" file="test_nestedtensor.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestNestedTensor" name="test_reshape_cpu_float32" time="4.2" file="test_nestedtensor.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestNestedTensor" name="test_reshape_cpu_float64" time="4.2" file="test_nestedtensor.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="TestNestedTensor" name="test_bmm_cuda_gpu_float16" time="4.2" file="test_nestedtensor.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="TestNestedTensor" name="test_bmm_cuda_gpu_float32" time="4.2" file="test_nestedtensor.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="TestNestedTensor" name="test_bmm_cuda_gpu_float64" time="4.2" file="test_nestedtensor.py">
+      <error message="...">[snip]</error>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/test_quantization/test_quantization-3146b.xml b/test/pytorch_test_logs/test-reports/python-pytest/test_quantization/test_quantization-3146b.xml
new file mode 100644
index 00000000000..981e103162d
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/test_quantization/test_quantization-3146b.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="1" time="4.2">
+    <testcase classname="TestNumericDebugger" name="test_re_export_preserve_handle" time="4.2" file="test_quantization.py"/>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/test_quantization/test_quantization-97a67.xml b/test/pytorch_test_logs/test-reports/python-pytest/test_quantization/test_quantization-97a67.xml
new file mode 100644
index 00000000000..18afe8bc184
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/test_quantization/test_quantization-97a67.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="5" tests="17" time="4.2">
+    <testcase classname="TestQuantizedOps" name="test_adaptive_avg_pool" time="4.2" file="test_quantization.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestQuantizedOps" name="test_adaptive_avg_pool2d_nhwc" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestQuantizedOps" name="test_qadd_relu_cudnn" time="4.2" file="test_quantization.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestQuantizedOps" name="test_qadd_relu_cudnn_nhwc" time="4.2" file="test_quantization.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestQuantizedOps" name="test_qadd_relu_different_qparams" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestQuantizedLinear" name="test_qlinear_cudnn" time="4.2" file="test_quantization.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestQuantizedLinear" name="test_qlinear_gelu_pt2e" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestQuantizedConv" name="test_conv_reorder_issue_onednn" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestQuantizedConv" name="test_conv_transpose_reorder_issue_onednn" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestDynamicQuantizedOps" name="test_qrnncell" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestPadding" name="test_reflection_pad1d" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestQuantizedFunctionalOps" name="test_relu_api" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestFakeQuantizeOps" name="test_backward_per_channel" time="4.2" file="test_quantization.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="TestFakeQuantizeOps" name="test_backward_per_channel_cachemask_cpu" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestFakeQuantizeOps" name="test_backward_per_channel_cachemask_cuda" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestNumericDebugger" name="test_quantize_pt2e_preserve_handle" time="4.2" file="test_quantization.py"/>
+    <testcase classname="TestNumericDebugger" name="test_re_export_preserve_handle" time="4.2" file="test_quantization.py">
+      <rerun message="...">[snip]</rerun>
+      <rerun message="...">[snip]</rerun>
+      <failure message="...">[snip]</failure>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-unittest/jit.test_builtins/TEST-jit.test_builtins.TestBuiltins-1.xml b/test/pytorch_test_logs/test-reports/python-unittest/jit.test_builtins/TEST-jit.test_builtins.TestBuiltins-1.xml
new file mode 100644
index 00000000000..f161a814732
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-unittest/jit.test_builtins/TEST-jit.test_builtins.TestBuiltins-1.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuite name="TEST-jit.test_builtins.TestBuiltins-1" tests="1" file=".py" time="4.2" failures="0" errors="0" skipped="0">
+  <testcase classname="TestBuiltins" name="test_name" time="4.2" file="jit/test_builtins.py" line="9"/>
+</testsuite>
diff --git a/test/pytorch_test_logs/test-reports/python-unittest/test_autoload/TEST-TestBackend-1.xml b/test/pytorch_test_logs/test-reports/python-unittest/test_autoload/TEST-TestBackend-1.xml
new file mode 100644
index 00000000000..c2686aee667
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-unittest/test_autoload/TEST-TestBackend-1.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuite name="TestBackendAutoload-20250623211756" tests="1" file=".py" time="4.2" failures="0" errors="0" skipped="0">
+  <testcase classname="TestBackendAutoload" name="test_autoload" time="4.2" file="test_autoload.py" line="9"/>
+</testsuite>
diff --git a/test/pytorch_test_logs/test-reports/python-unittest/test_autoload/TEST-TestBackend-2.xml b/test/pytorch_test_logs/test-reports/python-unittest/test_autoload/TEST-TestBackend-2.xml
new file mode 100644
index 00000000000..3477dc744ca
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-unittest/test_autoload/TEST-TestBackend-2.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuite name="TestBackendAutoload-20250623211226" tests="1" file=".py" time="4.2" failures="0" errors="0" skipped="1">
+  <testcase classname="TestBackendAutoload" name="test_autoload" time="4.2" file="test_autoload.py" line="9"/>
+  <testcase classname="TestBackendAutoload" name="test_unload" time="4.2" file="test_autoload.py" line="18">
+    <skipped message="...">[snip]</skipped>
+  </testcase>
+</testsuite>

From 420d8502c58a8e49ea94732fcd131ef321293b41 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Thu, 26 Jun 2025 17:21:49 +0200
Subject: [PATCH 05/20] Gracefully handle empty test result files

---
 easybuild/easyblocks/p/pytorch.py                          | 7 ++++++-
 test/easyblocks/easyblock_specific.py                      | 2 ++
 .../faulty-reports/invalid_xml/test_name/test_name-1.xml   | 5 +++++
 .../backends.xeon.test_launch-4.xml                        | 1 +
 4 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 test/pytorch_test_logs/faulty-reports/invalid_xml/test_name/test_name-1.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-4.xml

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index 93f09dc76a0..304b72b3198 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -992,7 +992,12 @@ def parse_test_result_file(xml_file: Path) -> List[TestSuite]:
     :return: A list of TestSuite objects representing the parsed structure.
     """
     try:
-        root = ET.parse(xml_file).getroot()
+        try:
+            root = ET.parse(xml_file).getroot()
+        except ET.ParseError:
+            if '<test' not in xml_file.read_text():
+                return []  # Empty file, no test results
+            raise
 
         # Normalize root to be a list of test suite elements
         if root.tag == "testsuites":
diff --git a/test/easyblocks/easyblock_specific.py b/test/easyblocks/easyblock_specific.py
index 023087b8499..2e6adae6316 100644
--- a/test/easyblocks/easyblock_specific.py
+++ b/test/easyblocks/easyblock_specific.py
@@ -648,6 +648,8 @@ def test_pytorch_test_log_parsing(self):
 
         self.assertErrorRegex(ValueError, "<testsuites> or <testsuite>",
                               pytorch.get_test_results, error_log_dir / 'root')
+        self.assertErrorRegex(ValueError, "Failed to parse",
+                              pytorch.get_test_results, error_log_dir / 'invalid_xml')
         self.assertErrorRegex(ValueError, "multiple reported files",
                               pytorch.get_test_results, error_log_dir / 'multi_file')
         self.assertErrorRegex(ValueError, "Path from folder and filename should be equal",
diff --git a/test/pytorch_test_logs/faulty-reports/invalid_xml/test_name/test_name-1.xml b/test/pytorch_test_logs/faulty-reports/invalid_xml/test_name/test_name-1.xml
new file mode 100644
index 00000000000..d32f954bbb8
--- /dev/null
+++ b/test/pytorch_test_logs/faulty-reports/invalid_xml/test_name/test_name-1.xml
@@ -0,0 +1,5 @@
+<testsuite name="pytest" errors="1" failures="0" skipped="0" tests="1" time="4.2">
+  <testcase classname="TestName" name="test1" time="4.2" file="test_name.py">
+  <!-- wrong closing tag so XML fails to parse -->
+  </wrong>
+</testsuite>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-4.xml b/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-4.xml
new file mode 100644
index 00000000000..9a3e05b534f
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/backends.xeon.test_launch/backends.xeon.test_launch-4.xml
@@ -0,0 +1 @@
+<!-- Empty on purpose -->

From b472ced887078880e2e436882e6167688edf15fe Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Tue, 22 Jul 2025 17:03:48 +0200
Subject: [PATCH 06/20] Also clean error-tags

---
 test/pytorch_test_logs/cleanup_files.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/pytorch_test_logs/cleanup_files.py b/test/pytorch_test_logs/cleanup_files.py
index 9578f6f2532..22538be3851 100755
--- a/test/pytorch_test_logs/cleanup_files.py
+++ b/test/pytorch_test_logs/cleanup_files.py
@@ -45,7 +45,7 @@ def shorten_content(path: Path):
     remove_output: bool = int(md5(str(path.name).encode('utf-8')).hexdigest(), 16) % 2 == 0
 
     # Shorten output shown between various tags
-    for tag in ["failure", "skipped", "system-out", "system-err", "rerun"]:
+    for tag in ["error", "failure", "skipped", "system-out", "system-err", "rerun"]:
         # Beware of multiline content in tags and empty tags (<tag/> or <tag key="value"/>)
         pattern = re.compile(
             rf'(<{tag}([^>/]*?)>)(.*?)</{tag}>',

From 22108906dea0883458465135123e08d7c5e5adb0 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Tue, 22 Jul 2025 17:04:05 +0200
Subject: [PATCH 07/20] Ignore error on formatting empty XML

---
 test/pytorch_test_logs/cleanup_files.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/pytorch_test_logs/cleanup_files.py b/test/pytorch_test_logs/cleanup_files.py
index 22538be3851..59ce1de3016 100755
--- a/test/pytorch_test_logs/cleanup_files.py
+++ b/test/pytorch_test_logs/cleanup_files.py
@@ -72,8 +72,10 @@ def format_xml(path: Path) -> bool:
             stderr=subprocess.STDOUT,
         )
     except subprocess.CalledProcessError as e:
-        print(f'\nError formatting {path}: {e.output}', file=sys.stderr)
-        return False
+        # Ignore error "Start tag expected" for empty files
+        if '<!-- Empty' not in path.read_text(encoding='utf-8'):
+            print(f'\nError formatting {path}: {e.output}', file=sys.stderr)
+            return False
     return True
 
 

From d27da69596cded9c751041a32d8064f809709ca4 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Tue, 22 Jul 2025 17:22:14 +0200
Subject: [PATCH 08/20] Relax condition on reported number of tests

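We can't rely on the "tests" attribute being at least the sum of the other
counters, because a single test might be counted in both the "errors" and the
"failures" attribute.
This was seen in the provided test case: after a first <testcase> that
contained a `<failure>`, a second, duplicated <testcase> for the same test
contained: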
  <error message="failed on teardown with &quot;AssertionError: Scalars are not equal![...]
---
 easybuild/easyblocks/p/pytorch.py               | 17 ++++++++++++-----
 test/easyblocks/easyblock_specific.py           |  4 +++-
 .../consistency/test_name/test_name-1.xml       |  5 ++++-
 .../inductor.test_cudagraph_trees-17dac.xml     | 14 ++++++++++++++
 4 files changed, 33 insertions(+), 7 deletions(-)
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/inductor.test_cudagraph_trees/inductor.test_cudagraph_trees-17dac.xml

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index 304b72b3198..d3bae5e6dfa 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -1022,9 +1022,10 @@ def parse_test_result_file(xml_file: Path) -> List[TestSuite]:
             # when unittest's `subTest` is used: https://github.com/xmlrunner/unittest-xml-reporting/issues/292
             num_tests = int(test_suite.attrib["tests"])
             # But it needs to be at least consistent with the "non-passing" test numbers
-            if num_tests < failures + skipped + errors:
+            # A test that failed AND errored might not be counted twice, so don't add failures and errors
+            if num_tests < max(failures, errors) + skipped:
                 raise ValueError(f"Invalid test count: "
-                                 f"{num_tests} tests, {failures} failures, {skipped} skipped, {errors} errors")
+                                 f"{num_tests} tests vs {failures} failures, {skipped} skipped, {errors} errors")
 
             parsed_test_cases = parse_test_cases(test_suite)
             if not parsed_test_cases:
@@ -1035,9 +1036,15 @@ def parse_test_result_file(xml_file: Path) -> List[TestSuite]:
 
             test_cases: Dict[str, TestCase] = {}
             for test_case in parsed_test_cases:
-                if test_case.name in test_cases:
-                    raise ValueError(f"Duplicate test case '{test_case}' in test suite {suite_name}")
-                test_cases[test_case.name] = test_case
+                try:
+                    old_test_case = test_cases[test_case.name]
+                except KeyError:
+                    # No test with that name yet, so add it
+                    test_cases[test_case.name] = test_case
+                else:
+                    # Ignore the case where a test failed and errored which might happen if teardown fails
+                    if {old_test_case.state, test_case.state} != {TestState.ERROR, TestState.FAILURE}:
+                        raise ValueError(f"Duplicate test case '{test_case}' in test suite {suite_name}")
 
             test_suites.append(
                 TestSuite(name=suite_name, test_cases=test_cases,
diff --git a/test/easyblocks/easyblock_specific.py b/test/easyblocks/easyblock_specific.py
index 2e6adae6316..766d27246c0 100644
--- a/test/easyblocks/easyblock_specific.py
+++ b/test/easyblocks/easyblock_specific.py
@@ -523,7 +523,7 @@ def test_pytorch_test_log_parsing(self):
             self.assertEqual((name, suite.summary), (name, results2[name].summary))
         del results2
 
-        self.assertEqual(len(results), 13)
+        self.assertEqual(len(results), 14)
 
         # 2 small test suites used as a smoke test using a most features
         self.assertIn('backends/xeon/test_launch', results)
@@ -556,6 +556,7 @@ def test_pytorch_test_log_parsing(self):
             distributed/tensor/test_dtensor_ops: 0 failed, 2 passed, 2 skipped, 0 errors
             dynamo/test_dynamic_shapes: 3 failed, 14 passed, 0 skipped, 0 errors
             dynamo/test_misc: 1 failed, 9 passed, 0 skipped, 0 errors
+            inductor/test_cudagraph_trees: 1 failed, 0 passed, 0 skipped, 0 errors
             jit/test_builtins: 0 failed, 1 passed, 0 skipped, 0 errors
             test_autoload: 0 failed, 1 passed, 1 skipped, 0 errors
             test_nestedtensor: 3 failed, 2 passed, 3 skipped, 1 errors
@@ -565,6 +566,7 @@ def test_pytorch_test_log_parsing(self):
                                  for suite in results.values()
                                  for test in suite.get_tests()))
         self.assertEqual(tests, textwrap.dedent("""
+            CudaGraphTreeTests.test_workspace_allocation_error: failure
             DistQuantizationTests.test_all_gather_fp16: success
             DistQuantizationTests.test_all_gather_fp16: success
             DistQuantizationTests.test_all_gather_fp16: success
diff --git a/test/pytorch_test_logs/faulty-reports/consistency/test_name/test_name-1.xml b/test/pytorch_test_logs/faulty-reports/consistency/test_name/test_name-1.xml
index 6ec57f15910..57ba89836c8 100644
--- a/test/pytorch_test_logs/faulty-reports/consistency/test_name/test_name-1.xml
+++ b/test/pytorch_test_logs/faulty-reports/consistency/test_name/test_name-1.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <testsuites>
   <!-- 'tests' attribute is to low -->
-  <testsuite name="pytest" errors="1" failures="1" skipped="1" tests="2" time="4.2">
+  <testsuite name="pytest" errors="2" failures="1" skipped="1" tests="2" time="4.2">
     <testcase classname="TestName" name="test1" time="4.2" file="test_name.py">
       <skipped message=""/>
     </testcase>
@@ -11,5 +11,8 @@
     <testcase classname="TestName" name="test3" time="4.2" file="test_name.py">
       <error message="...">[snip]</error>
     </testcase>
+    <testcase classname="TestName" name="test4" time="4.2" file="test_name.py">
+      <error message="...">[snip]</error>
+    </testcase>
   </testsuite>
 </testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_cudagraph_trees/inductor.test_cudagraph_trees-17dac.xml b/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_cudagraph_trees/inductor.test_cudagraph_trees-17dac.xml
new file mode 100644
index 00000000000..6fb969802b8
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_cudagraph_trees/inductor.test_cudagraph_trees-17dac.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<testsuites>
+  <!-- The same test failed and then errored (e.g. during teardown) but got counted only once -->
+  <testsuite name="pytest" errors="1" failures="1" skipped="0" tests="1" time="4.2">
+    <testcase classname="CudaGraphTreeTests" name="test_workspace_allocation_error" time="4.2" file="inductor/test_cudagraph_trees.py">
+      <rerun message="...">[snip]</rerun>
+      <rerun message="...">[snip]</rerun>
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="CudaGraphTreeTests" name="test_workspace_allocation_error" time="4.2" file="inductor/test_cudagraph_trees.py">
+      <error message="...">[snip]</error>
+    </testcase>
+  </testsuite>
+</testsuites>

From 8e2033c4caa4062288ee6cfc58bb891b302801f0 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Mon, 28 Jul 2025 10:31:47 +0200
Subject: [PATCH 09/20] Find PyTorch test suite variants

Current failures include
> Parsing the test result files missed the following failed suites: distributed/algorithms/quantization/test_quantization

The suite name as contained in the XML results is:
> dist-nccl/distributed/algorithms/quantization/test_quantization

So if the suite name isn't found as-is (fast due to dict hashing),
also check for the name without the variant prefix (rare).
To avoid false positives, limit this to variants starting with `dist-`.
---
 easybuild/easyblocks/p/pytorch.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index d3bae5e6dfa..abc788ce0dc 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -645,8 +645,17 @@ def test_step(self):
                 else:
                     msg = f'Failed to find any test report files at {test_reports_path}'
                 raise EasyBuildError(msg)
+
+            def suite_is_in_xml_results(suite_name):
+                """Check if the suite is in the XML results"""
+                if suite_name in xml_results:
+                    return True
+                # Handle variants like dist-nccl/test_c10d_nccl
+                return any(xml_suite_name.split(os.path.sep, maxsplit=1)[-1] == suite_name
+                           for xml_suite_name in xml_results if xml_suite_name.startswith('dist-'))
+
             missing_suites = [suite.name for suite in parsed_test_result.failed_suites
-                              if suite.name not in xml_results]
+                              if not suite_is_in_xml_results(suite.name)]
             if missing_suites:
                 raise EasyBuildError('Parsing the test result files missed the following failed suites: %s',
                                      ', '.join(sorted(missing_suites)))

From 804a0486f090dcfad11de7ed59c686797c7ec8c4 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Fri, 19 Sep 2025 10:11:02 +0200
Subject: [PATCH 10/20] Isolate against more user env variables

---
 easybuild/easyblocks/p/pytorch.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index abc788ce0dc..f206b1af7e9 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -538,15 +538,18 @@ def add_enable_option(name, enabled):
         self.cfg.update('prebuildopts', ' '.join(unique_options) + ' ')
         self.cfg.update('preinstallopts', ' '.join(unique_options) + ' ')
 
-    def _set_cache_dir(self):
-        """Set $XDG_CACHE_HOME and $TRITON_HOME to avoid PyTorch defaulting to $HOME"""
+    def _set_cache_dirs(self):
+        """Set $XDG_CACHE_HOME and $TRITON_HOME to avoid PyTorch defaulting to $HOME
+        and similar variables to ensure clean build/test environment
+        """
         cache_dir = os.path.join(self.tmpdir, '.cache')
         # The path must exist!
         mkdir(cache_dir, parents=True)
         env.setvar('XDG_CACHE_HOME', cache_dir)
         # Triton also uses a path defaulting to $HOME
-        # Isolate against user-set variables
-        env.unset_env_vars(('TRITON_DUMP_DIR', 'TRITON_OVERRIDE_DIR', 'TRITON_CACHE_DIR'))
+        # Isolate against user-set variables which could lead to reusing caches that may make tests fail
+        env.unset_env_vars(('TRITON_DUMP_DIR', 'TRITON_OVERRIDE_DIR', 'TRITON_CACHE_DIR',
+                            'TORCH_HOME', 'TORCHINDUCTOR_CACHE_DIR', 'PYTORCH_KERNEL_CACHE_PATH'))
         triton_home = os.path.join(self.tmpdir, '.triton_home')
         env.setvar('TRITON_HOME', triton_home)
 
@@ -599,7 +602,7 @@ def get_test_name_diff(lst_should, lst_is):
 
     def test_step(self):
         """Run unit tests"""
-        self._set_cache_dir()
+        self._set_cache_dirs()
         # Pretend to be on FB CI which disables some tests, especially those which download stuff
         env.setvar('SANDCASTLE', '1')
         # Skip this test(s) which is very flaky
@@ -780,7 +783,7 @@ def suite_is_in_xml_results(suite_name):
             raise EasyBuildError("Test command had non-zero exit code (%s), but no failed tests found?!", tests_ec)
 
     def test_cases_step(self):
-        self._set_cache_dir()
+        self._set_cache_dirs()
         super().test_cases_step()
 
     def sanity_check_step(self, *args, **kwargs):

From 9949ca89c7abe14901a8f9fb35f212baa834f57a Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Fri, 19 Sep 2025 10:11:38 +0200
Subject: [PATCH 11/20] Avoid PyTorch configure warnings/issues by explicitly
 setting (more) dependency options

Most of the options have a True/False value which we should set to
False/0 when we don't have/use that dependency.
This ensures that a) no system lib will be found and b) no warning will
be shown.

Also update the list with options added or removed until PyTorch 2.7
---
 easybuild/easyblocks/p/pytorch.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index f206b1af7e9..e7b03046688 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -332,12 +332,17 @@ def is_version_ok(version_range):
         available_libs = (
             # Format: (PyTorch flag to enable, EB name, '<min version>:<exclusive max version>')
             # Use `None` for the EB name if no known EC exists
-            ('USE_FFMPEG=1', 'FFmpeg', '1.0.0:'),
+            # Check the comment on top of setup.py
+            ('USE_FFMPEG=1', 'FFmpeg', '1.0.0:2.4.0'),
             ('USE_GFLAGS=1', 'gflags', '1.0.0:'),
             ('USE_GLOG=1', 'glog', '1.0.0:'),
+            ('USE_CUDSS=1', 'cuDSS', '1.0.0:'),
+            ('USE_CUSPARSELT=1', 'cuSPARSELt', '2.7:'),
+            ('USE_UCC=1', 'UCC-CUDA', '1.13.0:'),
+            ('USE_SYSTEM_UCC=1', 'UCC-CUDA', '1.13.0:'),
 
             # For system libs check CMakeLists.txt, below `if(USE_SYSTEM_LIBS)`, order kept here
-            # NCCL handled specially as other env variables are requires for it
+            # NCCL handled specially as other env variables are required for it
             ('USE_SYSTEM_CPUINFO=1', None, '1.6.0:'),
             ('USE_SYSTEM_SLEEF=1', None, '1.6.0:'),
             ('USE_SYSTEM_GLOO=1', None, '1.6.0:'),
@@ -448,7 +453,7 @@ def add_enable_option(name, enabled):
             raise EasyBuildError("Did not find a supported BLAS in dependencies. Don't know which BLAS lib to use")
 
         available_dependency_options = EB_PyTorch.get_dependency_options_for_version(self.version)
-        dependency_names = {dep['name'] for dep in self.cfg.dependencies()}
+        dependency_names = self.cfg.dependency_names()
         not_used_dep_names = []
         for enable_opt, dep_name in available_dependency_options:
             if dep_name is None:
@@ -457,6 +462,9 @@ def add_enable_option(name, enabled):
                 options.append(enable_opt)
             else:
                 not_used_dep_names.append(dep_name)
+                # Explicitly toggle to avoid picking up system libs, restricted to 2.7+ to avoid retesting older ECs
+                if pytorch_version >= '2.7' and enable_opt[-1] in ('0', '1'):
+                    options.append(enable_opt[:-1] + ('0' if enable_opt[-1] == '1' else '1'))
         self.log.info('Did not enable options for the following dependencies as they are not used in the EC: %s',
                       not_used_dep_names)
 
@@ -510,7 +518,8 @@ def add_enable_option(name, enabled):
                 options.append('USE_FBGEMM=0')
 
         # Metal only supported on IOS which likely doesn't work with EB, so disabled
-        options.append('USE_METAL=0')
+        if pytorch_version < '2.4':  # Removed in 2.4
+            options.append('USE_METAL=0')
 
         build_type = self.cfg.get('build_type')
         if build_type is None:

From 906d8cf00e9b7e1096e92086370159f703abd145 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Fri, 19 Sep 2025 10:17:59 +0200
Subject: [PATCH 12/20] Symlink NCCL library when added as a build dependency

As PyTorch is sensitive to specific NCCL versions one approach is to use
it as a build dependency only and add an rpath to it after copying it
into a (non-standard) folder inside the PyTorch module.
This is similar to the PyPI package that depends on various
nvidia-packages and adds relative rpaths to ensure they are used when
loading the torch package/libraries.
---
 easybuild/easyblocks/p/pytorch.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index e7b03046688..af204a3b04b 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -47,8 +47,8 @@
 from easybuild.tools import LooseVersion
 from easybuild.tools.build_log import EasyBuildError, print_warning
 from easybuild.tools.config import ERROR, build_option
-from easybuild.tools.filetools import apply_regex_substitutions, mkdir, symlink
-from easybuild.tools.modules import get_software_root, get_software_version
+from easybuild.tools.filetools import apply_regex_substitutions, mkdir, symlink, copy
+from easybuild.tools.modules import get_software_root, get_software_version, get_software_libdir
 from easybuild.tools.run import run_shell_cmd
 from easybuild.tools.systemtools import POWER, get_cpu_architecture
 
@@ -795,6 +795,32 @@ def test_cases_step(self):
         self._set_cache_dirs()
         super().test_cases_step()
 
+    def install_step(self):
+        """Set rpath if required"""
+        super().install_step()
+        # If NCCL is used as a build dependency only, we need to make sure it is found at runtime
+        if 'NCCL' in self.cfg.dependency_names(build_only=True):
+            if 'patchelf' not in self.cfg.dependency_names():
+                raise EasyBuildError("PyTorch requires patchelf to set the RPATH of the NCCL"
+                                     " as NCCL is only a build dependency")
+            nccl_libdir = get_software_libdir('NCCL', full_path=True)
+            if not nccl_libdir:
+                raise EasyBuildError("Did not find libdir of NCCL installation")
+            nccl_libs = list(Path(nccl_libdir).glob('libnccl.so*'))
+            if not nccl_libs:
+                raise EasyBuildError("Did not find any NCCL libraries in %s", nccl_libdir)
+            torch_libs = list(Path(self.installdir).glob('lib/**/torch/**/*.so'))
+            if not torch_libs:
+                raise EasyBuildError("Did not find any PyTorch libraries in %s", self.installdir)
+            nvidia_libs_dir = os.path.join(self.installdir, 'nvidia_libs')
+            mkdir(nvidia_libs_dir, parents=True)
+            copy(nccl_libs, nvidia_libs_dir, symlinks=True)
+
+            for lib in torch_libs:
+                rpath = os.path.relpath(nvidia_libs_dir, lib.parent)
+                run_shell_cmd(['patchelf', '--force-rpath', '--add-rpath', f'$ORIGIN/{rpath}', str(lib)],
+                              use_bash=False)
+
     def sanity_check_step(self, *args, **kwargs):
         """Custom sanity check for PyTorch"""
 

From a986775e49c383d2bdf03f7fa5849b134708d100 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Fri, 19 Sep 2025 10:20:21 +0200
Subject: [PATCH 13/20] Revert "Symlink NCCL library when added as a build
 dependency"

This reverts commit 906d8cf00e9b7e1096e92086370159f703abd145.
---
 easybuild/easyblocks/p/pytorch.py | 30 ++----------------------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index af204a3b04b..e7b03046688 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -47,8 +47,8 @@
 from easybuild.tools import LooseVersion
 from easybuild.tools.build_log import EasyBuildError, print_warning
 from easybuild.tools.config import ERROR, build_option
-from easybuild.tools.filetools import apply_regex_substitutions, mkdir, symlink, copy
-from easybuild.tools.modules import get_software_root, get_software_version, get_software_libdir
+from easybuild.tools.filetools import apply_regex_substitutions, mkdir, symlink
+from easybuild.tools.modules import get_software_root, get_software_version
 from easybuild.tools.run import run_shell_cmd
 from easybuild.tools.systemtools import POWER, get_cpu_architecture
 
@@ -795,32 +795,6 @@ def test_cases_step(self):
         self._set_cache_dirs()
         super().test_cases_step()
 
-    def install_step(self):
-        """Set rpath if required"""
-        super().install_step()
-        # If NCCL is used as a build dependency only, we need to make sure it is found at runtime
-        if 'NCCL' in self.cfg.dependency_names(build_only=True):
-            if 'patchelf' not in self.cfg.dependency_names():
-                raise EasyBuildError("PyTorch requires patchelf to set the RPATH of the NCCL"
-                                     " as NCCL is only a build dependency")
-            nccl_libdir = get_software_libdir('NCCL', full_path=True)
-            if not nccl_libdir:
-                raise EasyBuildError("Did not find libdir of NCCL installation")
-            nccl_libs = list(Path(nccl_libdir).glob('libnccl.so*'))
-            if not nccl_libs:
-                raise EasyBuildError("Did not find any NCCL libraries in %s", nccl_libdir)
-            torch_libs = list(Path(self.installdir).glob('lib/**/torch/**/*.so'))
-            if not torch_libs:
-                raise EasyBuildError("Did not find any PyTorch libraries in %s", self.installdir)
-            nvidia_libs_dir = os.path.join(self.installdir, 'nvidia_libs')
-            mkdir(nvidia_libs_dir, parents=True)
-            copy(nccl_libs, nvidia_libs_dir, symlinks=True)
-
-            for lib in torch_libs:
-                rpath = os.path.relpath(nvidia_libs_dir, lib.parent)
-                run_shell_cmd(['patchelf', '--force-rpath', '--add-rpath', f'$ORIGIN/{rpath}', str(lib)],
-                              use_bash=False)
-
     def sanity_check_step(self, *args, **kwargs):
         """Custom sanity check for PyTorch"""
 

From 4feb3a9d4864b41d1619720cc399f3963d152fb2 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Mon, 20 Oct 2025 11:13:58 +0200
Subject: [PATCH 14/20] Use raise-from for better error reporting

---
 easybuild/easyblocks/p/pytorch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index e7b03046688..ceaa33055ff 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -649,7 +649,7 @@ def test_step(self):
             try:
                 xml_results = get_test_results(test_reports_path)
             except ValueError as e:
-                raise EasyBuildError(f"Failed to parse test results at {test_reports_path}: {e}")
+                raise EasyBuildError(f"Failed to parse test results at {test_reports_path}: {e}") from e
             if not xml_results:
                 files = [file for file in test_reports_path.rglob('*.*') if file.is_file()]
                 if files:
@@ -1073,7 +1073,7 @@ def parse_test_result_file(xml_file: Path) -> List[TestSuite]:
                           )
             )
     except Exception as e:
-        raise ValueError(f"Failed to parse test result file '{xml_file}': {e}")
+        raise ValueError(f"Failed to parse test result file '{xml_file}': {e}") from e
     return test_suites
 
 

From c98ab49df8bb680a67d2181a385e9e6ca5b0344e Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Mon, 20 Oct 2025 11:14:15 +0200
Subject: [PATCH 15/20] Don't fail for incomplete testcase tags

Some <testcase> elements are missing all attributes except for 'time'.
Just ignore those.
---
 easybuild/easyblocks/p/pytorch.py                          | 7 ++++++-
 .../inductor.test_cudagraph_trees-bbc64.xml                | 7 +++++++
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/inductor.test_cudagraph_trees/inductor.test_cudagraph_trees-bbc64.xml

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index ceaa33055ff..19b660615b6 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -918,7 +918,12 @@ def parse_test_cases(test_suite_el: ET.Element) -> List[TestCase]:
     """Extract all test cases from the testsuite XML element"""
     test_cases: List[TestCase] = []
     for testcase in test_suite_el.iterfind("testcase"):
-        classname = testcase.attrib["classname"]
+        try:
+            classname = testcase.attrib["classname"]
+        except KeyError as e:
+            if any(tag in testcase.attrib for tag in ('name', 'file')):
+                raise ValueError(f"Missing 'classname' attribute in testcase (Attributes: '{testcase.attrib}')") from e
+            continue  # Skip invalid testcase entries without classname
         test_name = f'{classname}.{testcase.attrib["name"]}'
         # Note: It is possible that a test has (the same?) element multiple times, likely when using variants.
         # Ignore that and only check if it has one of the failure tags at least once.
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_cudagraph_trees/inductor.test_cudagraph_trees-bbc64.xml b/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_cudagraph_trees/inductor.test_cudagraph_trees-bbc64.xml
new file mode 100644
index 00000000000..3ce296ae6a6
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_cudagraph_trees/inductor.test_cudagraph_trees-bbc64.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="0" skipped="0" tests="0" time="4.2">
+    <!-- Incomplete record should be ignored -->
+    <testcase time="4.2"/>
+  </testsuite>
+</testsuites>

From 6de55c465866e003c9b579d56dd0978212c38d9e Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Mon, 20 Oct 2025 12:49:36 +0200
Subject: [PATCH 16/20] Handle skip-and-fail mismatch

PyTorch reruns single tests by skipping the other tests that come before them.
If those other tests didn't succeed, the parser will error out during
merging as it will see the same test as both skipped and failed.
Handle that by ignoring the skipped test result during merge.
---
 easybuild/easyblocks/p/pytorch.py                   |  8 ++++----
 test/easyblocks/easyblock_specific.py               |  5 ++++-
 .../inductor.test_aot_inductor_arrayref-bfd31.xml   | 13 +++++++++++++
 .../inductor.test_aot_inductor_arrayref-bfd31_2.xml | 13 +++++++++++++
 4 files changed, 34 insertions(+), 5 deletions(-)
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/inductor.test_aot_inductor_arrayref/inductor.test_aot_inductor_arrayref-bfd31.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-pytest/inductor.test_aot_inductor_arrayref/inductor.test_aot_inductor_arrayref-bfd31_2.xml

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index 19b660615b6..aed270d7aef 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -1096,11 +1096,11 @@ def merge_test_suites(test_suites: Iterable[TestSuite]) -> TestSuite:
             except KeyError:
                 result_suite.add_test(current_test)
             else:
-                if (existing_test.state == TestState.SKIPPED) != (current_test.state == TestState.SKIPPED):
-                    raise ValueError(f"Mismatch in whether test was skipped or not in suite {result_suite.name}: "
-                                     f"{existing_test} vs. {current_test}")
-                # If test was rerun and succeeded use that
                 if current_test.state == TestState.SUCCESS and existing_test.state != TestState.SUCCESS:
+                    # If test was rerun and succeeded use that
+                    result_suite.replace_test(current_test)
+                elif existing_test.state == TestState.SKIPPED and current_test.state != TestState.SKIPPED:
+                    # If test was skipped but later run use that
                     result_suite.replace_test(current_test)
     return result_suite
 
diff --git a/test/easyblocks/easyblock_specific.py b/test/easyblocks/easyblock_specific.py
index 766d27246c0..6d3ae2fdbca 100644
--- a/test/easyblocks/easyblock_specific.py
+++ b/test/easyblocks/easyblock_specific.py
@@ -523,7 +523,7 @@ def test_pytorch_test_log_parsing(self):
             self.assertEqual((name, suite.summary), (name, results2[name].summary))
         del results2
 
-        self.assertEqual(len(results), 14)
+        self.assertEqual(len(results), 15)
 
         # 2 small test suites used as a smoke test using a most features
         self.assertIn('backends/xeon/test_launch', results)
@@ -556,6 +556,7 @@ def test_pytorch_test_log_parsing(self):
             distributed/tensor/test_dtensor_ops: 0 failed, 2 passed, 2 skipped, 0 errors
             dynamo/test_dynamic_shapes: 3 failed, 14 passed, 0 skipped, 0 errors
             dynamo/test_misc: 1 failed, 9 passed, 0 skipped, 0 errors
+            inductor/test_aot_inductor_arrayref: 2 failed, 0 passed, 0 skipped, 0 errors
             inductor/test_cudagraph_trees: 1 failed, 0 passed, 0 skipped, 0 errors
             jit/test_builtins: 0 failed, 1 passed, 0 skipped, 0 errors
             test_autoload: 0 failed, 1 passed, 1 skipped, 0 errors
@@ -566,6 +567,8 @@ def test_pytorch_test_log_parsing(self):
                                  for suite in results.values()
                                  for test in suite.get_tests()))
         self.assertEqual(tests, textwrap.dedent("""
+            AOTInductorTestABICompatibleCpuWithStackAllocation.test_fail_and_skip: failure
+            AOTInductorTestABICompatibleCpuWithStackAllocation.test_skip_and_fail: failure
             CudaGraphTreeTests.test_workspace_allocation_error: failure
             DistQuantizationTests.test_all_gather_fp16: success
             DistQuantizationTests.test_all_gather_fp16: success
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_aot_inductor_arrayref/inductor.test_aot_inductor_arrayref-bfd31.xml b/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_aot_inductor_arrayref/inductor.test_aot_inductor_arrayref-bfd31.xml
new file mode 100644
index 00000000000..b2b186b80bd
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_aot_inductor_arrayref/inductor.test_aot_inductor_arrayref-bfd31.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="1" tests="2" time="4.2">
+    <!-- Tests that failed then were skipped (e.g. because other test gets rerun) and vice versa
+    See the suffixed result file for the counterpart. -->
+    <testcase classname="AOTInductorTestABICompatibleCpuWithStackAllocation" name="test_skip_and_fail" time="4.2" file="inductor/test_aot_inductor.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+    <testcase classname="AOTInductorTestABICompatibleCpuWithStackAllocation" name="test_fail_and_skip" time="4.2" file="inductor/test_aot_inductor.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+  </testsuite>
+</testsuites>
diff --git a/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_aot_inductor_arrayref/inductor.test_aot_inductor_arrayref-bfd31_2.xml b/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_aot_inductor_arrayref/inductor.test_aot_inductor_arrayref-bfd31_2.xml
new file mode 100644
index 00000000000..37ff24b617b
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-pytest/inductor.test_aot_inductor_arrayref/inductor.test_aot_inductor_arrayref-bfd31_2.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<testsuites>
+  <testsuite name="pytest" errors="0" failures="1" skipped="1" tests="2" time="4.2">
+    <!-- Tests that failed then were skipped (e.g. because other test gets rerun) and vice versa
+    See the non-suffixed result file for the counterpart. -->
+    <testcase classname="AOTInductorTestABICompatibleCpuWithStackAllocation" name="test_skip_and_fail" time="4.2" file="inductor/test_aot_inductor.py">
+      <failure message="...">[snip]</failure>
+    </testcase>
+    <testcase classname="AOTInductorTestABICompatibleCpuWithStackAllocation" name="test_fail_and_skip" time="4.2" file="inductor/test_aot_inductor.py">
+      <skipped message="...">[snip]</skipped>
+    </testcase>
+  </testsuite>
+</testsuites>

From bacadb86126ac6d31e08fb89e4be31066911f65c Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Wed, 22 Oct 2025 13:31:05 +0200
Subject: [PATCH 17/20] Update for 2.8+

---
 easybuild/easyblocks/p/pytorch.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index aed270d7aef..1c21b60deee 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -400,18 +400,19 @@ def configure_step(self):
                                           [(r'(default=_get_test_report_path\(\) if) IS(_IN)?_CI else None',
                                             fr'\1 os.getenv("{self.GENERATE_TEST_REPORT_VAR_NAME}") else None')],
                                           backup=False, on_missing_match=ERROR)
-                if pytorch_version >= '2.1.0':
-                    run_test_subs = [(r'if IS_CI:\n\s+# Add the option to generate XML test report.*',
-                                      'if TEST_SAVE_XML:\n')]
-                else:
-                    run_test_subs = [
-                         (r'from torch.testing._internal.common_utils import\s+\(\n\s+',
-                          r'\g<0>get_report_path, '),
-                         (r'# If using pytest.*\n\s+if options.pytest:\n\s+unittest_args = \[',
-                          r'\g<0>"--junit-xml-reruns", get_report_path(pytest=True)] + ['),
-                    ]
-                apply_regex_substitutions('test/run_test.py', run_test_subs, backup=False, on_missing_match=ERROR,
-                                          single_line=False)
+                if pytorch_version < '2.8.0':
+                    if pytorch_version >= '2.1.0':
+                        run_test_subs = [(r'if IS_CI:\n\s+# Add the option to generate XML test report.*',
+                                          'if TEST_SAVE_XML:\n')]
+                    else:
+                        run_test_subs = [
+                             (r'from torch.testing._internal.common_utils import\s+\(\n\s+',
+                              r'\g<0>get_report_path, '),
+                             (r'# If using pytest.*\n\s+if options.pytest:\n\s+unittest_args = \[',
+                              r'\g<0>"--junit-xml-reruns", get_report_path(pytest=True)] + ['),
+                        ]
+                    apply_regex_substitutions('test/run_test.py', run_test_subs, backup=False, on_missing_match=ERROR,
+                                              single_line=False)
                 self.has_xml_test_reports = True
 
         # Gather default options. Will be checked against (and can be overwritten by) custom_opts

From de6a86f215672f6a3322b0a21a48e64330300feb Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Fri, 7 Nov 2025 17:02:35 +0100
Subject: [PATCH 18/20] Add exception for "suitename" of "-c"

PyTorch's `test_testing.py` runs a subtest directly from Python code, i.e. via `python -c`.
This shows up as `-c` in the test report path, and the <testcase> tags
have no `file` attribute.
As a result `determine_suite_name` fails in `reported_file = os.path.basename(file_attribute.pop())` with
> KeyError: 'pop from an empty set'

Simply ignore those reports.
---
 easybuild/easyblocks/p/pytorch.py             | 22 ++++++++++++++-----
 .../-c/TEST-TestFooCPU-20251030150550.xml     |  7 ++++++
 ...test.suite._ErrorHolder-20251030150550.xml | 11 ++++++++++
 3 files changed, 34 insertions(+), 6 deletions(-)
 create mode 100644 test/pytorch_test_logs/test-reports/python-unittest/-c/TEST-TestFooCPU-20251030150550.xml
 create mode 100644 test/pytorch_test_logs/test-reports/python-unittest/-c/TEST-unittest.suite._ErrorHolder-20251030150550.xml

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index 1c21b60deee..b7a23029b50 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -39,7 +39,7 @@
 from itertools import chain, groupby
 from operator import attrgetter
 from pathlib import Path
-from typing import Dict, Iterable, List
+from typing import Dict, Iterable, List, Optional
 
 import easybuild.tools.environment as env
 from easybuild.easyblocks.generic.pythonpackage import PythonPackage
@@ -942,20 +942,27 @@ def parse_test_cases(test_suite_el: ET.Element) -> List[TestCase]:
     return test_cases
 
 
-def determine_suite_name(xml_file: Path, test_suite_xml: List[ET.Element]) -> str:
+def determine_suite_name(xml_file: Path, test_suite_xml: List[ET.Element]) -> Optional[str]:
     """Determine main test suite name from path(s) to match against run_test.py output"""
     # Gather all file attributes from the test cases if set
     test_cases = [testcase for suite in test_suite_xml for testcase in suite.iterfind("testcase")]
-    file_attribute = {testcase.attrib.get("file") for testcase in test_cases}
-    file_attribute.discard(None)
     suite_name = xml_file.parent.name.replace('.', os.path.sep)  # Usually the suite name is the folder name
+
     if xml_file.name.startswith('TEST-'):
+        # A unittest test could be run directly (`python -c 'code...'`) in which case there is no name
+        if suite_name == '-c':
+            return None
         # Python unittest reports have 1 file per test class:
         # test-reports/python-unittest/test_package/TEST-test_repackage.TestRepackage-20250217120914.xml
         # -> test_repackage.py ran TestRepackage
         # test-reports/dist-gloo/distributed.algorithms.test_quantization/TEST-DistQuantizationTests-20250123170925.xml
         # -> distributed/algorithms/test_quantization ran DistQuantizationTests in dist-gloo variant
         # Just do a sanity check
+        file_attribute = {testcase.attrib.get("file") for testcase in test_cases}
+        file_attribute.discard(None)
+        if not file_attribute:  # Fallback to checking the <testsuite> tags
+            file_attribute = {suite.attrib.get("file") for suite in test_suite_xml}
+            file_attribute.discard(None)
         if len(file_attribute) > 1:
             raise ValueError(f"Found multiple reported files in unittest report of '{xml_file}': {file_attribute}")
         reported_file = os.path.basename(file_attribute.pop())
@@ -963,11 +970,12 @@ def determine_suite_name(xml_file: Path, test_suite_xml: List[ET.Element]) -> st
         name_parts = xml_file.name[len('TEST-'):].rsplit('-', 1)[0].rsplit('.', 2)
         # If there is only one part it is the class -> filename is in the suite name
         if len(name_parts) == 1:
-            test_file_name = os.path.basename(suite_name) + '.py'
+            test_file_name = os.path.basename(suite_name)
         else:
             # Note that multiple parts are possible for sub-test files:
             # TEST-jit.test_builtins.TestBuiltins (jit/test_builtins.py)
-            test_file_name = name_parts[-2] + '.py'
+            test_file_name = name_parts[-2]
+        test_file_name += '.py'
         if test_file_name != reported_file:
             raise ValueError(f"Unexpected file attributes in test cases of '{xml_file}'. "
                              f"Expected {test_file_name}, got {file_attribute}")
@@ -1036,6 +1044,8 @@ def parse_test_result_file(xml_file: Path) -> List[TestSuite]:
 
         # Suite name to correctly deduplicate tests and match against run_test.py output
         suite_name = determine_suite_name(xml_file, test_suite_xml)
+        if suite_name is None:
+            return []
 
         test_suites: List[TestSuite] = []
 
diff --git a/test/pytorch_test_logs/test-reports/python-unittest/-c/TEST-TestFooCPU-20251030150550.xml b/test/pytorch_test_logs/test-reports/python-unittest/-c/TEST-TestFooCPU-20251030150550.xml
new file mode 100644
index 00000000000..4afd9909341
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-unittest/-c/TEST-TestFooCPU-20251030150550.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuite name="TestFooCPU-20251030150550" tests="1" file=".py" time="0.013" timestamp="2025-10-30T15:05:50" failures="0" errors="0" skipped="0">
+	<testcase classname="TestFooCPU" name="test_bar_cpu" time="0.013" timestamp="2025-10-30T15:05:50">
+		<system-out><![CDATA[called with TestFooCPU
+]]></system-out>
+	</testcase>
+</testsuite>
diff --git a/test/pytorch_test_logs/test-reports/python-unittest/-c/TEST-unittest.suite._ErrorHolder-20251030150550.xml b/test/pytorch_test_logs/test-reports/python-unittest/-c/TEST-unittest.suite._ErrorHolder-20251030150550.xml
new file mode 100644
index 00000000000..2015b560986
--- /dev/null
+++ b/test/pytorch_test_logs/test-reports/python-unittest/-c/TEST-unittest.suite._ErrorHolder-20251030150550.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuite name="unittest.suite._ErrorHolder-20251030150550" tests="1" file="unittest/suite.py" time="0.000" timestamp="0001-01-01T00:00:00" failures="0" errors="1" skipped="0">
+	<testcase classname="" name="tearDownClass (__main__.TestFooCPU)" time="0.000" timestamp="0001-01-01T00:00:00">
+		<error type="RuntimeError" message="called with TestFooCPU"><![CDATA[Traceback (most recent call last):
+  File "/tmp/easybuild-tmp/eb-ek3ahsid/tmpuv_fi4bt/lib/python3.12/site-packages/torch/testing/_internal/common_device_type.py", line 901, in _tearDownClass
+    generic_test_class.tearDownClass.__func__(cls)
+  File "<string>", line 16, in tearDownClass
+RuntimeError: called with TestFooCPU
+]]></error>
+	</testcase>
+</testsuite>

From 174b7bbcd2e69f2520de3943a411f51133b2af5d Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Mon, 10 Nov 2025 12:06:43 +0100
Subject: [PATCH 19/20] Add CLI arg to sort suites by custom attribute

---
 easybuild/easyblocks/p/pytorch.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index b7a23029b50..80540204d5a 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -1133,6 +1133,13 @@ def get_test_results(folder: Path) -> Dict[str, TestSuite]:
 
 
 def main(arg: Path):
+    # Get attribute on which to sort suites
+    try:
+        sort_key = sys.argv[sys.argv.index('--sort') + 1]
+    except ValueError:
+        sort_key = next((arg.split('=', 1)[1] for arg in sys.argv if arg.startswith('--sort=')), None)
+    if not sort_key:
+        sort_key = 'name'
     if arg.is_file():
         content = arg.read_text()
         m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content)
@@ -1152,15 +1159,16 @@ def main(arg: Path):
         raise RuntimeError(msg)
     else:
         results = get_test_results(Path(arg))
-        print(f"Found {len(results)} test suites:")
-        for suite in results.values():
+        print(f"Found {len(results)} test suites (sorted by {sort_key}):")
+        sorted_suites = sorted(results.values(), key=lambda suite: getattr(suite, sort_key))
+        for suite in sorted_suites:
             print(f"Suite {suite.name} {suite.num_tests}:\t{suite.summary}")
         print("Total tests:", sum(suite.num_tests for suite in results.values()))
         print("Total failures:", sum(suite.failures for suite in results.values()))
         print("Total skipped:", sum(suite.skipped for suite in results.values()))
         print("Total errors:", sum(suite.errors for suite in results.values()))
-        failed_suites = [suite.name for suite in results.values() if suite.failures + suite.errors > 0]
-        print(f"Failed suites ({len(failed_suites)}):\n\t" + '\n\t'.join(sorted(failed_suites)))
+        failed_suites = [suite.name for suite in sorted_suites if suite.failures + suite.errors > 0]
+        print(f"Failed suites ({len(failed_suites)}):\n\t" + '\n\t'.join(failed_suites))
         failed_tests = sum((suite.get_failed_tests() for suite in results.values()), [])
         print(f"Failed tests ({len(failed_tests)}):\n\t" + '\n\t'.join(sorted(failed_tests)))
         errored_tests = sum((suite.get_errored_tests() for suite in results.values()), [])

From 79fa0c582fedc7e2d7fc26b78e08f260d0d3f064 Mon Sep 17 00:00:00 2001
From: Alexander Grund <alexander.grund@tu-dresden.de>
Date: Mon, 10 Nov 2025 12:14:03 +0100
Subject: [PATCH 20/20] Show number of failed tests in list of failed suites

---
 easybuild/easyblocks/p/pytorch.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
index 80540204d5a..8a73b611c5e 100755
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -1162,13 +1162,14 @@ def main(arg: Path):
         print(f"Found {len(results)} test suites (sorted by {sort_key}):")
         sorted_suites = sorted(results.values(), key=lambda suite: getattr(suite, sort_key))
         for suite in sorted_suites:
-            print(f"Suite {suite.name} {suite.num_tests}:\t{suite.summary}")
+            print(f"Suite {suite.name}:\t{suite.num_tests} tests, {suite.summary}")
         print("Total tests:", sum(suite.num_tests for suite in results.values()))
         print("Total failures:", sum(suite.failures for suite in results.values()))
         print("Total skipped:", sum(suite.skipped for suite in results.values()))
         print("Total errors:", sum(suite.errors for suite in results.values()))
-        failed_suites = [suite.name for suite in sorted_suites if suite.failures + suite.errors > 0]
-        print(f"Failed suites ({len(failed_suites)}):\n\t" + '\n\t'.join(failed_suites))
+        failed_suites = [suite for suite in sorted_suites if suite.failures + suite.errors > 0]
+        print(f"Failed suites ({len(failed_suites)}):\n\t" + '\n\t'.join(
+            f'{suite.name} ({suite.failures + suite.errors}/{suite.num_tests})' for suite in failed_suites))
         failed_tests = sum((suite.get_failed_tests() for suite in results.values()), [])
         print(f"Failed tests ({len(failed_tests)}):\n\t" + '\n\t'.join(sorted(failed_tests)))
         errored_tests = sum((suite.get_errored_tests() for suite in results.values()), [])