diff --git a/easybuild/easyblocks/p/pytorch.py b/easybuild/easyblocks/p/pytorch.py
old mode 100644
new mode 100755
index 7898be4d62f..e90b3d1b18b
--- a/easybuild/easyblocks/p/pytorch.py
+++ b/easybuild/easyblocks/p/pytorch.py
@@ -1,3 +1,4 @@
+#! /usr/bin/env python
 ##
 # Copyright 2020-2025 Ghent University
 #
@@ -32,14 +33,22 @@
 import re
 import sys
 import tempfile
+import xml.etree.ElementTree as ET
+from collections import Counter
+from enum import Enum
+from itertools import chain, groupby
+from operator import attrgetter
+from pathlib import Path
+from typing import Dict, Iterable, List
 import easybuild.tools.environment as env
 from easybuild.tools import LooseVersion
 from easybuild.easyblocks.generic.pythonpackage import PythonPackage
 from easybuild.framework.easyconfig import CUSTOM
 from easybuild.tools.build_log import EasyBuildError, print_warning
-from easybuild.tools.config import build_option
+from easybuild.tools.config import build_option, ERROR
 from easybuild.tools.filetools import apply_regex_substitutions, mkdir, symlink
 from easybuild.tools.modules import get_software_root, get_software_version
+from easybuild.tools.run import run_cmd
 from easybuild.tools.systemtools import POWER, get_cpu_architecture
 
 
@@ -97,8 +106,8 @@ def find_failed_test_names(tests_out):
     # FAILED [0.0623s] dynamo/test_dynamic_shapes.py::DynamicShapesExportTests::test_predispatch -  [snip]
     regex = r"^(FAILED) (?:\[.*?\] )?(?:\w|/)+\.py.*::(test_.*?) - "
     failed_test_cases.extend(re.findall(regex, tests_out, re.M))
-    return FailedTestNames(error=sorted(m[1] for m in failed_test_cases if m[0] == 'ERROR'),
-                           fail=sorted(m[1] for m in failed_test_cases if m[0] != 'ERROR'))
+    return FailedTestNames(error=sorted(set(m[1] for m in failed_test_cases if m[0] == 'ERROR')),
+                           fail=sorted(set(m[1] for m in failed_test_cases if m[0] != 'ERROR')))
 
 
 def parse_test_log(tests_out):
@@ -126,6 +135,13 @@ def get_count_for_pattern(regex, text):
     # Remove empty lines to make RegExs below simpler
     tests_out = re.sub(r'^[ \t]*\n', '', tests_out, flags=re.MULTILINE)
 
+    # Examples: "test_jit_profiling failed! Received signal: SIGSEGV"
+    #           "test_weak failed!"
+    #           "test_decomp 1/1 failed!"
+    #           "test_pytree 1/1 failed! [Errno 2] No such file or directory: '/dev/shm/build/...'"
+    suite_failed_pattern = (r"^(?P<failed_test_suite_name>.*?) (?:\d+/\d+ )?failed!"
+                            r"(?: Received signal: (\w+)| \[Errno \d+\] .*)?\s*$")
+
     # Grep for patterns like:
     # Ran 219 tests in 67.325s
     #
@@ -134,7 +150,7 @@ def get_count_for_pattern(regex, text):
     regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n"
              r"FAILED \((?P<failure_summary>.*)\)$\n"
              r"(?:^(?:(?!failed!).)*$\n){0,5}"
-             r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")
+             + suite_failed_pattern)
 
     for m in re.finditer(regex, tests_out, re.M):
         # E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
@@ -167,7 +183,7 @@ def get_count_for_pattern(regex, text):
         r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* (\([0-9]+:[0-9]+:[0-9]+\) )?=+$\n"
         r"(?:.*skip info is located in the xml test reports.*\n)?"
         r"(?:.*FINISHED PRINTING LOG FILE.*\n)?"
-        r"(?P<failed_test_suite_name>.*) failed!$"
+        + suite_failed_pattern
     )
 
     for m in re.finditer(regex, tests_out, re.M):
@@ -191,7 +207,7 @@ def get_count_for_pattern(regex, text):
         r"^AssertionError: (?P<failure_summary>[0-9]+ unit test\(s\) failed):\n"
         r"(\s+.*\n)+"
         r"(((?!failed!).)*\n){0,5}"
-        r"(?P<failed_test_suite_name>.*) failed!$"
+        + suite_failed_pattern
     )
 
     for m in re.finditer(regex, tests_out, re.M):
@@ -223,9 +239,7 @@ def get_count_for_pattern(regex, text):
 
     # Gather all failed tests suites in case we missed any,
     # e.g. when it exited due to syntax errors or with a signal such as SIGSEGV
-    failed_suites_and_signal = set(
-        re.findall(r"^(?P<test_name>.*) failed!(?: Received signal: (\w+))?\s*$", tests_out, re.M)
-    )
+    failed_suites_and_signal = set(re.findall(suite_failed_pattern, tests_out, re.M))
 
     return TestResult(test_cnt=test_cnt, error_cnt=error_cnt, failure_cnt=failure_cnt,
                       failed_suites=failed_suites,
@@ -237,6 +251,8 @@ def get_count_for_pattern(regex, text):
 class EB_PyTorch(PythonPackage):
     """Support for building/installing PyTorch."""
 
+    GENERATE_TEST_REPORT_VAR_NAME = 'EASYBUILD_WRITE_PYTORCH_TEST_REPORTS'
+
     @staticmethod
     def extra_options():
         extra_vars = PythonPackage.extra_options()
@@ -261,6 +277,7 @@ def __init__(self, *args, **kwargs):
         """Constructor for PyTorch easyblock."""
         super(EB_PyTorch, self).__init__(*args, **kwargs)
         self.options['modulename'] = 'torch'
+        self.has_xml_test_reports = False
 
         self.tmpdir = tempfile.mkdtemp(suffix='-pytorch-build')
 
@@ -339,6 +356,38 @@ def configure_step(self):
 
         pytorch_version = LooseVersion(self.version)
 
+        self.has_xml_test_reports = False
+        if pytorch_version >= '1.10.0':
+            out, ec = run_cmd(self.python_cmd + " -c 'import xmlrunner'", log_ok=False)
+            if ec != 0:
+                msg = ("Python package xmlrunner (unittest-xml-reporting) not found in dependencies of the "
+                       "PyTorch EasyConfig, can't enable advanced test result checks. Output: " + out)
+                # We introduced this in the 2.3 EasyConfig
+                if pytorch_version >= '2.3':
+                    print_warning(msg)
+                else:
+                    self.log.warning(msg)
+            else:
+                # Replace the condition to enable the XML test reports to use our variable instead of $IS_CI/$IS_IN_CI
+                # The variable is used because the file gets installed and we shouldn't change the default behavior.
+                apply_regex_substitutions('torch/testing/_internal/common_utils.py',
+                                          [(r'(default=_get_test_report_path\(\) if) IS(_IN)?_CI else None',
+                                            fr'\1 os.getenv("{self.GENERATE_TEST_REPORT_VAR_NAME}") else None')],
+                                          backup=False, on_missing_match=ERROR)
+                if pytorch_version >= '2.1.0':
+                    run_test_subs = [(r'if IS_CI:\n\s+# Add the option to generate XML test report.*',
+                                      'if TEST_SAVE_XML:\n')]
+                else:
+                    run_test_subs = [
+                         (r'from torch.testing._internal.common_utils import\s+\(\n\s+',
+                          r'\g<0>get_report_path, '),
+                         (r'# If using pytest.*\n\s+if options.pytest:\n\s+unittest_args = \[',
+                          r'\g<0>"--junit-xml-reruns", get_report_path(pytest=True)] + ['),
+                    ]
+                apply_regex_substitutions('test/run_test.py', run_test_subs, backup=False, on_missing_match=ERROR,
+                                          single_line=False)
+                self.has_xml_test_reports = True
+
         # Gather default options. Will be checked against (and can be overwritten by) custom_opts
         options = ['PYTORCH_BUILD_VERSION=' + self.version, 'PYTORCH_BUILD_NUMBER=1']
 
@@ -475,6 +524,53 @@ def _set_cache_dir(self):
         mkdir(cache_dir, parents=True)
         env.setvar('XDG_CACHE_HOME', cache_dir)
 
+    def _compare_test_results(self, old_result, xml_result, old_failed_test_names, xml_failed_test_names):
+        """Compare test results parsed from stdout and XML files"""
+        diffs = []
+
+        old_suite_names = {suite.name for suite in old_result.failed_suites}
+        new_suite_names = {suite.name for suite in xml_result.failed_suites}
+        new_suites = new_suite_names - old_suite_names
+        missing_suites = old_suite_names - new_suite_names
+
+        diffs = []
+        if new_suites:
+            diffs.append(f'Found {len(new_suites)} new suites in XML files: {", ".join(sorted(new_suites))}')
+        if missing_suites:
+            diffs.append(f'Did not find {len(missing_suites)} suites in XML files: ' +
+                         ", ".join(sorted(missing_suites)))
+        if xml_result.test_cnt != old_result.test_cnt:
+            diffs.append(f'Different number of tests in XML files: {xml_result.test_cnt} != {old_result.test_cnt}')
+        if xml_result.error_cnt != old_result.error_cnt:
+            diffs.append(f'Different number of test errors in XML files: '
+                         f'{xml_result.error_cnt} != {old_result.error_cnt}')
+        if xml_result.failure_cnt != old_result.failure_cnt:
+            diffs.append(f'Different number of test failures in XML files: '
+                         f'{xml_result.failure_cnt} != {old_result.failure_cnt}')
+
+        def get_test_name_diff(lst_should, lst_is):
+            # Handle the case where one includes the class name and the other doesn't
+            return [name for name in lst_is
+                    if not any(name == name2 or
+                               name.endswith(f'.{name2}') or
+                               name2.endswith(f'.{name}') for name2 in lst_should)]
+        new_tests = get_test_name_diff(old_failed_test_names.error, xml_failed_test_names.error)
+        missing_tests = get_test_name_diff(xml_failed_test_names.error, old_failed_test_names.error)
+        if new_tests:
+            diffs.append(f'Found {len(new_tests)} new tests with errors in XML files: {", ".join(sorted(new_tests))}')
+        if missing_tests:
+            diffs.append(f'Did not find {len(missing_tests)} tests with errors in XML files: ' +
+                         ", ".join(sorted(missing_tests)))
+        new_tests = get_test_name_diff(old_failed_test_names.fail, xml_failed_test_names.fail)
+        missing_tests = get_test_name_diff(xml_failed_test_names.fail, old_failed_test_names.fail)
+        if new_tests:
+            diffs.append(f'Found {len(new_tests)} new failed tests in XML files: {", ".join(sorted(new_tests))}')
+        if missing_tests:
+            diffs.append(f'Did not find {len(missing_tests)} failed tests in XML files: ' +
+                         ", ".join(sorted(missing_tests)))
+        if diffs:
+            self.log.warning("Found differences when parsing stdout and XML files:\n\t" + "\n\t".join(diffs))
+
     def test_step(self):
         """Run unit tests"""
         self._set_cache_dir()
@@ -482,6 +578,8 @@ def test_step(self):
         env.setvar('SANDCASTLE', '1')
         # Skip this test(s) which is very flaky
         env.setvar('SKIP_TEST_BOTTLENECK', '1')
+        if self.has_xml_test_reports:
+            env.setvar(self.GENERATE_TEST_REPORT_VAR_NAME, '1')
         # Parse excluded_tests and flatten into space separated string
         excluded_tests = []
         for arch, tests in self.cfg['excluded_tests'].items():
@@ -505,8 +603,46 @@ def test_step(self):
 
         tests_out, tests_ec = parsed_test_result
 
-        # Show failed subtests, if any, to aid in debugging failures
         failed_test_names = find_failed_test_names(tests_out)
+        parsed_test_result = parse_test_log(tests_out)
+
+        if self.has_xml_test_reports:
+            test_reports_path = Path(self.start_dir) / 'test' / 'test-reports'
+            try:
+                xml_results = get_test_results(test_reports_path)
+            except ValueError as e:
+                raise EasyBuildError(f"Failed to parse test results at {test_reports_path}: {e}")
+            if not xml_results:
+                files = [file for file in test_reports_path.rglob('*.*') if file.is_file()]
+                if files:
+                    msg = f'Did not find any test result at {test_reports_path}. Files: {", ".join(files)}'
+                else:
+                    msg = f'Failed to find any test report files at {test_reports_path}'
+                raise EasyBuildError(msg)
+            missing_suites = [suite.name for suite in parsed_test_result.failed_suites
+                              if suite.name not in xml_results]
+            if missing_suites:
+                raise EasyBuildError('Parsing the test result files missed the following failed suites: %s',
+                                     ', '.join(sorted(missing_suites)))
+            # Replace results as the files should be more reliable than the parsed ones
+            new_result = TestResult(test_cnt=sum(suite.num_tests for suite in xml_results.values()),
+                                    error_cnt=sum(suite.errors for suite in xml_results.values()),
+                                    failure_cnt=sum(suite.failures for suite in xml_results.values()),
+                                    failed_suites=[suite for suite in xml_results.values()
+                                                   if suite.failures + suite.errors > 0],
+                                    terminated_suites=parsed_test_result.terminated_suites,
+                                    all_failed_suites=parsed_test_result.all_failed_suites)
+            new_failed_names = FailedTestNames(
+                error=list(chain.from_iterable(suite.get_errored_tests() for suite in xml_results.values())),
+                fail=list(chain.from_iterable(suite.get_failed_tests() for suite in xml_results.values()))
+            )
+            if LooseVersion(self.version) < '2.3':
+                # Show differences to results parsed from logfile. In 2.3+ the parsed values are no longer reliable
+                self._compare_test_results(parsed_test_result, new_result, failed_test_names, new_failed_names)
+            parsed_test_result = new_result
+            failed_test_names = new_failed_names
+
+        # Show failed subtests, if any, to aid in debugging failures
         if failed_test_names.error or failed_test_names.fail:
             msg = []
             if failed_test_names.error:
@@ -518,7 +654,6 @@ def test_step(self):
             self.log.warning("\n".join(msg))
 
         # Create clear summary report
-        parsed_test_result = parse_test_log(tests_out)
         # Use a list of messages we can later join together
         failure_msgs = ['\t%s (%s)' % (suite.name, suite.summary) for suite in parsed_test_result.failed_suites]
         # These were accounted for
@@ -568,13 +703,13 @@ def test_step(self):
             # so comparing to max_failed_tests cannot reasonably be done
             if failed_test_suites | set(parsed_test_result.terminated_suites) == all_failed_test_suites:
                 # All failed test suites are either counted or terminated with a signal
-                msg = ('Failing because these test suites were terminated which makes it impossible'
+                msg = ('Failing because these test suites were terminated which makes it impossible '
                        'to accurately count the failed tests: ')
                 msg += ", ".join("%s(%s)" % name_signal
                                  for name_signal in sorted(parsed_test_result.terminated_suites.items()))
             elif len(failed_test_suites) < len(all_failed_test_suites):
-                msg = ('Failing because not all failed tests could be determined. '
-                       'Tests failed to start or the test accounting in the PyTorch EasyBlock needs updating!\n'
+                msg = ('Failing because not all failed tests could be determined. Tests failed to start, crashed '
+                       'or the test accounting in the PyTorch EasyBlock needs updating!\n'
                        'Missing: ' + ', '.join(sorted(all_failed_test_suites - failed_test_suites)))
             else:
                 msg = ('Failing because there were unexpected failures detected: ' +
@@ -589,12 +724,12 @@ def test_step(self):
             # If no tests are supposed to fail don't print the explanation, just fail
             if max_failed_tests == 0:
                 raise EasyBuildError(failure_report)
-            msg = failure_report + '\n\n' + ' '.join([
-                "The PyTorch test suite is known to include some flaky tests,",
-                "which may fail depending on the specifics of the system or the context in which they are run.",
-                "For this PyTorch installation, EasyBuild allows up to %d tests to fail." % max_failed_tests,
+            msg = failure_report + '\n\n' + ''.join([
+                "The PyTorch test suite is known to include some flaky tests, ",
+                "which may fail depending on the specifics of the system or the context in which they are run.\n",
+                f"For this PyTorch installation, EasyBuild allows up to {max_failed_tests} tests to fail.\n",
                 "We recommend to double check that the failing tests listed above ",
-                "are known to be flaky, or do not affect your intended usage of PyTorch.",
+                "are known to be flaky, or do not affect your intended usage of PyTorch.\n",
                 "In case of doubt, reach out to the EasyBuild community (via GitHub, Slack, or mailing list).",
             ])
             # Print to console in addition to file,
@@ -641,23 +776,313 @@ def make_module_req_guess(self):
         return guesses
 
 
-def parse_logfile(file):
-    """Parse the EB logfile and print the failed tests"""
-    if not os.path.isfile(file):
-        raise RuntimeError('Expected a test result file to parse, got: ' + file)
-    with open(file, 'r') as f:
-        content = f.read()
-    m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content)
-    if m:
-        content = content[m.end():]
-        # Heuristic for next possible text added by EasyBuild
-        m = re.search(r'^== \d+-\d+-\d+ .* (pytorch\.py|EasyBuild)', content)
-        if m:
-            content = content[:m.start()]
+# ###################################### Code for parsing PyTorch Test XML files ######################################
+class TestState(Enum):
+    """Result of a test case run"""
+    SUCCESS, FAILURE, ERROR, SKIPPED = "success", "failure", "error", "skipped"
+
+
+if sys.version_info >= (3, 9):
+    from dataclasses import dataclass
 
-    print("Failed test names: ", find_failed_test_names(content))
-    print("Test result: ", parse_test_log(content))
+    @dataclass
+    class TestCase:
+        """Instance of a test method run"""
+        name: str
+        state: TestState
+        num_reruns: int
+else:
+    from collections import namedtuple
+    TestCase = namedtuple('TestCase', ('name', 'state', 'num_reruns'))
+
+
+class TestSuite:
+    """Collection of tests in the same test file"""
+
+    def __init__(self, name: str, errors: int, failures: int, skipped: int, test_cases: Dict[str, TestCase]):
+        num_per_state = Counter(test_case.state for test_case in test_cases.values())
+        if skipped != num_per_state[TestState.SKIPPED]:
+            raise ValueError(f'Expected {skipped} skipped tests but found {num_per_state[TestState.SKIPPED]}')
+        if failures != num_per_state[TestState.FAILURE]:
+            raise ValueError(f'Expected {failures} failed tests but found {num_per_state[TestState.FAILURE]}')
+        if errors != num_per_state[TestState.ERROR]:
+            raise ValueError(f'Expected {errors} errored tests but found {num_per_state[TestState.ERROR]}')
+
+        self.name = name
+        self.errors = errors
+        self.failures = failures
+        self.skipped = skipped
+        self.test_cases = test_cases
+
+    def __getitem__(self, name: str) -> TestCase:
+        """Return testcase by name"""
+        return self.test_cases[name]
+
+    def _adjust_count(self, state: TestState, val: int):
+        """Adjust the relevant state count"""
+        if state == TestState.FAILURE:
+            self.failures += val
+        elif state == TestState.SKIPPED:
+            self.skipped += val
+        elif state == TestState.ERROR:
+            self.errors += val
+        elif state != TestState.SUCCESS:
+            raise ValueError(f'Invalid state {state}')
+
+    @property
+    def num_tests(self) -> int:
+        """Return the total number of tests"""
+        return len(self.test_cases)
+
+    @property
+    def summary(self) -> str:
+        """Return a textual sumary"""
+        num_passed = len(self.test_cases) - self.errors - self.failures - self.skipped
+        return f'{self.failures} failed, {num_passed} passed, {self.skipped} skipped, {self.errors} errors'
+
+    def get_tests(self) -> Iterable[TestCase]:
+        """Return all test instances"""
+        return self.test_cases.values()
+
+    def add_test(self, test: TestCase):
+        """Add a test instance"""
+        if test.name in self.test_cases:
+            raise ValueError(f"Duplicate test case '{test}' in test suite {self.name}")
+        self.test_cases[test.name] = test
+        self._adjust_count(test.state, 1)
+
+    def replace_test(self, test: TestCase):
+        """Replace an existing test instance"""
+        existing_test = self.test_cases.pop(test.name)
+        self._adjust_count(existing_test.state, -1)
+        self.add_test(test)
+
+    def get_errored_tests(self) -> List[str]:
+        """Return a list of test names that exited with an error"""
+        return [test.name for test in self.test_cases.values() if test.state == TestState.ERROR]
+
+    def get_failed_tests(self) -> List[str]:
+        """Return a list of failed test names"""
+        return [test.name for test in self.test_cases.values() if test.state == TestState.FAILURE]
+
+
+def parse_test_cases(test_suite_el: ET.Element) -> List[TestCase]:
+    """Extract all test cases from the testsuite XML element"""
+    test_cases: List[TestCase] = []
+    for testcase in test_suite_el.iterfind("testcase"):
+        classname = testcase.attrib["classname"]
+        test_name = f'{classname}.{testcase.attrib["name"]}'
+        failed, errored, skipped = [testcase.find(tag) is not None for tag in ("failure", "error", "skipped")]
+        num_reruns = len(testcase.findall("rerun"))
+
+        if skipped:
+            if num_reruns > 0 or failed or errored:
+                raise ValueError(f"Invalid state for testcase '{test_name}'")
+            state = TestState.SKIPPED
+        else:
+            state = TestState.FAILURE if failed else TestState.ERROR if errored else TestState.SUCCESS
+
+        test_cases.append(TestCase(test_name, state=state, num_reruns=num_reruns))
+    return test_cases
+
+
+def determine_suite_name(xml_file: Path, test_suite_xml: List[ET.Element]) -> str:
+    """Determine main test suite name from path(s) to match against run_test.py output"""
+    # Gather all file attributes from the test cases if set
+    test_cases = [testcase for suite in test_suite_xml for testcase in suite.iterfind("testcase")]
+    file_attribute = {testcase.attrib.get("file") for testcase in test_cases}
+    file_attribute.discard(None)
+    suite_name = xml_file.parent.name.replace('.', os.path.sep)  # Usually the suite name is the folder name
+    if xml_file.name.startswith('TEST-'):
+        # Python unittest reports have 1 file per test class:
+        # test-reports/python-unittest/test_package/TEST-test_repackage.TestRepackage-20250217120914.xml
+        # -> test_repackage.py ran TestRepackage
+        # test-reports/dist-gloo/distributed.algorithms.test_quantization/TEST-DistQuantizationTests-20250123170925.xml
+        # -> distributed/algorithms/test_quantization ran DistQuantizationTests in dist-gloo variant
+        # Just do a sanity check
+        if len(file_attribute) > 1:
+            raise ValueError(f"Found multiple reported files in unittest report of '{xml_file}': {file_attribute}")
+        reported_file = os.path.basename(file_attribute.pop())
+
+        name_parts = xml_file.name[len('TEST-'):].rsplit('-', 1)[0].rsplit('.', 2)
+        # If there is only one part it is the class -> filename is in the suite name
+        if len(name_parts) == 1:
+            test_file_name = os.path.basename(suite_name) + '.py'
+        else:
+            # Note that multiple parts are possible for sub-test files:
+            # TEST-jit.test_builtins.TestBuiltins (jit/test_builtins.py)
+            test_file_name = name_parts[-2] + '.py'
+        if test_file_name != reported_file:
+            raise ValueError(f"Unexpected file attributes in test cases of '{xml_file}'. "
+                             f"Expected {test_file_name}, got {file_attribute}")
+    elif suite_name == 'run_test':
+        # Generic report, so try to infer from the class names which look like
+        # test.distributed.pipeline.sync.test_stream.TestGetDevice or
+        # test.distributed.pipeline.sync.test_pipe
+        # distributed.elastic.events.lib_test.RdzvEventLibTest
+        def extract_path(classname: str) -> str:
+            parts = classname.split('.')
+            if parts[0] == 'test':
+                if not parts[-1].startswith('test_') and 'Test' in parts[-1]:
+                    # last part is a real class name
+                    parts.pop()
+                return os.path.join(*parts[1:])
+            return None
+        possible_paths = {extract_path(testcase.attrib["classname"]) for testcase in test_cases}
+        possible_paths.discard(None)
+        if not possible_paths:
+            raise ValueError("Could not infer test suite name from class names for {xml_file}.")
+        # We can remove possible class names by only using the common part
+        suite_name = os.path.commonpath(possible_paths)
+        # Strip of common prefix to all classes, but keep the last part for uniqueness
+        non_classname_prefix = os.path.dirname(suite_name).replace(os.path.sep, '.') + '.'
+        for testcase in test_cases:
+            classname = testcase.attrib["classname"]
+            if classname.startswith(non_classname_prefix):
+                testcase.attrib["classname"] = classname[len(non_classname_prefix):]
+    else:
+        # Pytest reports, have the name in folder and file e.g.:
+        # distributed.pipeline.sync.skip.test_stash_pop/distributed.pipeline.sync.skip.test_stash_pop-052ae03efad18.xml
+        # -> distributed/pipeline/sync/skip/test_stash_pop
+        test_file_path = xml_file.name.rsplit('-', 1)[0].replace('.', os.path.sep)
+        if test_file_path != suite_name:
+            raise ValueError(f"Path from folder and filename should be equal. "
+                             f"Got: '{test_file_path}' != '{suite_name}'")
+    # Variant might be dist-gloo, dist-mpi or similar which is the same test code ran in different configurations!
+    variant = xml_file.parent.parent.name
+    if variant not in ('python-unittest', 'python-pytest'):
+        suite_name = os.path.join(variant, suite_name)
+    return suite_name
+
+
+def parse_test_result_file(xml_file: Path) -> List[TestSuite]:
+    """
+    Parses the given XML file into TestSuite and TestCase objects.
+
+    :param file_path: Path to an XML file storing test results.
+    :return: A list of TestSuite objects representing the parsed structure.
+    """
+    root = ET.parse(xml_file).getroot()
+
+    # Normalize root to be a list of test suite elements
+    if root.tag == "testsuites":
+        test_suite_xml: List[ET.Element] = root.findall("testsuite")
+    elif root.tag == "testsuite":
+        test_suite_xml = [root]
+    else:
+        raise ValueError("Root element must be <testsuites> or <testsuite>.")
+
+    # Suite name to correctly deduplicate tests and match against run_test.py output
+    suite_name = determine_suite_name(xml_file, test_suite_xml)
+
+    test_suites: List[TestSuite] = []
+
+    for test_suite in test_suite_xml:
+        errors = int(test_suite.attrib["errors"])
+        failures = int(test_suite.attrib["failures"])
+        skipped = int(test_suite.attrib["skipped"])
+        num_tests = int(test_suite.attrib["tests"])
+        if num_tests < failures + skipped + errors:
+            raise ValueError(f"Invalid test count: "
+                             f"{num_tests} tests, {failures} failures, {skipped} skipped, {errors} errors")
+
+        parsed_test_cases = parse_test_cases(test_suite)
+        if not parsed_test_cases:
+            # No data about the test cases or even the name of the suite, so ignore it
+            if num_tests > 0:
+                raise ValueError("Testsuite contains no test cases, but reports tests.")
+            continue
+
+        test_cases: Dict[str, TestCase] = {}
+        for test_case in parsed_test_cases:
+            if test_case.name in test_cases:
+                raise ValueError(f"Duplicate test case '{test_case}' in test suite {suite_name}")
+            test_cases[test_case.name] = test_case
+
+        if len(test_cases) != num_tests:
+            raise ValueError(f"Number of test cases does not match the total number of tests: "
+                             f"{len(test_cases)} vs. {num_tests}")
+        test_suites.append(
+            TestSuite(name=suite_name, test_cases=test_cases,
+                      errors=errors, failures=failures, skipped=skipped,
+                      )
+        )
+    return test_suites
+
+
+def merge_test_suites(test_suites: Iterable[TestSuite]) -> TestSuite:
+    """
+    Combine results for all given test suites into a single instance.
+    If there is only a single instance in the input, it is returned as is.
+    """
+    test_suites = iter(test_suites)
+    result_suite: TestSuite = next(test_suites)
+    for current_suite in test_suites:
+        for current_test in current_suite.get_tests():
+            try:
+                existing_test = result_suite[current_test.name]
+            except KeyError:
+                result_suite.add_test(current_test)
+            else:
+                if (existing_test.state == TestState.SKIPPED) != (current_test.state == TestState.SKIPPED):
+                    raise ValueError(f"Mismatch in whether test was skipped or not in suite {result_suite.name}: "
+                                     f"{existing_test} vs. {current_test}")
+                # If test was rerun and succeeded use that
+                if current_test.state == TestState.SUCCESS and existing_test.state != TestState.SUCCESS:
+                    result_suite.replace_test(current_test)
+    return result_suite
+
+
+def get_test_results(folder: Path) -> Dict[str, TestSuite]:
+    """Return a dictionary of test results contained in the folder"""
+    if folder.name.startswith('test-reports'):
+        folders = [folder]
+    else:
+        # Gather all folders containing test-reports which might be named "test-reports_1"
+        # Fallback to only the folder
+        folders = [cur_dir for cur_dir in folder.glob('test-reports*') if cur_dir.is_dir()] or [folder]
+
+    files = (file for folder in folders for file in folder.rglob('*.xml'))
+    test_suites = chain.from_iterable(parse_test_result_file(file) for file in files)
+    get_name = attrgetter('name')
+    test_suites = sorted(test_suites, key=get_name)
+    return {name: merge_test_suites(suites) for name, suites in groupby(test_suites, get_name)}
+
+
+def main(arg: Path):
+    if arg.is_file():
+        content = arg.read_text()
+        m = re.search(r'cmd .*python[^ ]* run_test\.py .* exited with exit code.*output', content)
+        if m:
+            content = content[m.end():]
+            # Heuristic for next possible text added by EasyBuild
+            m = re.search(r'^== \d+-\d+-\d+ .* (pytorch\.py|EasyBuild)', content)
+            if m:
+                content = content[:m.start()]
+
+        print("Failed test names: ", find_failed_test_names(content))
+        print("Test result: ", parse_test_log(content))
+    elif not arg.is_dir():
+        msg = f'Expected a test result file or folder with XMLs to parse, got: {arg}'
+        if not arg.exists():
+            msg += ' which does not exist'
+        raise RuntimeError(msg)
+    else:
+        results = get_test_results(Path(arg))
+        print(f"Found {len(results)} test suites:")
+        for suite in results.values():
+            print(f"Suite {suite.name} {suite.num_tests}:\t{suite.summary}")
+        print("Total tests:", sum(suite.num_tests for suite in results.values()))
+        print("Total failures:", sum(suite.failures for suite in results.values()))
+        print("Total skipped:", sum(suite.skipped for suite in results.values()))
+        print("Total errors:", sum(suite.errors for suite in results.values()))
+        failed_suites = [suite.name for suite in results.values() if suite.failures + suite.errors > 0]
+        print(f"Failed suites ({len(failed_suites)}):\n\t" + '\n\t'.join(sorted(failed_suites)))
+        failed_tests = sum((suite.get_failed_tests() for suite in results.values()), [])
+        print(f"Failed tests ({len(failed_tests)}):\n\t" + '\n\t'.join(sorted(failed_tests)))
+        errored_tests = sum((suite.get_errored_tests() for suite in results.values()), [])
+        print(f"Errored tests ({len(errored_tests)}):\n\t" + '\n\t'.join(sorted(errored_tests)))
 
 
 if __name__ == '__main__':
-    parse_logfile(sys.argv[1])
+    main(Path(sys.argv[1]))