From 98c7eab613579d22d4c1c0d8a3c379f9df9747c9 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Wed, 27 May 2026 13:57:05 +0530 Subject: [PATCH 01/11] Stream system test logs to disk during each docker phase Write build, solver, and fieldcompare output incrementally to system-tests-stdout.log and system-tests-stderr.log, and add phase logs (system-tests-build.log, -solver.log, -fieldcompare.log) so failures can be debugged before a test finishes. Extend CI artifact globs for the new log files. Fixes the log-file part of #790. --- .../generate_reference_results_workflow.yml | 3 + .github/workflows/run_testsuite_workflow.yml | 3 + tools/tests/systemtests/Systemtest.py | 287 +++++++++++------- 3 files changed, 186 insertions(+), 107 deletions(-) diff --git a/.github/workflows/generate_reference_results_workflow.yml b/.github/workflows/generate_reference_results_workflow.yml index 6ac87f392..9e712126b 100644 --- a/.github/workflows/generate_reference_results_workflow.yml +++ b/.github/workflows/generate_reference_results_workflow.yml @@ -98,4 +98,7 @@ jobs: path: | runs/*/system-tests-stdout.log runs/*/system-tests-stderr.log + runs/*/system-tests-build.log + runs/*/system-tests-solver.log + runs/*/system-tests-fieldcompare.log runs/*/*/system-tests_*.log diff --git a/.github/workflows/run_testsuite_workflow.yml b/.github/workflows/run_testsuite_workflow.yml index 085c5286e..d96df2c5f 100644 --- a/.github/workflows/run_testsuite_workflow.yml +++ b/.github/workflows/run_testsuite_workflow.yml @@ -92,6 +92,9 @@ jobs: path: | runs/*/system-tests-stdout.log runs/*/system-tests-stderr.log + runs/*/system-tests-build.log + runs/*/system-tests-solver.log + runs/*/system-tests-fieldcompare.log runs/*/*/system-tests_*.log - name: tidy up the docker run: | diff --git a/tools/tests/systemtests/Systemtest.py b/tools/tests/systemtests/Systemtest.py index f438eac03..36998ae6a 100644 --- a/tools/tests/systemtests/Systemtest.py +++ b/tools/tests/systemtests/Systemtest.py @@ -1,4 +1,5 @@ import subprocess +import threading from typing import List, Dict, Optional, Tuple from jinja2 import Environment, FileSystemLoader from dataclasses import dataclass, field @@ -24,6 +25,46 @@ DIFF_RESULTS_DIR = "diff-results" +COMBINED_STDOUT_LOG = "system-tests-stdout.log" +COMBINED_STDERR_LOG = "system-tests-stderr.log" +PHASE_LOG_FILES = { + "build": "system-tests-build.log", + "solver": "system-tests-solver.log", + "fieldcompare": "system-tests-fieldcompare.log", +} + + +class _SystemtestLogSink: + """Writes subprocess output incrementally to combined and phase log files.""" + + def __init__(self, system_test_dir: Path): + self._system_test_dir = system_test_dir + self._lock = threading.Lock() + self._stdout_path = system_test_dir / COMBINED_STDOUT_LOG + self._stderr_path = system_test_dir / COMBINED_STDERR_LOG + self._stdout_path.write_text("", encoding="utf-8") + self._stderr_path.write_text("", encoding="utf-8") + + def begin_phase(self, phase: str) -> None: + phase_path = self._system_test_dir / PHASE_LOG_FILES[phase] + phase_path.write_text(f"=== {phase} ===\n", encoding="utf-8") + + def append_stdout(self, line: str, phase: str) -> None: + with self._lock: + with self._stdout_path.open("a", encoding="utf-8") as log_file: + log_file.write(line + "\n") + phase_path = self._system_test_dir / PHASE_LOG_FILES[phase] + with phase_path.open("a", encoding="utf-8") as log_file: + log_file.write(line + "\n") + + def append_stderr(self, line: str, phase: str) -> None: + with self._lock: + with self._stderr_path.open("a", encoding="utf-8") as log_file: + log_file.write(line + "\n") + phase_path = self._system_test_dir / PHASE_LOG_FILES[phase] + with phase_path.open("a", encoding="utf-8") as log_file: + log_file.write(f"[stderr] {line}\n") + def slugify(value, allow_unicode=False): """ @@ -399,6 +440,94 @@ def __unpack_reference_results(self) -> Tuple[bool, str]: logging.error(error_message) return False, error_message + def __init_run_logs(self) -> None: + self._log_sink = _SystemtestLogSink(self.system_test_dir) + + def _run_docker_compose_subprocess( + self, + command: List[str], + phase: str, + timeout: int, + ) -> Tuple[int, List[str], List[str]]: + """ + Run a docker compose command, streaming stdout/stderr to log files as they arrive. + """ + stdout_data: List[str] = [] + stderr_data: List[str] = [] + log_sink = getattr(self, "_log_sink", None) + if log_sink is not None: + log_sink.begin_phase(phase) + logging.info(f"Docker compose {phase} for {self}") + + try: + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + start_new_session=True, + cwd=self.system_test_dir, + ) + except Exception as e: + logging.critical(f"Error starting docker compose {phase} command: {e}") + return 1, stdout_data, stderr_data + + def read_stream(stream, is_stderr: bool) -> None: + if stream is None: + return + for line in stream: + line = line.rstrip("\n\r") + if is_stderr: + stderr_data.append(line) + if log_sink is not None: + log_sink.append_stderr(line, phase) + else: + stdout_data.append(line) + if log_sink is not None: + log_sink.append_stdout(line, phase) + stream.close() + + stdout_thread = threading.Thread( + target=read_stream, args=(process.stdout, False), daemon=True) + stderr_thread = threading.Thread( + target=read_stream, args=(process.stderr, True), daemon=True) + stdout_thread.start() + stderr_thread.start() + + try: + exit_code = process.wait(timeout=timeout) + except KeyboardInterrupt as k: + process.kill() + stdout_thread.join(timeout=SHORT_TIMEOUT) + stderr_thread.join(timeout=SHORT_TIMEOUT) + raise KeyboardInterrupt from k + except subprocess.TimeoutExpired: + logging.critical( + f"Systemtest {self} timed out during docker compose {phase} " + f"after {timeout}s. Killing the process.") + process.kill() + try: + process.wait(timeout=SHORT_TIMEOUT) + except subprocess.TimeoutExpired: + pass + exit_code = process.returncode if process.returncode is not None else 1 + except Exception as e: + logging.critical( + f"Systemtest {self} had serious issues during docker compose {phase}: {e}") + process.kill() + try: + process.wait(timeout=SHORT_TIMEOUT) + except subprocess.TimeoutExpired: + pass + exit_code = process.returncode if process.returncode is not None else 1 + + stdout_thread.join(timeout=SHORT_TIMEOUT) + stderr_thread.join(timeout=SHORT_TIMEOUT) + if exit_code is None: + exit_code = process.poll() or 1 + return exit_code, stdout_data, stderr_data + def _run_field_compare(self): """ Executes the field comparison step after unpacking reference results. @@ -410,47 +539,31 @@ def _run_field_compare(self): time_start = time.perf_counter() unpack_success, unpack_error_message = self.__unpack_reference_results() if not unpack_success: + log_sink = getattr(self, "_log_sink", None) + if log_sink is not None: + log_sink.begin_phase("fieldcompare") + log_sink.append_stderr(unpack_error_message, "fieldcompare") elapsed_time = time.perf_counter() - time_start return FieldCompareResult(1, [], [unpack_error_message], self, elapsed_time) docker_compose_content = self.__get_field_compare_compose_file() - stdout_data = [] - stderr_data = [] with open(self.system_test_dir / "docker-compose.field_compare.yaml", 'w') as file: file.write(docker_compose_content) - try: - # Execute docker-compose command - process = subprocess.Popen(['docker', - 'compose', - '--file', - 'docker-compose.field_compare.yaml', - 'up', - '--exit-code-from', - 'field-compare'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - start_new_session=True, - cwd=self.system_test_dir) - - try: - stdout, stderr = process.communicate(timeout=self.timeout) - except KeyboardInterrupt as k: - process.kill() - raise KeyboardInterrupt from k - except Exception as e: - logging.critical( - f"Systemtest {self} had serious issues executing the docker compose command about to kill the docker compose command. Please check the logs! {e}") - process.kill() - process.communicate(timeout=SHORT_TIMEOUT) - stdout_data.extend(stdout.decode().splitlines()) - stderr_data.extend(stderr.decode().splitlines()) - process.poll() - elapsed_time = time.perf_counter() - time_start - return FieldCompareResult(process.returncode, stdout_data, stderr_data, self, elapsed_time) - except Exception as e: - logging.CRITICAL("Error executing docker compose command:", e) - elapsed_time = time.perf_counter() - time_start - return FieldCompareResult(1, stdout_data, stderr_data, self, elapsed_time) + exit_code, stdout_data, stderr_data = self._run_docker_compose_subprocess( + [ + 'docker', + 'compose', + '--file', + 'docker-compose.field_compare.yaml', + 'up', + '--exit-code-from', + 'field-compare', + ], + "fieldcompare", + self.timeout, + ) + elapsed_time = time.perf_counter() - time_start + return FieldCompareResult(exit_code, stdout_data, stderr_data, self, elapsed_time) def __archive_fieldcompare_diffs(self) -> None: """ @@ -502,41 +615,20 @@ def _build_docker(self): with open(self.system_test_dir / "docker-compose.tutorial.yaml", 'w') as file: file.write(docker_compose_content) - stdout_data = [] - stderr_data = [] - - try: - # Execute docker-compose command - process = subprocess.Popen(['docker', - 'compose', - '--file', - 'docker-compose.tutorial.yaml', - 'build'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - start_new_session=True, - cwd=self.system_test_dir) - - try: - stdout, stderr = process.communicate(timeout=GLOBAL_TIMEOUT) - except KeyboardInterrupt as k: - process.kill() - # process.send_signal(9) - raise KeyboardInterrupt from k - except Exception as e: - logging.critical( - f"systemtest {self} had serious issues building the docker images via the `docker compose build` command. About to kill the docker compose command. Please check the logs! {e}") - process.communicate(timeout=SHORT_TIMEOUT) - process.kill() - - stdout_data.extend(stdout.decode().splitlines()) - stderr_data.extend(stderr.decode().splitlines()) - elapsed_time = time.perf_counter() - time_start - return DockerComposeResult(process.returncode, stdout_data, stderr_data, self, elapsed_time) - except Exception as e: - logging.critical(f"Error executing docker compose build command: {e}") - elapsed_time = time.perf_counter() - time_start - return DockerComposeResult(1, stdout_data, stderr_data, self, elapsed_time) + exit_code, stdout_data, stderr_data = self._run_docker_compose_subprocess( + [ + 'docker', + 'compose', + '--file', + 'docker-compose.tutorial.yaml', + 'build', + '--progress=plain', + ], + "build", + GLOBAL_TIMEOUT, + ) + elapsed_time = time.perf_counter() - time_start + return DockerComposeResult(exit_code, stdout_data, stderr_data, self, elapsed_time) def _run_tutorial(self): """ @@ -547,49 +639,27 @@ def _run_tutorial(self): """ logging.debug(f"Running tutorial {self}") time_start = time.perf_counter() - stdout_data = [] - stderr_data = [] - try: - # Execute docker-compose command - process = subprocess.Popen(['docker', - 'compose', - '--file', - 'docker-compose.tutorial.yaml', - 'up'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - start_new_session=True, - cwd=self.system_test_dir) - - try: - stdout, stderr = process.communicate(timeout=self.timeout) - except KeyboardInterrupt as k: - process.kill() - # process.send_signal(9) - raise KeyboardInterrupt from k - except Exception as e: - logging.critical( - f"Systemtest {self} had serious issues executing the docker compose command about to kill the docker compose command. Please check the logs! {e}") - process.kill() - stdout, stderr = process.communicate(timeout=SHORT_TIMEOUT) - process.kill() - - stdout_data.extend(stdout.decode().splitlines()) - stderr_data.extend(stderr.decode().splitlines()) - elapsed_time = time.perf_counter() - time_start - return DockerComposeResult(process.returncode, stdout_data, stderr_data, self, elapsed_time) - except Exception as e: - logging.critical(f"Error executing docker compose up command: {e}") - elapsed_time = time.perf_counter() - time_start - return DockerComposeResult(1, stdout_data, stderr_data, self, elapsed_time) + exit_code, stdout_data, stderr_data = self._run_docker_compose_subprocess( + [ + 'docker', + 'compose', + '--file', + 'docker-compose.tutorial.yaml', + 'up', + ], + "solver", + self.timeout, + ) + elapsed_time = time.perf_counter() - time_start + return DockerComposeResult(exit_code, stdout_data, stderr_data, self, elapsed_time) def __repr__(self): return f"{self.tutorial.name} {self.case_combination}" def __write_logs(self, stdout_data: List[str], stderr_data: List[str]): - with open(self.system_test_dir / "system-tests-stdout.log", 'w') as stdout_file: + with open(self.system_test_dir / COMBINED_STDOUT_LOG, 'w', encoding="utf-8") as stdout_file: stdout_file.write("\n".join(stdout_data)) - with open(self.system_test_dir / "system-tests-stderr.log", 'w') as stderr_file: + with open(self.system_test_dir / COMBINED_STDERR_LOG, 'w', encoding="utf-8") as stderr_file: stderr_file.write("\n".join(stderr_data)) def __apply_max_time_override(self): @@ -632,6 +702,7 @@ def run(self, run_directory: Path): Runs the system test by generating the Docker Compose file, copying everything into a run folder, and executing docker-compose up. """ self.__prepare_for_run(run_directory) + self.__init_run_logs() std_out: List[str] = [] std_err: List[str] = [] @@ -697,6 +768,7 @@ def run_for_reference_results(self, run_directory: Path): Runs the system test by generating the Docker Compose files to generate the reference results """ self.__prepare_for_run(run_directory) + self.__init_run_logs() std_out: List[str] = [] std_err: List[str] = [] docker_build_result = self._build_docker() @@ -744,6 +816,7 @@ def run_only_build(self, run_directory: Path): Runs only the build commmand, for example to preheat the caches of the docker builder. """ self.__prepare_for_run(run_directory) + self.__init_run_logs() std_out: List[str] = [] std_err: List[str] = [] docker_build_result = self._build_docker() From d143329056b70ba9b1ec89698c1433ab1c75abb5 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Wed, 27 May 2026 13:57:12 +0530 Subject: [PATCH 02/11] Improve system test runner console readability Drop INFO prefix on routine log lines, put test index at the start of start/finish messages, and add blank lines between tests for easier scanning in CI and local runs. Addresses the console formatting part of #790. --- tools/tests/systemtests.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tools/tests/systemtests.py b/tools/tests/systemtests.py index 6debe18d0..36fb88f9d 100644 --- a/tools/tests/systemtests.py +++ b/tools/tests/systemtests.py @@ -10,6 +10,15 @@ from paths import PRECICE_TUTORIAL_DIR, PRECICE_TESTS_RUN_DIR, PRECICE_TESTS_DIR +class _ConsoleLogFormatter(logging.Formatter): + """Omit level prefix for INFO/DEBUG; keep it for warnings and errors.""" + + def format(self, record: logging.LogRecord) -> str: + if record.levelno >= logging.WARNING: + return f"{record.levelname}: {record.getMessage()}" + return record.getMessage() + + def main(): parser = argparse.ArgumentParser(description='systemtest') @@ -29,8 +38,10 @@ def main(): # Parse the command-line arguments args = parser.parse_args() - # Configure logging based on the provided log level - logging.basicConfig(level=args.log_level, format='%(levelname)s: %(message)s') + # Configure logging: no "INFO:" prefix on routine messages (#790) + handler = logging.StreamHandler() + handler.setFormatter(_ConsoleLogFormatter()) + logging.basicConfig(level=args.log_level, handlers=[handler]) print(f"Using log-level: {args.log_level}") @@ -72,17 +83,21 @@ def main(): if not systemtests_to_run: raise RuntimeError("Did not find any Systemtests to execute.") - logging.info(f"About to run the following systemtest in the directory {run_directory}:\n {systemtests_to_run}") + total = len(systemtests_to_run) + logging.info( + f"About to run {total} systemtest(s) in the directory {run_directory}:\n {systemtests_to_run}") results = [] for number, systemtest in enumerate(systemtests_to_run, start=1): - logging.info(f"Started running {systemtest}, {number}/{len(systemtests_to_run)}") + print() + logging.info(f"[{number}/{total}] Started running {systemtest}") t = time.perf_counter() result = systemtest.run(run_directory) elapsed_time = time.perf_counter() - t - logging.info(f"Running {systemtest} took {elapsed_time:^.1f} seconds") + logging.info(f"[{number}/{total}] Finished {systemtest} in {elapsed_time:.1f}s") results.append(result) + print() system_test_success = True for result in results: if not result.success: From afb2c139c306993958772a359d41f36778e1ab66 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Wed, 27 May 2026 14:24:01 +0530 Subject: [PATCH 03/11] Improve system test console visuals (#790) --- tools/tests/systemtests.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/tools/tests/systemtests.py b/tools/tests/systemtests.py index 36fb88f9d..e1f2ce466 100644 --- a/tools/tests/systemtests.py +++ b/tools/tests/systemtests.py @@ -7,6 +7,8 @@ from metadata_parser.metdata import Tutorials, Case import logging import time +import os +import sys from paths import PRECICE_TUTORIAL_DIR, PRECICE_TESTS_RUN_DIR, PRECICE_TESTS_DIR @@ -45,6 +47,23 @@ def main(): print(f"Using log-level: {args.log_level}") + gh_actions = os.environ.get("GITHUB_ACTIONS", "").lower() == "true" + ansi_colors = sys.stdout.isatty() and os.environ.get("TERM", "") not in {"", "dumb"} + + def _style(text: str, color_code: int | None) -> str: + if not ansi_colors or color_code is None: + return text + return f"\x1b[{color_code}m{text}\x1b[0m" + + def _group_start(title: str) -> None: + # Only apply folding markers on GitHub actions. + if gh_actions: + print(f"::group::{title}") + + def _group_end() -> None: + if gh_actions: + print("::endgroup::") + systemtests_to_run = [] available_tutorials = Tutorials.from_path(PRECICE_TUTORIAL_DIR) @@ -90,11 +109,22 @@ def main(): results = [] for number, systemtest in enumerate(systemtests_to_run, start=1): print() - logging.info(f"[{number}/{total}] Started running {systemtest}") + test_header = f"[{number}/{total}] {systemtest}" + _group_start(test_header) + print("=" * 80) + logging.info(f"Started {systemtest}") t = time.perf_counter() result = systemtest.run(run_directory) elapsed_time = time.perf_counter() - t - logging.info(f"[{number}/{total}] Finished {systemtest} in {elapsed_time:.1f}s") + + if result.success: + status_label = _style("✅ PASS", 32) + else: + status_label = _style("❌ FAIL", 31) + + logging.info(f"Finished {systemtest} in {elapsed_time:.1f}s [{status_label}]") + _group_end() + print("=" * 80) results.append(result) print() From af979fa1e3ee4697c263d6ea562856fe4a004b11 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Wed, 27 May 2026 14:25:12 +0530 Subject: [PATCH 04/11] Restore [n/m] prefix in Started/Finished visuals --- tools/tests/systemtests.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/tests/systemtests.py b/tools/tests/systemtests.py index e1f2ce466..0db940fe7 100644 --- a/tools/tests/systemtests.py +++ b/tools/tests/systemtests.py @@ -112,7 +112,7 @@ def _group_end() -> None: test_header = f"[{number}/{total}] {systemtest}" _group_start(test_header) print("=" * 80) - logging.info(f"Started {systemtest}") + logging.info(f"[{number}/{total}] Started {systemtest}") t = time.perf_counter() result = systemtest.run(run_directory) elapsed_time = time.perf_counter() - t @@ -122,7 +122,9 @@ def _group_end() -> None: else: status_label = _style("❌ FAIL", 31) - logging.info(f"Finished {systemtest} in {elapsed_time:.1f}s [{status_label}]") + logging.info( + f"[{number}/{total}] Finished {systemtest} in {elapsed_time:.1f}s [{status_label}]" + ) _group_end() print("=" * 80) results.append(result) From f2cdc66f47bdb9fe7a032639cd5438ac595b00f5 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Wed, 27 May 2026 16:09:17 +0530 Subject: [PATCH 05/11] Add changelog entry for system test log streaming (#790) --- changelog-entries/790.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog-entries/790.md diff --git a/changelog-entries/790.md b/changelog-entries/790.md new file mode 100644 index 000000000..1e2cb54f7 --- /dev/null +++ b/changelog-entries/790.md @@ -0,0 +1 @@ +- System tests now write logs during each run (build, solver, fieldcompare), including `system-tests-build.log`, `system-tests-solver.log`, and `system-tests-fieldcompare.log`, so failures can be debugged before a test finishes. Console output is easier to scan in local runs and GitHub Actions (fixes [#790](https://github.com/precice/tutorials/issues/790)). From c1d4b332e73b0b64996b61ef9bf7a8e27f167d47 Mon Sep 17 00:00:00 2001 From: Pranjal Date: Wed, 27 May 2026 16:21:08 +0530 Subject: [PATCH 06/11] Add changelog entry for version 801 --- changelog-entries/{790.md => 801.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog-entries/{790.md => 801.md} (100%) diff --git a/changelog-entries/790.md b/changelog-entries/801.md similarity index 100% rename from changelog-entries/790.md rename to changelog-entries/801.md From 2fcdc4f4af86624883dc992646b901c71c05dc31 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Wed, 27 May 2026 20:59:34 +0530 Subject: [PATCH 07/11] Update system test log documentation and workflow hints Document combined and per-stage system test logs in README and job summary hints, and clarify workflow artifact step names to reflect staged log files. --- .../generate_reference_results_workflow.yml | 2 +- .github/workflows/run_testsuite_workflow.yml | 2 +- tools/tests/README.md | 42 +++++++++++++++---- tools/tests/systemtests/Systemtest.py | 2 +- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/.github/workflows/generate_reference_results_workflow.yml b/.github/workflows/generate_reference_results_workflow.yml index 9e712126b..6211a64fa 100644 --- a/.github/workflows/generate_reference_results_workflow.yml +++ b/.github/workflows/generate_reference_results_workflow.yml @@ -91,7 +91,7 @@ jobs: name: system_tests_run_${{ github.run_id }}_${{ github.run_attempt }}_reference_full path: | runs/* - - name: Archive main log files + - name: Archive system test logs (combined + per-stage) uses: actions/upload-artifact@v7 with: name: system_tests_run_${{ github.run_id }}_${{ github.run_attempt }}_reference_logs diff --git a/.github/workflows/run_testsuite_workflow.yml b/.github/workflows/run_testsuite_workflow.yml index d96df2c5f..853a94e89 100644 --- a/.github/workflows/run_testsuite_workflow.yml +++ b/.github/workflows/run_testsuite_workflow.yml @@ -85,7 +85,7 @@ jobs: name: system_tests_run_${{ github.run_id }}_${{ github.run_attempt }}_full path: | runs/* - - name: Archive main log files + - name: Archive system test logs (combined + per-stage) uses: actions/upload-artifact@v7 with: name: system_tests_run_${{ github.run_id }}_${{ github.run_attempt }}_logs diff --git a/tools/tests/README.md b/tools/tests/README.md index ba4891f6b..0c3c0ca21 100644 --- a/tools/tests/README.md +++ b/tools/tests/README.md @@ -36,7 +36,7 @@ Workflow for the preCICE v3 release testing: 6. Download the build artifacts from Summary > runs. - - In there, you may want to check the `system-tests-stdout.log` and `system-tests-stderr.log` files. + - In there, you may want to inspect the combined logs (`system-tests-stdout.log`, `system-tests-stderr.log`) and the per-stage logs (`system-tests-build.log`, `system-tests-solver.log`, `system-tests-fieldcompare.log`). - The produced results are in `precice-exports/`, the reference results in `reference-results-unpacked`. - Compare using, e.g., ParaView or [fieldcompare](https://gitlab.com/dglaeser/fieldcompare): `fieldcompare dir precice-exports/ reference/`. The `--diff` option will give you `precice-exports/diff_*.vtu` files, while you can also try different tolerances with `-rtol` and `-atol`. @@ -45,7 +45,7 @@ Workflow for the preCICE v3 release testing: To test a certain test-suite defined in `tests.yaml`, use: ```bash -python3 systemtests.py --suites=fenics-adapter, +python3 systemtests.py --suites=fenics_test, ``` To discover all tests, use `python print_test_suites.py`. @@ -59,13 +59,13 @@ Go to Actions > [Run Testsuite (manual)](https://github.com/precice/tutorials/ac After bringing these changes to `master`, the manual triggering option should be visible on the top right. Until that happens, we can only trigger this workflow manually from the [GitHub CLI](https://github.blog/changelog/2021-04-15-github-cli-1-9-enables-you-to-work-with-github-actions-from-your-terminal/): ```shell -gh workflow run run_testsuite_manual.yml -f suites=fenics-adapter --ref=develop +gh workflow run run_testsuite_manual.yml -f suites=fenics_test --ref=develop ``` Another example, to use the latest releases and enable debug information of the tests: ```shell -gh workflow run run_testsuite_manual.yml -f suites=fenics-adapter -f build_args="PRECICE_REF:v3.1.1,PRECICE_PRESET:production-audit,OPENFOAM_ADAPTER_REF:v1.3.0,PYTHON_BINDINGS_REF:v3.1.0,FENICS_ADAPTER_REF:v2.1.0,SU2_VERSION:7.5.1,SU2_ADAPTER_REF:64d4aff,DEALII_ADAPTER_REF:02c5d18,TUTORIALS_REF:340b447" -f log_level=DEBUG --ref=develop +gh workflow run run_testsuite_manual.yml -f suites=fenics_test -f build_args="PRECICE_REF:v3.1.1,PRECICE_PRESET:production-audit,OPENFOAM_ADAPTER_REF:v1.3.0,PYTHON_BINDINGS_REF:v3.1.0,FENICS_ADAPTER_REF:v2.1.0,SU2_VERSION:7.5.1,SU2_ADAPTER_REF:64d4aff,DEALII_ADAPTER_REF:02c5d18,TUTORIALS_REF:340b447" -f log_level=DEBUG --ref=develop ``` where the `*_REF` should be a specific [commit-ish](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefcommit-ishacommit-ishalsocommittish). @@ -75,7 +75,7 @@ Example output: ```text Run cd tools/tests cd tools/tests - python systemtests.py --build_args=PRECICE_REF:v3.1.1,PRECICE_PRESET:production-audit,OPENFOAM_ADAPTER_REF:v1.3.0,PYTHON_BINDINGS_REF:v3.1.0,FENICS_ADAPTER_REF:v2.1.0 --suites=fenics-adapter --log-level=DEBUG + python systemtests.py --build_args=PRECICE_REF:v3.1.1,PRECICE_PRESET:production-audit,OPENFOAM_ADAPTER_REF:v1.3.0,PYTHON_BINDINGS_REF:v3.1.0,FENICS_ADAPTER_REF:v2.1.0 --suites=fenics_test --log-level=DEBUG cd ../../ shell: /usr/bin/bash -e {0} INFO: About to run the following systemtest in the directory /home/precice/runners_root/actions-runner-tutorial/_work/tutorials/tutorials/runs: @@ -105,7 +105,7 @@ In this case, building and running seems to work out, but the tests fail because The easiest way to debug a systemtest run is first to have a look at the output written into the action on GitHub. If this does not provide enough hints, the next step is to download the generated `system_tests_run__` artifact. Note that by default this will only be generated if the systemtests fail. -Inside the archive, a test-specific subfolder like `flow-over-heated-plate_fluid-openfoam-solid-fenics_2023-11-19-211723` contains two log files: `system-tests-stderr.log` and `system-tests-stdout.log`. This can be a starting point for a further investigation. When fieldcompare runs with `--diff`, it writes VTK diff files under `precice-exports/`; if the comparison fails, those files are copied into a `diff-results/` subfolder in the same run directory (mirroring any subpaths under `precice-exports/`) so you can open them (e.g. in ParaView) to see where results differ from the reference. On successful comparisons, `diff-results/` is therefore absent. +Inside the archive, a test-specific subfolder like `flow-over-heated-plate_fluid-openfoam-solid-fenics_2023-11-19-211723` contains combined logs (`system-tests-stderr.log`, `system-tests-stdout.log`) and per-stage logs (`system-tests-build.log`, `system-tests-solver.log`, `system-tests-fieldcompare.log`). These are a good starting point for further investigation. When fieldcompare runs with `--diff`, it writes VTK diff files under `precice-exports/`; if the comparison fails, those files are copied into a `diff-results/` subfolder in the same run directory (mirroring any subpaths under `precice-exports/`) so you can open them (e.g. in ParaView) to see where results differ from the reference. On successful comparisons, `diff-results/` is therefore absent. ## Adding new tests @@ -185,6 +185,25 @@ User-facing tools: - `print_case_combinations.py`: Prints all possible combinations of tutorial cases, using the `metadata.yaml` files. - `build_docker_images.py`: Build the Docker images for each test - `generate_reference_results.py`: Executes the system tests with the versions defined in `reference_versions.yaml` and generates the reference data archives, with the names described in `tests.yaml`. (should only be used by the CI Pipeline) + - `update_requirements_reference.py`: Regenerates `requirements-reference.txt` with pinned Python versions from `reference_versions.yaml` (for reproducibility, see issue #610). + - `validate_requirements_reference.py`: Validates that `requirements-reference.txt` exists and pyprecice version matches `reference_versions.yaml`. + +### requirements-reference.txt + +A lockfile-style list of pinned Python dependency versions (pyprecice, numpy, matplotlib, nutils, setuptools) for reproducible builds and distributions (see issue [#610](https://github.com/precice/tutorials/issues/610)). This file is a **reference manifest only**: tutorial `run.sh` scripts keep using their own `requirements.txt` (with loose constraints) to avoid merge back-and-forth; system tests use the Docker image’s venv. The reference file records “versions known to work” for a distribution. + +**Update at each release.** For best accuracy (match what CI uses), capture from the systemtest Docker image: + +```bash +docker run --rm pip freeze | python3 update_requirements_reference.py --from-freeze +``` + +Or regenerate from `reference_versions.yaml` only (pyprecice from PYTHON_BINDINGS_REF; others from script defaults): + +```bash +cd tools/tests +python3 update_requirements_reference.py +``` Implementation scripts: @@ -316,7 +335,14 @@ Concrete tests are specified centrally in the file `tests.yaml`. For example: ```yaml test_suites: - openfoam-adapter: + openfoam_adapter_pr: + tutorials: + - path: flow-over-heated-plate + case_combination: + - fluid-openfoam + - solid-openfoam + reference_result: ./flow-over-heated-plate/reference-results/fluid-openfoam_solid-openfoam.tar.gz + openfoam_adapter_release: tutorials: - path: flow-over-heated-plate case_combination: @@ -333,7 +359,7 @@ test_suites: The optional `timeout` field (in seconds) sets the maximum time for the solver run and fieldcompare phases of that specific case. If omitted, it defaults to `GLOBAL_TIMEOUT` (currently 900s, overridable via the `PRECICE_SYSTEMTESTS_TIMEOUT` environment variable). -This defines the test suite `openfoam-adapter`, with a case combination to run. +This defines two test suites, namely `openfoam_adapter_pr` and `openfoam_adapter_release`. Each of them defines which case combinations of which tutorials to run. ### Generate Reference Results diff --git a/tools/tests/systemtests/Systemtest.py b/tools/tests/systemtests/Systemtest.py index 36998ae6a..4991abd5f 100644 --- a/tools/tests/systemtests/Systemtest.py +++ b/tools/tests/systemtests/Systemtest.py @@ -160,7 +160,7 @@ def _get_length_of_name(results: List[SystemtestResult]) -> int: with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f: print("\n\n", file=f) print( - "In case a test fails, download the archive from the bottom of this page and look into each `system-tests-stdout.log` and `system-tests-stderr.log`. The time spent in each step might already give useful hints.", + "In case a test fails, download the archive from the bottom of this page and inspect the combined logs (`system-tests-stdout.log`, `system-tests-stderr.log`) and the per-stage logs (`system-tests-build.log`, `system-tests-solver.log`, `system-tests-fieldcompare.log`). The phase runtimes might already give useful hints.", file=f) print( "See the [documentation](https://precice.org/dev-docs-system-tests.html#understanding-what-went-wrong).", From 43ae44a2d5eadf801d518be60165cb445cbb9711 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Wed, 27 May 2026 21:05:04 +0530 Subject: [PATCH 08/11] Keep README changes focused on staged system test logs only --- tools/tests/README.md | 42 ++++++++---------------------------------- 1 file changed, 8 insertions(+), 34 deletions(-) diff --git a/tools/tests/README.md b/tools/tests/README.md index 0c3c0ca21..ba4891f6b 100644 --- a/tools/tests/README.md +++ b/tools/tests/README.md @@ -36,7 +36,7 @@ Workflow for the preCICE v3 release testing: 6. Download the build artifacts from Summary > runs. - - In there, you may want to inspect the combined logs (`system-tests-stdout.log`, `system-tests-stderr.log`) and the per-stage logs (`system-tests-build.log`, `system-tests-solver.log`, `system-tests-fieldcompare.log`). + - In there, you may want to check the `system-tests-stdout.log` and `system-tests-stderr.log` files. - The produced results are in `precice-exports/`, the reference results in `reference-results-unpacked`. - Compare using, e.g., ParaView or [fieldcompare](https://gitlab.com/dglaeser/fieldcompare): `fieldcompare dir precice-exports/ reference/`. The `--diff` option will give you `precice-exports/diff_*.vtu` files, while you can also try different tolerances with `-rtol` and `-atol`. @@ -45,7 +45,7 @@ Workflow for the preCICE v3 release testing: To test a certain test-suite defined in `tests.yaml`, use: ```bash -python3 systemtests.py --suites=fenics_test, +python3 systemtests.py --suites=fenics-adapter, ``` To discover all tests, use `python print_test_suites.py`. @@ -59,13 +59,13 @@ Go to Actions > [Run Testsuite (manual)](https://github.com/precice/tutorials/ac After bringing these changes to `master`, the manual triggering option should be visible on the top right. Until that happens, we can only trigger this workflow manually from the [GitHub CLI](https://github.blog/changelog/2021-04-15-github-cli-1-9-enables-you-to-work-with-github-actions-from-your-terminal/): ```shell -gh workflow run run_testsuite_manual.yml -f suites=fenics_test --ref=develop +gh workflow run run_testsuite_manual.yml -f suites=fenics-adapter --ref=develop ``` Another example, to use the latest releases and enable debug information of the tests: ```shell -gh workflow run run_testsuite_manual.yml -f suites=fenics_test -f build_args="PRECICE_REF:v3.1.1,PRECICE_PRESET:production-audit,OPENFOAM_ADAPTER_REF:v1.3.0,PYTHON_BINDINGS_REF:v3.1.0,FENICS_ADAPTER_REF:v2.1.0,SU2_VERSION:7.5.1,SU2_ADAPTER_REF:64d4aff,DEALII_ADAPTER_REF:02c5d18,TUTORIALS_REF:340b447" -f log_level=DEBUG --ref=develop +gh workflow run run_testsuite_manual.yml -f suites=fenics-adapter -f build_args="PRECICE_REF:v3.1.1,PRECICE_PRESET:production-audit,OPENFOAM_ADAPTER_REF:v1.3.0,PYTHON_BINDINGS_REF:v3.1.0,FENICS_ADAPTER_REF:v2.1.0,SU2_VERSION:7.5.1,SU2_ADAPTER_REF:64d4aff,DEALII_ADAPTER_REF:02c5d18,TUTORIALS_REF:340b447" -f log_level=DEBUG --ref=develop ``` where the `*_REF` should be a specific [commit-ish](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefcommit-ishacommit-ishalsocommittish). @@ -75,7 +75,7 @@ Example output: ```text Run cd tools/tests cd tools/tests - python systemtests.py --build_args=PRECICE_REF:v3.1.1,PRECICE_PRESET:production-audit,OPENFOAM_ADAPTER_REF:v1.3.0,PYTHON_BINDINGS_REF:v3.1.0,FENICS_ADAPTER_REF:v2.1.0 --suites=fenics_test --log-level=DEBUG + python systemtests.py --build_args=PRECICE_REF:v3.1.1,PRECICE_PRESET:production-audit,OPENFOAM_ADAPTER_REF:v1.3.0,PYTHON_BINDINGS_REF:v3.1.0,FENICS_ADAPTER_REF:v2.1.0 --suites=fenics-adapter --log-level=DEBUG cd ../../ shell: /usr/bin/bash -e {0} INFO: About to run the following systemtest in the directory /home/precice/runners_root/actions-runner-tutorial/_work/tutorials/tutorials/runs: @@ -105,7 +105,7 @@ In this case, building and running seems to work out, but the tests fail because The easiest way to debug a systemtest run is first to have a look at the output written into the action on GitHub. If this does not provide enough hints, the next step is to download the generated `system_tests_run__` artifact. Note that by default this will only be generated if the systemtests fail. -Inside the archive, a test-specific subfolder like `flow-over-heated-plate_fluid-openfoam-solid-fenics_2023-11-19-211723` contains combined logs (`system-tests-stderr.log`, `system-tests-stdout.log`) and per-stage logs (`system-tests-build.log`, `system-tests-solver.log`, `system-tests-fieldcompare.log`). These are a good starting point for further investigation. When fieldcompare runs with `--diff`, it writes VTK diff files under `precice-exports/`; if the comparison fails, those files are copied into a `diff-results/` subfolder in the same run directory (mirroring any subpaths under `precice-exports/`) so you can open them (e.g. in ParaView) to see where results differ from the reference. On successful comparisons, `diff-results/` is therefore absent. +Inside the archive, a test-specific subfolder like `flow-over-heated-plate_fluid-openfoam-solid-fenics_2023-11-19-211723` contains two log files: `system-tests-stderr.log` and `system-tests-stdout.log`. This can be a starting point for a further investigation. When fieldcompare runs with `--diff`, it writes VTK diff files under `precice-exports/`; if the comparison fails, those files are copied into a `diff-results/` subfolder in the same run directory (mirroring any subpaths under `precice-exports/`) so you can open them (e.g. in ParaView) to see where results differ from the reference. On successful comparisons, `diff-results/` is therefore absent. ## Adding new tests @@ -185,25 +185,6 @@ User-facing tools: - `print_case_combinations.py`: Prints all possible combinations of tutorial cases, using the `metadata.yaml` files. - `build_docker_images.py`: Build the Docker images for each test - `generate_reference_results.py`: Executes the system tests with the versions defined in `reference_versions.yaml` and generates the reference data archives, with the names described in `tests.yaml`. (should only be used by the CI Pipeline) - - `update_requirements_reference.py`: Regenerates `requirements-reference.txt` with pinned Python versions from `reference_versions.yaml` (for reproducibility, see issue #610). - - `validate_requirements_reference.py`: Validates that `requirements-reference.txt` exists and pyprecice version matches `reference_versions.yaml`. - -### requirements-reference.txt - -A lockfile-style list of pinned Python dependency versions (pyprecice, numpy, matplotlib, nutils, setuptools) for reproducible builds and distributions (see issue [#610](https://github.com/precice/tutorials/issues/610)). This file is a **reference manifest only**: tutorial `run.sh` scripts keep using their own `requirements.txt` (with loose constraints) to avoid merge back-and-forth; system tests use the Docker image’s venv. The reference file records “versions known to work” for a distribution. - -**Update at each release.** For best accuracy (match what CI uses), capture from the systemtest Docker image: - -```bash -docker run --rm pip freeze | python3 update_requirements_reference.py --from-freeze -``` - -Or regenerate from `reference_versions.yaml` only (pyprecice from PYTHON_BINDINGS_REF; others from script defaults): - -```bash -cd tools/tests -python3 update_requirements_reference.py -``` Implementation scripts: @@ -335,14 +316,7 @@ Concrete tests are specified centrally in the file `tests.yaml`. For example: ```yaml test_suites: - openfoam_adapter_pr: - tutorials: - - path: flow-over-heated-plate - case_combination: - - fluid-openfoam - - solid-openfoam - reference_result: ./flow-over-heated-plate/reference-results/fluid-openfoam_solid-openfoam.tar.gz - openfoam_adapter_release: + openfoam-adapter: tutorials: - path: flow-over-heated-plate case_combination: @@ -359,7 +333,7 @@ test_suites: The optional `timeout` field (in seconds) sets the maximum time for the solver run and fieldcompare phases of that specific case. If omitted, it defaults to `GLOBAL_TIMEOUT` (currently 900s, overridable via the `PRECICE_SYSTEMTESTS_TIMEOUT` environment variable). -This defines two test suites, namely `openfoam_adapter_pr` and `openfoam_adapter_release`. Each of them defines which case combinations of which tutorials to run. +This defines the test suite `openfoam-adapter`, with a case combination to run. ### Generate Reference Results From 66b032dc147b31687c7a71aa71539ec7994b3414 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Mon, 1 Jun 2026 14:25:00 +0530 Subject: [PATCH 09/11] Address review feedback on system test logging and console output --- .../generate_reference_results_workflow.yml | 6 +- .github/workflows/run_testsuite_workflow.yml | 6 +- changelog-entries/801.md | 2 +- tools/tests/README.md | 4 +- tools/tests/systemtests.py | 63 +++++++++++-------- tools/tests/systemtests/Systemtest.py | 56 +++++++++-------- 6 files changed, 76 insertions(+), 61 deletions(-) diff --git a/.github/workflows/generate_reference_results_workflow.yml b/.github/workflows/generate_reference_results_workflow.yml index c7ca1bac0..30983dd7e 100644 --- a/.github/workflows/generate_reference_results_workflow.yml +++ b/.github/workflows/generate_reference_results_workflow.yml @@ -92,7 +92,7 @@ jobs: fi git commit -m "${{inputs.commit_msg}}" git push - - name: Archive system test logs (combined + per-stage) + - name: Archive system test logs if: ${{ always() }} uses: actions/upload-artifact@v7 with: @@ -101,8 +101,8 @@ jobs: runs/*/system-tests-stdout.log runs/*/system-tests-stderr.log runs/*/system-tests-build.log - runs/*/system-tests-solver.log - runs/*/system-tests-fieldcompare.log + runs/*/system-tests-run.log + runs/*/system-tests-compare.log runs/*/*/system-tests_*.log - name: Archive run files if: failure() diff --git a/.github/workflows/run_testsuite_workflow.yml b/.github/workflows/run_testsuite_workflow.yml index a0b8f028c..c3178fd65 100644 --- a/.github/workflows/run_testsuite_workflow.yml +++ b/.github/workflows/run_testsuite_workflow.yml @@ -78,7 +78,7 @@ jobs: cd tools/tests python systemtests.py --build_args=${{ inputs.build_args}} --suites=${{ inputs.suites}} --log-level=${{ inputs.log_level}} cd ../../ - - name: Archive system test logs (combined + per-stage) + - name: Archive system test logs if: ${{ always() }} uses: actions/upload-artifact@v7 with: @@ -87,8 +87,8 @@ jobs: runs/*/system-tests-stdout.log runs/*/system-tests-stderr.log runs/*/system-tests-build.log - runs/*/system-tests-solver.log - runs/*/system-tests-fieldcompare.log + runs/*/system-tests-run.log + runs/*/system-tests-compare.log runs/*/*/system-tests_*.log - name: Archive run files if: ${{ failure() || inputs.upload_artifacts == 'TRUE' }} diff --git a/changelog-entries/801.md b/changelog-entries/801.md index 1e2cb54f7..745806f4a 100644 --- a/changelog-entries/801.md +++ b/changelog-entries/801.md @@ -1 +1 @@ -- System tests now write logs during each run (build, solver, fieldcompare), including `system-tests-build.log`, `system-tests-solver.log`, and `system-tests-fieldcompare.log`, so failures can be debugged before a test finishes. Console output is easier to scan in local runs and GitHub Actions (fixes [#790](https://github.com/precice/tutorials/issues/790)). +- System tests now write logs during each run (build, run, compare), including `system-tests-build.log`, `system-tests-run.log`, and `system-tests-compare.log` [#801](https://github.com/precice/tutorials/pull/801). diff --git a/tools/tests/README.md b/tools/tests/README.md index 8a7df6e20..6f7e6cabf 100644 --- a/tools/tests/README.md +++ b/tools/tests/README.md @@ -36,7 +36,7 @@ Workflow for the preCICE v3 release testing: 6. Download the build artifacts from Summary > runs. - - In there, you may want to check the `system-tests-stdout.log` and `system-tests-stderr.log` files. + - In there, inspect the combined logs (`system-tests-stdout.log`, `system-tests-stderr.log`) and the per-stage logs (`system-tests-build.log`, `system-tests-run.log`, `system-tests-compare.log`). - The produced results are in `precice-exports/`, the reference results in `reference-results-unpacked`. - Compare using, e.g., ParaView or [fieldcompare](https://gitlab.com/dglaeser/fieldcompare): `fieldcompare dir precice-exports/ reference/`. The `--diff` option will give you `precice-exports/diff_*.vtu` files, while you can also try different tolerances with `-rtol` and `-atol`. @@ -105,7 +105,7 @@ In this case, building and running seems to work out, but the tests fail because The easiest way to debug a systemtest run is first to have a look at the output written into the action on GitHub. If this does not provide enough hints, the next step is to download the generated `system_tests_run__` artifact. Note that by default this will only be generated if the systemtests fail. -Inside the archive, a test-specific subfolder like `flow-over-heated-plate_fluid-openfoam-solid-fenics_2023-11-19-211723` contains two log files: `system-tests-stderr.log` and `system-tests-stdout.log`. This can be a starting point for a further investigation. When fieldcompare runs with `--diff`, it writes VTK (.vtu) diff files under `precice-exports/`; if the comparison fails, those files are copied into a `diff-results/` subfolder in the same run directory (mirroring any subpaths under `precice-exports/`) so you can open them (e.g. in ParaView) to see where results differ from the reference. On successful comparisons, `diff-results/` is therefore absent. +Inside the archive, a test-specific subfolder like `flow-over-heated-plate_fluid-openfoam-solid-fenics_2023-11-19-211723` contains combined logs (`system-tests-stderr.log`, `system-tests-stdout.log`) and per-stage logs (`system-tests-build.log`, `system-tests-run.log`, `system-tests-compare.log`). These are a good starting point for further investigation. When fieldcompare runs with `--diff`, it writes VTK (.vtu) diff files under `precice-exports/`; if the comparison fails, those files are copied into a `diff-results/` subfolder in the same run directory (mirroring any subpaths under `precice-exports/`) so you can open them (e.g. in ParaView) to see where results differ from the reference. On successful comparisons, `diff-results/` is therefore absent. ## Adding new tests diff --git a/tools/tests/systemtests.py b/tools/tests/systemtests.py index 0db940fe7..9544fb2f4 100644 --- a/tools/tests/systemtests.py +++ b/tools/tests/systemtests.py @@ -40,14 +40,13 @@ def main(): # Parse the command-line arguments args = parser.parse_args() - # Configure logging: no "INFO:" prefix on routine messages (#790) + # Configure logging handler = logging.StreamHandler() handler.setFormatter(_ConsoleLogFormatter()) logging.basicConfig(level=args.log_level, handlers=[handler]) - print(f"Using log-level: {args.log_level}") - gh_actions = os.environ.get("GITHUB_ACTIONS", "").lower() == "true" + # Skip ANSI colors when TERM is unset or "dumb" (minimal terminal, common in CI). ansi_colors = sys.stdout.isatty() and os.environ.get("TERM", "") not in {"", "dumb"} def _style(text: str, color_code: int | None) -> str: @@ -56,7 +55,6 @@ def _style(text: str, color_code: int | None) -> str: return f"\x1b[{color_code}m{text}\x1b[0m" def _group_start(title: str) -> None: - # Only apply folding markers on GitHub actions. if gh_actions: print(f"::group::{title}") @@ -65,6 +63,7 @@ def _group_end() -> None: print("::endgroup::") systemtests_to_run = [] + test_suites_to_execute = [] available_tutorials = Tutorials.from_path(PRECICE_TUTORIAL_DIR) build_args = SystemtestArguments.from_args(args.build_args) @@ -73,7 +72,6 @@ def _group_end() -> None: test_suites_requested = args.suites.split(',') available_testsuites = TestSuites.from_yaml( PRECICE_TESTS_DIR / "tests.yaml", available_tutorials) - test_suites_to_execute = [] for test_suite_requested in test_suites_requested: test_suite_found = available_testsuites.get_by_name( test_suite_requested) @@ -103,33 +101,48 @@ def _group_end() -> None: raise RuntimeError("Did not find any Systemtests to execute.") total = len(systemtests_to_run) - logging.info( - f"About to run {total} systemtest(s) in the directory {run_directory}:\n {systemtests_to_run}") + + if test_suites_to_execute: + print("Selected test suite(s):", flush=True) + print(flush=True) + for test_suite in test_suites_to_execute: + print(f"- {test_suite.name}", flush=True) + print(flush=True) + + print(f"About to run {total} test(s) in the directory {run_directory}:", flush=True) + print(flush=True) + for number, systemtest in enumerate(systemtests_to_run, start=1): + print(f"{number}. {systemtest}", flush=True) + print(flush=True) + print(f"Using log-level: {args.log_level}", flush=True) results = [] for number, systemtest in enumerate(systemtests_to_run, start=1): - print() + print(flush=True) test_header = f"[{number}/{total}] {systemtest}" _group_start(test_header) - print("=" * 80) - logging.info(f"[{number}/{total}] Started {systemtest}") - t = time.perf_counter() - result = systemtest.run(run_directory) - elapsed_time = time.perf_counter() - t - - if result.success: - status_label = _style("✅ PASS", 32) - else: - status_label = _style("❌ FAIL", 31) + if not gh_actions: + print("=" * 80, flush=True) + try: + logging.info(f"[{number}/{total}] Started {systemtest}") + t = time.perf_counter() + result = systemtest.run(run_directory) + elapsed_time = time.perf_counter() - t + + if result.success: + status_label = _style("✅ PASS", 32) + else: + status_label = _style("❌ FAIL", 31) - logging.info( - f"[{number}/{total}] Finished {systemtest} in {elapsed_time:.1f}s [{status_label}]" - ) - _group_end() - print("=" * 80) - results.append(result) + logging.info( + f"{status_label} Finished {systemtest} in {elapsed_time:.1f}s" + ) + results.append(result) + finally: + _group_end() + if not gh_actions: + print("=" * 80, flush=True) - print() system_test_success = True for result in results: if not result.success: diff --git a/tools/tests/systemtests/Systemtest.py b/tools/tests/systemtests/Systemtest.py index 04293ccd4..492b480a5 100644 --- a/tools/tests/systemtests/Systemtest.py +++ b/tools/tests/systemtests/Systemtest.py @@ -27,15 +27,15 @@ COMBINED_STDOUT_LOG = "system-tests-stdout.log" COMBINED_STDERR_LOG = "system-tests-stderr.log" -PHASE_LOG_FILES = { +STAGE_LOG_FILES = { "build": "system-tests-build.log", - "solver": "system-tests-solver.log", - "fieldcompare": "system-tests-fieldcompare.log", + "run": "system-tests-run.log", + "compare": "system-tests-compare.log", } class _SystemtestLogSink: - """Writes subprocess output incrementally to combined and phase log files.""" + """Writes subprocess output incrementally to combined and stage log files.""" def __init__(self, system_test_dir: Path): self._system_test_dir = system_test_dir @@ -45,24 +45,24 @@ def __init__(self, system_test_dir: Path): self._stdout_path.write_text("", encoding="utf-8") self._stderr_path.write_text("", encoding="utf-8") - def begin_phase(self, phase: str) -> None: - phase_path = self._system_test_dir / PHASE_LOG_FILES[phase] - phase_path.write_text(f"=== {phase} ===\n", encoding="utf-8") + def begin_stage(self, stage: str) -> None: + stage_path = self._system_test_dir / STAGE_LOG_FILES[stage] + stage_path.write_text(f"=== {stage} ===\n", encoding="utf-8") - def append_stdout(self, line: str, phase: str) -> None: + def append_stdout(self, line: str, stage: str) -> None: with self._lock: with self._stdout_path.open("a", encoding="utf-8") as log_file: log_file.write(line + "\n") - phase_path = self._system_test_dir / PHASE_LOG_FILES[phase] - with phase_path.open("a", encoding="utf-8") as log_file: + stage_path = self._system_test_dir / STAGE_LOG_FILES[stage] + with stage_path.open("a", encoding="utf-8") as log_file: log_file.write(line + "\n") - def append_stderr(self, line: str, phase: str) -> None: + def append_stderr(self, line: str, stage: str) -> None: with self._lock: with self._stderr_path.open("a", encoding="utf-8") as log_file: log_file.write(line + "\n") - phase_path = self._system_test_dir / PHASE_LOG_FILES[phase] - with phase_path.open("a", encoding="utf-8") as log_file: + stage_path = self._system_test_dir / STAGE_LOG_FILES[stage] + with stage_path.open("a", encoding="utf-8") as log_file: log_file.write(f"[stderr] {line}\n") @@ -121,6 +121,8 @@ def display_systemtestresults_as_table(results: List[SystemtestResult]): """ Prints the result in a nice tabluated way to get an easy overview """ + print() + def _get_length_of_name(results: List[SystemtestResult]) -> int: return max(len(str(result.systemtest)) for result in results) @@ -160,7 +162,7 @@ def _get_length_of_name(results: List[SystemtestResult]) -> int: with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f: print("\n\n", file=f) print( - "In case a test fails, download the archive from the bottom of this page and inspect the combined logs (`system-tests-stdout.log`, `system-tests-stderr.log`) and the per-stage logs (`system-tests-build.log`, `system-tests-solver.log`, `system-tests-fieldcompare.log`). The phase runtimes might already give useful hints.", + "In case a test fails, download the archive from the bottom of this page and inspect the combined logs (`system-tests-stdout.log`, `system-tests-stderr.log`) and the per-stage logs (`system-tests-build.log`, `system-tests-run.log`, `system-tests-compare.log`). The stage runtimes might already give useful hints.", file=f) print( "See the [documentation](https://precice.org/dev-docs-system-tests.html#understanding-what-went-wrong).", @@ -446,7 +448,7 @@ def __init_run_logs(self) -> None: def _run_docker_compose_subprocess( self, command: List[str], - phase: str, + stage: str, timeout: int, ) -> Tuple[int, List[str], List[str]]: """ @@ -456,8 +458,8 @@ def _run_docker_compose_subprocess( stderr_data: List[str] = [] log_sink = getattr(self, "_log_sink", None) if log_sink is not None: - log_sink.begin_phase(phase) - logging.info(f"Docker compose {phase} for {self}") + log_sink.begin_stage(stage) + logging.info(f"Docker compose {stage} for {self}") try: process = subprocess.Popen( @@ -470,7 +472,7 @@ def _run_docker_compose_subprocess( cwd=self.system_test_dir, ) except Exception as e: - logging.critical(f"Error starting docker compose {phase} command: {e}") + logging.critical(f"Error starting docker compose {stage} command: {e}") return 1, stdout_data, stderr_data def read_stream(stream, is_stderr: bool) -> None: @@ -481,11 +483,11 @@ def read_stream(stream, is_stderr: bool) -> None: if is_stderr: stderr_data.append(line) if log_sink is not None: - log_sink.append_stderr(line, phase) + log_sink.append_stderr(line, stage) else: stdout_data.append(line) if log_sink is not None: - log_sink.append_stdout(line, phase) + log_sink.append_stdout(line, stage) stream.close() stdout_thread = threading.Thread( @@ -504,7 +506,7 @@ def read_stream(stream, is_stderr: bool) -> None: raise KeyboardInterrupt from k except subprocess.TimeoutExpired: logging.critical( - f"Systemtest {self} timed out during docker compose {phase} " + f"Systemtest {self} timed out during docker compose {stage} " f"after {timeout}s. Killing the process.") process.kill() try: @@ -514,7 +516,7 @@ def read_stream(stream, is_stderr: bool) -> None: exit_code = process.returncode if process.returncode is not None else 1 except Exception as e: logging.critical( - f"Systemtest {self} had serious issues during docker compose {phase}: {e}") + f"Systemtest {self} had serious issues during docker compose {stage}: {e}") process.kill() try: process.wait(timeout=SHORT_TIMEOUT) @@ -570,8 +572,8 @@ def _run_field_compare(self): if not unpack_success: log_sink = getattr(self, "_log_sink", None) if log_sink is not None: - log_sink.begin_phase("fieldcompare") - log_sink.append_stderr(unpack_error_message, "fieldcompare") + log_sink.begin_stage("compare") + log_sink.append_stderr(unpack_error_message, "compare") elapsed_time = time.perf_counter() - time_start return FieldCompareResult(1, [], [unpack_error_message], self, elapsed_time) docker_compose_content = self.__get_field_compare_compose_file() @@ -588,7 +590,7 @@ def _run_field_compare(self): '--exit-code-from', 'field-compare', ], - "fieldcompare", + "compare", self.timeout, ) elapsed_time = time.perf_counter() - time_start @@ -648,10 +650,10 @@ def _build_docker(self): [ 'docker', 'compose', + '--progress=plain', '--file', 'docker-compose.tutorial.yaml', 'build', - '--progress=plain', ], "build", GLOBAL_TIMEOUT, @@ -676,7 +678,7 @@ def _run_tutorial(self): 'docker-compose.tutorial.yaml', 'up', ], - "solver", + "run", self.timeout, ) elapsed_time = time.perf_counter() - time_start From 02b001b14db50f438f0c55d35d3d1ea5f0758523 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Mon, 1 Jun 2026 18:36:27 +0530 Subject: [PATCH 10/11] Tweak system test console output for GitHub Actions Print PASS/FAIL after ::endgroup:: so results stay visible when groups are collapsed, add a blank line before the per-test summary, and show status symbols in the results table and job summary. --- tools/tests/systemtests.py | 25 +++++++++++-------------- tools/tests/systemtests/Systemtest.py | 8 ++++++-- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/tools/tests/systemtests.py b/tools/tests/systemtests.py index 9544fb2f4..10531b33d 100644 --- a/tools/tests/systemtests.py +++ b/tools/tests/systemtests.py @@ -56,11 +56,11 @@ def _style(text: str, color_code: int | None) -> str: def _group_start(title: str) -> None: if gh_actions: - print(f"::group::{title}") + print(f"::group::{title}", flush=True) def _group_end() -> None: if gh_actions: - print("::endgroup::") + print("::endgroup::", flush=True) systemtests_to_run = [] test_suites_to_execute = [] @@ -119,12 +119,11 @@ def _group_end() -> None: results = [] for number, systemtest in enumerate(systemtests_to_run, start=1): print(flush=True) - test_header = f"[{number}/{total}] {systemtest}" - _group_start(test_header) - if not gh_actions: - print("=" * 80, flush=True) + started_header = f"[{number}/{total}] Started {systemtest}" + _group_start(started_header) try: - logging.info(f"[{number}/{total}] Started {systemtest}") + if not gh_actions: + logging.info(started_header) t = time.perf_counter() result = systemtest.run(run_directory) elapsed_time = time.perf_counter() - t @@ -133,16 +132,14 @@ def _group_end() -> None: status_label = _style("✅ PASS", 32) else: status_label = _style("❌ FAIL", 31) - - logging.info( - f"{status_label} Finished {systemtest} in {elapsed_time:.1f}s" - ) - results.append(result) finally: _group_end() - if not gh_actions: - print("=" * 80, flush=True) + print(f"{status_label} Finished {systemtest} in {elapsed_time:.1f}s", flush=True) + print(f"[{number}/{total}] {systemtest}", flush=True) + results.append(result) + + print(flush=True) system_test_success = True for result in results: if not result.success: diff --git a/tools/tests/systemtests/Systemtest.py b/tools/tests/systemtests/Systemtest.py index 492b480a5..cbee71c4e 100644 --- a/tools/tests/systemtests/Systemtest.py +++ b/tools/tests/systemtests/Systemtest.py @@ -117,6 +117,10 @@ class SystemtestResult: fieldcompare_time: float # in seconds +def _success_status_symbol(success: bool) -> str: + return "✅" if success else "❌" + + def display_systemtestresults_as_table(results: List[SystemtestResult]): """ Prints the result in a nice tabluated way to get an easy overview @@ -129,7 +133,7 @@ def _get_length_of_name(results: List[SystemtestResult]) -> int: max_name_length = _get_length_of_name(results) header = f"| {'systemtest':<{max_name_length + 2}} "\ - f"| {'success':^7} "\ + f"| {'status':^7} "\ f"| {'building time [s]':^17} "\ f"| {'solver time [s]':^15} "\ f"| {'fieldcompare time [s]':^21} |" @@ -148,7 +152,7 @@ def _get_length_of_name(results: List[SystemtestResult]) -> int: for result in results: row = f"| {str(result.systemtest):<{max_name_length + 2}} "\ - f"| {result.success:^7} "\ + f"| {_success_status_symbol(result.success):^7} "\ f"| {result.build_time:^17.1f} "\ f"| {result.solver_time:^15.1f} "\ f"| {result.fieldcompare_time:^21.1f} |" From 2bacf6f6f35f98aa8fcbbae4264a41658bc20504 Mon Sep 17 00:00:00 2001 From: PranjalManhgaye Date: Mon, 1 Jun 2026 19:15:41 +0530 Subject: [PATCH 11/11] Restore blank lines between system tests on GitHub Actions Add an empty line after each test's PASS/FAIL output so logs stay readable now that === banners are removed. --- tools/tests/systemtests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/tests/systemtests.py b/tools/tests/systemtests.py index 10531b33d..42d695a84 100644 --- a/tools/tests/systemtests.py +++ b/tools/tests/systemtests.py @@ -137,6 +137,7 @@ def _group_end() -> None: print(f"{status_label} Finished {systemtest} in {elapsed_time:.1f}s", flush=True) print(f"[{number}/{total}] {systemtest}", flush=True) + print(flush=True) results.append(result) print(flush=True)