Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions easybuild/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
from datetime import datetime
from string import ascii_letters
from textwrap import indent
from typing import Any

import easybuild.tools.environment as env
import easybuild.tools.toolchain as toolchain
Expand Down Expand Up @@ -3497,17 +3498,22 @@ def _sanity_check_step_multi_deps(self, *args, **kwargs):
self.cfg['builddependencies'] = builddeps
self.cfg.iterating = False

def get_from_easyconfig_or_build_option(self, option_name: str) -> Any:
    """
    Return the value of an option, preferring the easyconfig over the CLI/config.

    The easyconfig parameter wins whenever it is set, i.e. is not None; an explicit
    falsy value such as False therefore still overrides the build option.
    Note that this is the opposite precedence from 'cuda_compute_capabilities' in
    sanity_check_cuda, where the build option is consulted first.

    :param option_name: name shared by the easyconfig parameter and the build option
    :return: the easyconfig value if it is not None, else build_option(option_name)
    """
    ec_value = self.cfg.get(option_name)
    if ec_value is None:
        # not set in the easyconfig: fall back to the CLI/config setting
        return build_option(option_name)
    return ec_value
Comment on lines +3501 to +3504
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For CUDA compute capabilities, what is set in the CLI/config takes precedence over what is in the easyconfig, but in this case it is the other way around: what is in the easyconfig takes precedence over what is set in the CLI/config. This inconsistency is likely to lead to confusion (I do not have a solution).


def sanity_check_cuda(self, cuda_dirs=None):
"""Sanity check that binaries/libraries contain device code for the correct architecture targets."""

self.log.info("Checking binaries/libraries for CUDA device code...")

fail_msgs = []
cfg_ccs = build_option('cuda_compute_capabilities') or self.cfg.get('cuda_compute_capabilities', None)
ignore_failures = not build_option('cuda_sanity_check_error_on_failed_checks')
strict_cc_check = build_option('cuda_sanity_check_strict')
accept_ptx_as_devcode = build_option('cuda_sanity_check_accept_ptx_as_devcode')
accept_missing_ptx = build_option('cuda_sanity_check_accept_missing_ptx')
cfg_ccs = build_option('cuda_compute_capabilities') or self.cfg.get('cuda_compute_capabilities')
ignore_failures = not self.get_from_easyconfig_or_build_option('cuda_sanity_check_error_on_failed_checks')
strict_cc_check = self.get_from_easyconfig_or_build_option('cuda_sanity_check_strict')
accept_ptx_as_devcode = self.get_from_easyconfig_or_build_option('cuda_sanity_check_accept_ptx_as_devcode')
accept_missing_ptx = self.get_from_easyconfig_or_build_option('cuda_sanity_check_accept_missing_ptx')

# Construct the list of files to ignore as full paths (cuda_sanity_ignore_files contains the paths
# to ignore, relative to the installation prefix)
Expand Down
7 changes: 7 additions & 0 deletions easybuild/framework/easyconfig/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@
'after make (for e.g.,"test" for make test)'), BUILD],
'bin_lib_subdirs': [[], "List of subdirectories for binaries and libraries, which is used during sanity check "
"to check RPATH linking and banned/required libraries", BUILD],
'cuda_sanity_check_accept_missing_ptx': [None, "If set, override value for "
"`--cuda-sanity-check-accept-missing-ptx`", BUILD],
'cuda_sanity_check_accept_ptx_as_devcode': [None, "If set, override value for "
"`--cuda-sanity-check-accept-ptx-as-devcode`", BUILD],
'cuda_sanity_check_error_on_failed_checks': [None, "If set, override value for "
"`--cuda-sanity-check-error-on-failed-checks`", BUILD],
'cuda_sanity_check_strict': [None, "If set, override value for `--cuda-sanity-check-strict`", BUILD],
'cuda_sanity_ignore_files': [[], "List of files (relative to the installation prefix) for which failures in "
"the CUDA sanity check step are ignored. Typically used for files where you "
"know the CUDA architectures in those files don't match the "
Expand Down
52 changes: 48 additions & 4 deletions test/framework/toy_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ def check_toy(self, installpath, outtxt, name='toy', version='0.0', versionprefi

def _test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, verify=True, fails=False, verbose=True,
raise_error=False, test_report=None, name='toy', versionsuffix='', testing=True,
raise_systemexit=False, force=True, test_report_regexs=None, debug=True, trace=True):
raise_systemexit=False, force=True, test_report_regexs=None, debug=True, trace=True,
return_error=False):
"""Perform a toy build."""
if extra_args is None:
extra_args = []
Expand All @@ -190,9 +191,11 @@ def _test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, verify=Tru
args.append('--dump-test-report=%s' % test_report)
args.extend(extra_args)
myerr = None
outtxt = ''
try:
outtxt = self.eb_main(args, logfile=self.dummylogfn, do_build=True, verbose=verbose,
raise_error=raise_error, testing=testing, raise_systemexit=raise_systemexit)
raise_error=raise_error or return_error,
testing=testing, raise_systemexit=raise_systemexit)
except Exception as err:
myerr = err
if raise_error:
Expand Down Expand Up @@ -233,6 +236,8 @@ def _test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, verify=Tru
msg = "Pattern %s found in full test report: %s" % (regex.pattern, test_report_txt)
self.assertTrue(regex.search(test_report_txt), msg)

if return_error:
return outtxt, myerr
return outtxt

def run_test_toy_build_with_output(self, *args, **kwargs):
Expand Down Expand Up @@ -3236,6 +3241,46 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
assert_regex(additional_cc_str, outtxt, stdout)
assert_regex(missing_ptx_str, outtxt, stdout)

# Test case 10a: Additional device code present, missing PTX for highest arch
args = ['--cuda-compute-capabilities=9.0', '--cuda-sanity-check-error-on-failed-checks',
'--cuda-sanity-check-strict']
write_file(cuobjdump_file, cuobjdump_txt_shebang)
write_file(cuobjdump_file, cuobjdump_txt_sm80, append=True)
write_file(cuobjdump_file, cuobjdump_txt_sm80_ptx, append=True)
adjust_permissions(cuobjdump_file, stat.S_IXUSR, add=True) # Make sure our mock cuobjdump is executable

additional_code_error = "Files with additional CUDA device code: 3."
missing_code_error = "Files missing CUDA device code: 3"
missing_ptx_error = "Files missing CUDA PTX code: 3"
test_cases = (
('',
[missing_code_error, additional_code_error, missing_ptx_error], []),
('cuda_sanity_check_strict=False',
[missing_code_error, missing_ptx_error], [additional_code_error]),
('cuda_sanity_check_accept_ptx_as_devcode=True',
[additional_code_error, missing_ptx_error], [missing_code_error]),
('cuda_sanity_check_strict=False\ncuda_sanity_check_accept_ptx_as_devcode=True',
[missing_ptx_error], [additional_code_error, missing_code_error]),
('cuda_sanity_check_error_on_failed_checks=False',
[], [missing_code_error, additional_code_error, missing_ptx_error]),
)
modified_toy_ec = os.path.join(self.test_prefix, 'toy-0.0-modified.eb')
for extra_code, expected_errors, unexpected_errors in test_cases:
with self.subTest('Extra EC params: ' + extra_code):
write_file(modified_toy_ec, toy_ec_txt + f'\n{extra_code}')
with self.mocked_stdout_stderr():
_, error = self._test_toy_build(ec_file=modified_toy_ec, extra_args=args, raise_error=False,
verify=False, return_error=True)
if expected_errors:
self.assertIsInstance(error, EasyBuildError)
error_str = str(error)
else:
error_str = ''
for msg in expected_errors:
self.assertIn(msg, error_str)
for msg in unexpected_errors:
self.assertNotIn(msg, error_str)

# Test case 1a: test with default options, --cuda-compute-capabilities=8.0 and a binary that contains
# 8.0 device code
# This should succeed (since the default for --cuda-sanity-check-error-on-failed-checks is False)
Expand All @@ -3257,7 +3302,7 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
# It should not matter for the result, but triggers slightly different code paths in easyblock.py
# This should succeed (since the default for --cuda-sanity-check-error-on-failed-checks is False)
# as to not break backwards compatibility
write_file(cuobjdump_file, cuobjdump_txt_shebang),
write_file(cuobjdump_file, cuobjdump_txt_shebang)
write_file(cuobjdump_file, cuobjdump_txt_sm90, append=True)
write_file(cuobjdump_file, cuobjdump_txt_sm80_ptx, append=True)
write_file(cuobjdump_file, cuobjdump_txt_sm70, append=True)
Expand Down Expand Up @@ -3363,7 +3408,6 @@ def assert_cuda_report(missing_cc, additional_cc, missing_ptx, log, stdout=None,
"dependencies = [('CUDA', '5.5.22', '', SYSTEM)]",
"cuda_sanity_ignore_files = ['bin/toy']",
])
write_file(toy_ec_cuda, toy_ec_txt)
write_file(toy_whitelist_ec, toy_ec_txt)

args = ['--cuda-compute-capabilities=9.0', '--cuda-sanity-check-error-on-failed-checks',
Expand Down
Loading