diff --git a/test/testflows/clickhouse_backup/tests/fips_140_3.py b/test/testflows/clickhouse_backup/tests/fips_140_3.py index efa7bde1..af913e5d 100644 --- a/test/testflows/clickhouse_backup/tests/fips_140_3.py +++ b/test/testflows/clickhouse_backup/tests/fips_140_3.py @@ -183,7 +183,8 @@ def _check_tls_handshake(self, node, target, tls_flag, cipher=None, ciphersuites @TestStep(Check) -def _check_outbound_tls_handshake(self, node, command, expected_success): +def _check_outbound_tls_handshake(self, node, command, expected_success, + allow_remote_auth_error_as_skip=False): """Run `command` inside `node` and assert on the *outbound* TLS outcome. `command` invokes `clickhouse-backup-fips` with @@ -197,6 +198,12 @@ def _check_outbound_tls_handshake(self, node, command, expected_success): * `expected_success=False` - the FIPS policy MUST reject the handshake. + + In some CI environments the S3 FIPS hostname can resolve to public AWS + instead of the local `openssl s_server` sidecar used by this test. For + those probes, `allow_remote_auth_error_as_skip=True` can skip the check when + an AWS auth failure marker (e.g. `InvalidAccessKeyId`) is present but no TLS + rejection marker is visible. """ bounded = f"timeout {CLI_CMD_TIMEOUT_SEC} {command}" result = node.cmd(bounded, no_checks=True) @@ -210,6 +217,12 @@ def _check_outbound_tls_handshake(self, node, command, expected_success): or "tls: no cipher suite supported" in output_lower or "tls: protocol version not supported" in output_lower ) + remote_auth_failed = ( + "invalidaccesskeyid" in output_lower + or "signaturedoesnotmatch" in output_lower + or "accessdenied" in output_lower + or "statuscode: 403" in output_lower + ) if expected_success: assert not handshake_rejected, error( @@ -217,6 +230,11 @@ def _check_outbound_tls_handshake(self, node, command, expected_success): f"contains a TLS handshake-failure marker.\n{output}" ) else: + if allow_remote_auth_error_as_skip and remote_auth_failed: + skip( + "No TLS rejection marker was found and remote auth failed " + "(endpoint resolved outside the local openssl sidecar). " + ) assert handshake_rejected, error( f"Expected the FIPS policy to REJECT the handshake, but no " f"TLS handshake-failure marker was found.\n{output}" @@ -279,7 +297,8 @@ def _assert_s_server_logs_match_outcome(self, cluster, aux_name, expected_succes def _check_outbound_tls_with_cipher(self, cluster, backup_fips, *, listen, command, expected_success, tls_version, cipher=None, ciphersuites=None, - aux_name='openssl_server'): + aux_name='openssl_server', + allow_remote_auth_error_as_skip=False): """One outbound TLS handshake check for one cipher profile. Brings up an `openssl s_server` container, runs `command` inside @@ -304,6 +323,9 @@ def _check_outbound_tls_with_cipher(self, cluster, backup_fips, *, listen, comma `-tls1_3`); mutually exclusive with `cipher`. :param aux_name: docker hostname / network alias of the aux container; callers can name it after a target hostname. + :param allow_remote_auth_error_as_skip: if `True`, a remote auth failure + marker can skip a negative expectation when TLS rejection markers are + absent (used for S3 CI stability when DNS bypasses the sidecar). """ ssl_dir = cluster.ssl_certs_dir try: @@ -319,6 +341,7 @@ def _check_outbound_tls_with_cipher(self, cluster, backup_fips, *, listen, comma node=backup_fips, command=command, expected_success=expected_success, + allow_remote_auth_error_as_skip=allow_remote_auth_error_as_skip, ) _assert_s_server_logs_match_outcome( cluster=cluster, @@ -405,7 +428,7 @@ def gofips140_build_flags_present(self): backup_fips = _require_fips_container(self) with When("I run `go version -m` against the FIPS binary"): - result = backup_fips.cmd(f"go version -m {FIPS_BINARY_IN_CONTAINER}") + result = backup_fips.cmd(f"env -u GODEBUG go version -m {FIPS_BINARY_IN_CONTAINER}") debug(f"STDOUT:\n{result.output}") debug(f"EXIT CODE: {result.exitcode}") debug(f"FIPS_GO_BUILD_SETTING: {FIPS_GO_BUILD_SETTING!r}") @@ -417,96 +440,15 @@ def gofips140_build_flags_present(self): ) -@TestScenario -@Requirements( - RQ_SRS_013_ClickHouse_BackupUtility_FIPS_GODEBUG_Unset("1.0"), - RQ_SRS_013_ClickHouse_BackupUtility_FIPS_GODEBUG_On("1.0"), - RQ_SRS_013_ClickHouse_BackupUtility_FIPS_GODEBUG_Only("1.0"), -) -def godebug_fips140_modes(self): - """Validate the three Go FIPS runtime modes for `clickhouse-backup-fips`. - - For each Go FIPS runtime mode listed in `FIPS_GODEBUG_MODES`: - - * `GODEBUG` not set - FIPS active by build-time default (`GOFIPS140=v1.0.0`). - * `GODEBUG=fips140=on` - FIPS active without strict enforcement. - * `GODEBUG=fips140=only` - FIPS active with strict enforcement - (expected error or panic). - - Run two checks against the FIPS-compatible Altinity ClickHouse - server for each mode (for example `altinity/clickhouse-server:25.3.8.30001.altinityfips`) on - secure native TCP `9440`: - - 1. `clickhouse-backup-fips --version` reports `FIPS 140-3: true`. - 2. `clickhouse-backup-fips -c tables` exits `0`. - The CH server container is brought up once and reused across all three modes. - """ - - FIPS_GODEBUG_MODES = (None, "fips140=on", "fips140=only") - backup_fips = _require_fips_container(self) - cluster = self.context.cluster - - listeners_xml = os.path.join( - cluster.tests_dir, - "configs/clickhouse_fips_server/config.d/listeners.xml", - ) - - try: - with Given("a dedicated FIPS-compatible Altinity ClickHouse server"): - cluster.start_clickhouse_server_container( - name=FIPS_CH_SERVER_NAME, - image_tag=FIPS_CH_SERVER_IMAGE, - extra_volumes=[ - (cluster.ssl_certs_dir, "/etc/clickhouse-server/ssl"), - (listeners_xml, "/etc/clickhouse-server/config.d/listeners.xml"), - ], - ) - - with When("for each Go FIPS runtime mode I check `--version` and `tables`"): - for godebug in FIPS_GODEBUG_MODES: - label = godebug if godebug else "unset" # Show "unset" when GODEBUG is not set. - - with Check(f"GODEBUG {label}"): - status, output = _read_fips_status( - backup_fips, FIPS_BINARY_IN_CONTAINER, godebug=godebug, - ) - - with Then( - f"`clickhouse-backup-fips --version` reports " - f"`{FIPS_VERSION_LABEL} {FIPS_VERSION_TRUE}` (GODEBUG={label})" - ): - assert status is not None, error( - f"`{FIPS_VERSION_LABEL}` line missing from " - f"`--version` (GODEBUG={label}):\n{output}" - ) - assert status == FIPS_VERSION_TRUE, error( - f"Expected `{FIPS_VERSION_LABEL} {FIPS_VERSION_TRUE}` " - f"(GODEBUG={label}), got `{status}`.\n{output}" - ) - - with Then( - f"`clickhouse-backup-fips tables` exits 0 against the " - f"FIPS ClickHouse server (GODEBUG={label})" - ): - _assert_tables_succeeds( - backup_fips=backup_fips, - config=FIPS_CONNECTIVITY_FIPS_CONFIG_PATH, - godebug=godebug, - ) - finally: - cluster.stop_auxiliary_container(FIPS_CH_SERVER_NAME) - - @TestStep(Check) -def _assert_tables_succeeds(self, backup_fips, *, config, godebug): +def _assert_tables_succeeds(self, backup_fips, *, config, godebug=None): """Run `clickhouse-backup-fips -c tables` and assert it exits 0. Wrapped in `timeout` so a hung handshake or DNS failure does not block the tests for a long time; on success the command is fast. :param godebug: value for `GODEBUG` (e.g. `"fips140=only"`); pass - `None` to leave `GODEBUG` unset and rely on the FIPS binary's - build-time default. + `None` (default) to inherit the FIPS mode from the container. """ env_prefix = f"env GODEBUG={godebug} " if godebug else "" cmd = ( @@ -514,7 +456,7 @@ def _assert_tables_succeeds(self, backup_fips, *, config, godebug): f"{FIPS_BINARY_IN_CONTAINER} -c {config} tables" ) result = backup_fips.cmd(cmd, no_checks=True) - label = godebug if godebug else "unset" + label = godebug if godebug else "container-default" assert result.exitcode == 0, error( f"`clickhouse-backup-fips tables` against `{config}` failed " f"(exit={result.exitcode}, GODEBUG={label}):\n{result.output}" @@ -522,11 +464,12 @@ def _assert_tables_succeeds(self, backup_fips, *, config, godebug): @TestStep(Check) -def _assert_tables_fails(self, backup_fips, *, config, godebug, reason): +def _assert_tables_fails(self, backup_fips, *, config, reason, godebug=None): """Run `clickhouse-backup-fips -c tables` and assert it fails. Negative option to `_assert_tables_succeeds`: require non-zero - exit and ensure no table-list success markers are present. + exit and ensure no table-list success markers are present. Pass `godebug` + `None` (default) to inherit the FIPS mode from the container. """ env_prefix = f"env GODEBUG={godebug} " if godebug else "" cmd = ( @@ -535,7 +478,7 @@ def _assert_tables_fails(self, backup_fips, *, config, godebug, reason): ) result = backup_fips.cmd(cmd, no_checks=True) output = result.output or "" - label = godebug if godebug else "unset" + label = godebug if godebug else "container-default" assert result.exitcode != 0, error( f"`clickhouse-backup-fips tables` unexpectedly succeeded against `{config}` " @@ -583,7 +526,6 @@ def connectivity_against_fips_clickhouse_server(self): _assert_tables_succeeds( backup_fips=backup_fips, config=FIPS_CONNECTIVITY_FIPS_CONFIG_PATH, - godebug="fips140=only", ) finally: cluster.stop_auxiliary_container(FIPS_CH_SERVER_NAME) @@ -628,7 +570,6 @@ def connectivity_against_non_fips_clickhouse_server(self): _assert_tables_fails( backup_fips=backup_fips, config=FIPS_CONNECTIVITY_NONFIPS_CONFIG_PATH, - godebug="fips140=only", reason=( "The non-FIPS ClickHouse server runs with image defaults " "and does not expose the secure native TCP port `:9440` " @@ -733,13 +674,12 @@ def inbound_tls_cipher_negotiation(self): try: with Given( - "I start clickhouse-backup-fips server with GODEBUG=fips140=only and the TLS API config", + "I start clickhouse-backup-fips server with the TLS API config", description=f"config={FIPS_TLS_CONFIG_PATH} listen=:{FIPS_TLS_LISTEN_PORT}", ): backup_fips.start_server( binary=FIPS_BINARY_IN_CONTAINER, config=FIPS_TLS_CONFIG_PATH, - extra_env={"GODEBUG": "fips140=only"}, listen_port=FIPS_TLS_LISTEN_PORT, timeout=60, ) @@ -828,7 +768,7 @@ def outbound_tls_cipher_negotiation(self): cluster = self.context.cluster cmd = ( - f"env GODEBUG=fips140=only {FIPS_BINARY_IN_CONTAINER} " + f"{FIPS_BINARY_IN_CONTAINER} " f"-c {FIPS_OUTBOUND_CH_CONFIG_PATH} tables 2>&1" # `2>&1` redirects stderr to stdout. ) @@ -873,17 +813,16 @@ def outbound_tls_cipher_negotiation(self): RQ_SRS_013_ClickHouse_BackupUtility_FIPS_TLS_Outbound_S3_Ciphers_Approved("1.0"), ) def outbound_tls_to_s3_endpoint_with_openssl_s_server(self): - """Validate `clickhouse-backup-fips` can connect to an S3 HTTPS endpoint over a FIPS-approved cipher. + """Validate outbound S3 TLS policy with approved and non-approved profiles. For each FIPS-approved cipher / cipher suite, start an `openssl s_server` that offers only that profile and run: GODEBUG=fips140=only clickhouse-backup-fips -c list remote - inside the FIPS container, asserting the TLS handshake is NOT rejected by - the FIPS policy. - - This scenario is a positive *compatibility* check only. + inside the FIPS container, asserting: + * approved profiles are accepted by FIPS policy; + * non-approved profiles are rejected by FIPS policy. The `s_server` container reuses the cluster's static SSL fixtures and the `clickhouse-backup-fips` config is the static @@ -908,15 +847,13 @@ def outbound_tls_to_s3_endpoint_with_openssl_s_server(self): backup_fips = _require_fips_container(self) cluster = self.context.cluster - # `env GODEBUG=...` so the `timeout` prefix added inside - # `_check_outbound_tls_handshake` can exec a real program. # cmd - command to run inside the FIPS container cmd = ( - f"env GODEBUG=fips140=only {FIPS_BINARY_IN_CONTAINER} " + f"{FIPS_BINARY_IN_CONTAINER} " f"-c {FIPS_OUTBOUND_S3_CONFIG_PATH} list remote 2>&1" # `2>&1` redirects stderr to stdout. ) - with When("I try each FIPS-approved TLSv1.3 cipher suite on the S3 endpoint"): + with Check("I try each FIPS-approved TLSv1.3 cipher suite on the S3 endpoint"): for ciphersuite in FIPS_TLS13_APPROVED: with Check(f"TLSv1.3 ciphersuite {ciphersuite} should be accepted"): _check_outbound_tls_with_cipher( @@ -926,7 +863,7 @@ def outbound_tls_to_s3_endpoint_with_openssl_s_server(self): ciphersuites=ciphersuite, command=cmd, expected_success=True, ) - with And("I try each FIPS-approved TLSv1.2 cipher on the S3 endpoint"): + with Check("I try each FIPS-approved TLSv1.2 cipher on the S3 endpoint"): for cipher in FIPS_TLS12_APPROVED: with Check(f"TLSv1.2 cipher {cipher} should be accepted"): _check_outbound_tls_with_cipher( @@ -936,6 +873,28 @@ def outbound_tls_to_s3_endpoint_with_openssl_s_server(self): cipher=cipher, command=cmd, expected_success=True, ) + with Check("I try each non-FIPS TLSv1.3 cipher suite on the S3 endpoint"): + for ciphersuite in NON_FIPS_TLS13: + with Check(f"TLSv1.3 ciphersuite {ciphersuite} should be rejected"): + _check_outbound_tls_with_cipher( + cluster=cluster, backup_fips=backup_fips, + aux_name=OPENSSL_S3_FIPS_AUX_NAME, + listen=FIPS_OUTBOUND_S3_TLS_PORT, tls_version="-tls1_3", + ciphersuites=ciphersuite, command=cmd, expected_success=False, + allow_remote_auth_error_as_skip=True, + ) + + with Check("I try each non-FIPS TLSv1.2 cipher on the S3 endpoint"): + for cipher in NON_FIPS_TLS12_OUTBOUND: + with Check(f"TLSv1.2 cipher {cipher} should be rejected"): + _check_outbound_tls_with_cipher( + cluster=cluster, backup_fips=backup_fips, + aux_name=OPENSSL_S3_FIPS_AUX_NAME, + listen=FIPS_OUTBOUND_S3_TLS_PORT, tls_version="-tls1_2", + cipher=cipher, command=cmd, expected_success=False, + allow_remote_auth_error_as_skip=True, + ) + # Startup/core CASTs used during `clickhouse-backup-fips --version`. Forcing # any of these via `failfipscast` must abort startup deterministically. @@ -1040,6 +999,11 @@ def forced_cast_failures(self): ) else: with Then(f"`{cast}` was not exercised and startup stayed clean"): + assert result.exitcode == 0, error( + f"first-use CAST `{cast}` exited non-zero unexpectedly.\n{output}" + ) + + # first-use CASTs abort only when the CAST is actually reached; otherwise startup stays clean without the marker. assert not marker_present, error( f"first-use CAST `{cast}` showed " f"`{FIPS_FAILFIPSCAST_MARKER}` but exited 0.\n{output}" @@ -1096,7 +1060,6 @@ def connection_to_fips_clickhouse_with_nonfips_config(self): ): cmd = ( f"timeout {CLI_CMD_TIMEOUT_SEC} " - f"env GODEBUG=fips140=only " f"{FIPS_BINARY_IN_CONTAINER} " f"-c {FIPS_CONNECTIVITY_FIPS_MISCONFIG_PATH} tables 2>&1" # `2>&1` redirects stderr to stdout. ) @@ -1138,11 +1101,10 @@ def server_listens_only_on_fips_api_port(self): backup_fips = _require_fips_container(self) try: - with Given("clickhouse-backup-fips server is started with GODEBUG=fips140=only"): + with Given("clickhouse-backup-fips server is started"): backup_fips.start_server( binary=FIPS_BINARY_IN_CONTAINER, config=FIPS_TLS_CONFIG_PATH, - extra_env={"GODEBUG": "fips140=only"}, listen_port=FIPS_TLS_LISTEN_PORT, timeout=60, ) @@ -1207,7 +1169,6 @@ def outbound_tls_to_nonfips_clickhouse_with_cipher_profile(self): with When("I run `clickhouse-backup-fips tables` over secure native TCP 9440"): cmd = ( f"timeout {CLI_CMD_TIMEOUT_SEC} " - f"env GODEBUG=fips140=only " f"{FIPS_BINARY_IN_CONTAINER} " f"-c {FIPS_NONFIPS_CH_TLS_FIPSCIPHER_CONFIG_PATH} tables 2>&1" ) @@ -1297,7 +1258,6 @@ def fips_140_3(self): Scenario(run=clickhouse_backup_fips_version_output, flags=TE) Scenario(run=clickhouse_backup_fips_version_output_negative_check, flags=TE) Scenario(run=gofips140_build_flags_present, flags=TE) - Scenario(run=godebug_fips140_modes, flags=TE) # move to regression.py Scenario(run=connectivity_against_non_fips_clickhouse_server, flags=TE) Scenario(run=connectivity_against_fips_clickhouse_server, flags=TE) Scenario(run=fips_integrity_self_test_failure_on_tampered_binary, flags=TE)