Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 70 additions & 110 deletions test/testflows/clickhouse_backup/tests/fips_140_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ def _check_tls_handshake(self, node, target, tls_flag, cipher=None, ciphersuites


@TestStep(Check)
def _check_outbound_tls_handshake(self, node, command, expected_success):
def _check_outbound_tls_handshake(self, node, command, expected_success,
allow_remote_auth_error_as_skip=False):
"""Run `command` inside `node` and assert on the *outbound* TLS outcome.

`command` invokes `clickhouse-backup-fips` with
Expand All @@ -197,6 +198,12 @@ def _check_outbound_tls_handshake(self, node, command, expected_success):

* `expected_success=False` - the FIPS policy MUST reject the
handshake.

In some CI environments the S3 FIPS hostname can resolve to public AWS
instead of the local `openssl s_server` sidecar used by this test. For
those probes, `allow_remote_auth_error_as_skip=True` can skip the check when
an AWS auth failure marker (e.g. `InvalidAccessKeyId`) is present but no TLS
rejection marker is visible.
"""
bounded = f"timeout {CLI_CMD_TIMEOUT_SEC} {command}"
result = node.cmd(bounded, no_checks=True)
Expand All @@ -210,13 +217,24 @@ def _check_outbound_tls_handshake(self, node, command, expected_success):
or "tls: no cipher suite supported" in output_lower
or "tls: protocol version not supported" in output_lower
)
remote_auth_failed = (
"invalidaccesskeyid" in output_lower
or "signaturedoesnotmatch" in output_lower
or "accessdenied" in output_lower
or "statuscode: 403" in output_lower
)

if expected_success:
assert not handshake_rejected, error(
f"Expected the FIPS policy to ACCEPT the handshake, but the output "
f"contains a TLS handshake-failure marker.\n{output}"
)
else:
if allow_remote_auth_error_as_skip and remote_auth_failed:
skip(
"No TLS rejection marker was found and remote auth failed "
"(endpoint resolved outside the local openssl sidecar). "
)
assert handshake_rejected, error(
f"Expected the FIPS policy to REJECT the handshake, but no "
f"TLS handshake-failure marker was found.\n{output}"
Expand Down Expand Up @@ -279,7 +297,8 @@ def _assert_s_server_logs_match_outcome(self, cluster, aux_name, expected_succes
def _check_outbound_tls_with_cipher(self, cluster, backup_fips, *, listen, command,
expected_success, tls_version,
cipher=None, ciphersuites=None,
aux_name='openssl_server'):
aux_name='openssl_server',
allow_remote_auth_error_as_skip=False):
"""One outbound TLS handshake check for one cipher profile.

Brings up an `openssl s_server` container, runs `command` inside
Expand All @@ -304,6 +323,9 @@ def _check_outbound_tls_with_cipher(self, cluster, backup_fips, *, listen, comma
`-tls1_3`); mutually exclusive with `cipher`.
:param aux_name: docker hostname / network alias of the aux container;
callers can name it after a target hostname.
:param allow_remote_auth_error_as_skip: if `True`, a remote auth failure
marker can skip a negative expectation when TLS rejection markers are
absent (used for S3 CI stability when DNS bypasses the sidecar).
"""
ssl_dir = cluster.ssl_certs_dir
try:
Expand All @@ -319,6 +341,7 @@ def _check_outbound_tls_with_cipher(self, cluster, backup_fips, *, listen, comma
node=backup_fips,
command=command,
expected_success=expected_success,
allow_remote_auth_error_as_skip=allow_remote_auth_error_as_skip,
)
_assert_s_server_logs_match_outcome(
cluster=cluster,
Expand Down Expand Up @@ -405,7 +428,7 @@ def gofips140_build_flags_present(self):
backup_fips = _require_fips_container(self)

with When("I run `go version -m` against the FIPS binary"):
result = backup_fips.cmd(f"go version -m {FIPS_BINARY_IN_CONTAINER}")
result = backup_fips.cmd(f"env -u GODEBUG go version -m {FIPS_BINARY_IN_CONTAINER}")
debug(f"STDOUT:\n{result.output}")
debug(f"EXIT CODE: {result.exitcode}")
debug(f"FIPS_GO_BUILD_SETTING: {FIPS_GO_BUILD_SETTING!r}")
Expand All @@ -417,116 +440,36 @@ def gofips140_build_flags_present(self):
)


@TestScenario
@Requirements(
RQ_SRS_013_ClickHouse_BackupUtility_FIPS_GODEBUG_Unset("1.0"),
RQ_SRS_013_ClickHouse_BackupUtility_FIPS_GODEBUG_On("1.0"),
RQ_SRS_013_ClickHouse_BackupUtility_FIPS_GODEBUG_Only("1.0"),
)
def godebug_fips140_modes(self):
"""Validate the three Go FIPS runtime modes for `clickhouse-backup-fips`.

For each Go FIPS runtime mode listed in `FIPS_GODEBUG_MODES`:

* `GODEBUG` not set - FIPS active by build-time default (`GOFIPS140=v1.0.0`).
* `GODEBUG=fips140=on` - FIPS active without strict enforcement.
* `GODEBUG=fips140=only` - FIPS active with strict enforcement
(expected error or panic).

Run two checks against the FIPS-compatible Altinity ClickHouse
server for each mode (for example `altinity/clickhouse-server:25.3.8.30001.altinityfips`) on
secure native TCP `9440`:

1. `clickhouse-backup-fips --version` reports `FIPS 140-3: true`.
2. `clickhouse-backup-fips -c <config> tables` exits `0`.
The CH server container is brought up once and reused across all three modes.
"""

FIPS_GODEBUG_MODES = (None, "fips140=on", "fips140=only")
backup_fips = _require_fips_container(self)
cluster = self.context.cluster

listeners_xml = os.path.join(
cluster.tests_dir,
"configs/clickhouse_fips_server/config.d/listeners.xml",
)

try:
with Given("a dedicated FIPS-compatible Altinity ClickHouse server"):
cluster.start_clickhouse_server_container(
name=FIPS_CH_SERVER_NAME,
image_tag=FIPS_CH_SERVER_IMAGE,
extra_volumes=[
(cluster.ssl_certs_dir, "/etc/clickhouse-server/ssl"),
(listeners_xml, "/etc/clickhouse-server/config.d/listeners.xml"),
],
)

with When("for each Go FIPS runtime mode I check `--version` and `tables`"):
for godebug in FIPS_GODEBUG_MODES:
label = godebug if godebug else "unset" # Show "unset" when GODEBUG is not set.

with Check(f"GODEBUG {label}"):
status, output = _read_fips_status(
backup_fips, FIPS_BINARY_IN_CONTAINER, godebug=godebug,
)

with Then(
f"`clickhouse-backup-fips --version` reports "
f"`{FIPS_VERSION_LABEL} {FIPS_VERSION_TRUE}` (GODEBUG={label})"
):
assert status is not None, error(
f"`{FIPS_VERSION_LABEL}` line missing from "
f"`--version` (GODEBUG={label}):\n{output}"
)
assert status == FIPS_VERSION_TRUE, error(
f"Expected `{FIPS_VERSION_LABEL} {FIPS_VERSION_TRUE}` "
f"(GODEBUG={label}), got `{status}`.\n{output}"
)

with Then(
f"`clickhouse-backup-fips tables` exits 0 against the "
f"FIPS ClickHouse server (GODEBUG={label})"
):
_assert_tables_succeeds(
backup_fips=backup_fips,
config=FIPS_CONNECTIVITY_FIPS_CONFIG_PATH,
godebug=godebug,
)
finally:
cluster.stop_auxiliary_container(FIPS_CH_SERVER_NAME)


@TestStep(Check)
def _assert_tables_succeeds(self, backup_fips, *, config, godebug):
def _assert_tables_succeeds(self, backup_fips, *, config, godebug=None):
"""Run `clickhouse-backup-fips -c <config> tables` and assert it exits 0.

Wrapped in `timeout` so a hung handshake or DNS failure does not
block the tests for a long time; on success the command is fast.

:param godebug: value for `GODEBUG` (e.g. `"fips140=only"`); pass
`None` to leave `GODEBUG` unset and rely on the FIPS binary's
build-time default.
`None` (default) to inherit the FIPS mode from the container.
"""
env_prefix = f"env GODEBUG={godebug} " if godebug else ""
cmd = (
f"timeout {CLI_CMD_TIMEOUT_SEC} {env_prefix}"
f"{FIPS_BINARY_IN_CONTAINER} -c {config} tables"
)
result = backup_fips.cmd(cmd, no_checks=True)
label = godebug if godebug else "unset"
label = godebug if godebug else "container-default"
assert result.exitcode == 0, error(
f"`clickhouse-backup-fips tables` against `{config}` failed "
f"(exit={result.exitcode}, GODEBUG={label}):\n{result.output}"
)


@TestStep(Check)
def _assert_tables_fails(self, backup_fips, *, config, godebug, reason):
def _assert_tables_fails(self, backup_fips, *, config, reason, godebug=None):
"""Run `clickhouse-backup-fips -c <config> tables` and assert it fails.

Negative option to `_assert_tables_succeeds`: require non-zero
exit and ensure no table-list success markers are present.
exit and ensure no table-list success markers are present. Pass `godebug`
`None` (default) to inherit the FIPS mode from the container.
"""
env_prefix = f"env GODEBUG={godebug} " if godebug else ""
cmd = (
Expand All @@ -535,7 +478,7 @@ def _assert_tables_fails(self, backup_fips, *, config, godebug, reason):
)
result = backup_fips.cmd(cmd, no_checks=True)
output = result.output or ""
label = godebug if godebug else "unset"
label = godebug if godebug else "container-default"

assert result.exitcode != 0, error(
f"`clickhouse-backup-fips tables` unexpectedly succeeded against `{config}` "
Expand Down Expand Up @@ -583,7 +526,6 @@ def connectivity_against_fips_clickhouse_server(self):
_assert_tables_succeeds(
backup_fips=backup_fips,
config=FIPS_CONNECTIVITY_FIPS_CONFIG_PATH,
godebug="fips140=only",
)
finally:
cluster.stop_auxiliary_container(FIPS_CH_SERVER_NAME)
Expand Down Expand Up @@ -628,7 +570,6 @@ def connectivity_against_non_fips_clickhouse_server(self):
_assert_tables_fails(
backup_fips=backup_fips,
config=FIPS_CONNECTIVITY_NONFIPS_CONFIG_PATH,
godebug="fips140=only",
reason=(
"The non-FIPS ClickHouse server runs with image defaults "
"and does not expose the secure native TCP port `:9440` "
Expand Down Expand Up @@ -733,13 +674,12 @@ def inbound_tls_cipher_negotiation(self):

try:
with Given(
"I start clickhouse-backup-fips server with GODEBUG=fips140=only and the TLS API config",
"I start clickhouse-backup-fips server with the TLS API config",
description=f"config={FIPS_TLS_CONFIG_PATH} listen=:{FIPS_TLS_LISTEN_PORT}",
):
backup_fips.start_server(
binary=FIPS_BINARY_IN_CONTAINER,
config=FIPS_TLS_CONFIG_PATH,
extra_env={"GODEBUG": "fips140=only"},
listen_port=FIPS_TLS_LISTEN_PORT,
timeout=60,
)
Expand Down Expand Up @@ -828,7 +768,7 @@ def outbound_tls_cipher_negotiation(self):
cluster = self.context.cluster

cmd = (
f"env GODEBUG=fips140=only {FIPS_BINARY_IN_CONTAINER} "
f"{FIPS_BINARY_IN_CONTAINER} "
f"-c {FIPS_OUTBOUND_CH_CONFIG_PATH} tables 2>&1" # `2>&1` redirects stderr to stdout.
)

Expand Down Expand Up @@ -873,17 +813,16 @@ def outbound_tls_cipher_negotiation(self):
RQ_SRS_013_ClickHouse_BackupUtility_FIPS_TLS_Outbound_S3_Ciphers_Approved("1.0"),
)
def outbound_tls_to_s3_endpoint_with_openssl_s_server(self):
"""Validate `clickhouse-backup-fips` can connect to an S3 HTTPS endpoint over a FIPS-approved cipher.
"""Validate outbound S3 TLS policy with approved and non-approved profiles.

For each FIPS-approved cipher / cipher suite, start an `openssl s_server`
that offers only that profile and run:

GODEBUG=fips140=only clickhouse-backup-fips -c <config> list remote

inside the FIPS container, asserting the TLS handshake is NOT rejected by
the FIPS policy.

This scenario is a positive *compatibility* check only.
inside the FIPS container, asserting:
* approved profiles are accepted by FIPS policy;
* non-approved profiles are rejected by FIPS policy.

The `s_server` container reuses the cluster's static SSL fixtures and the
`clickhouse-backup-fips` config is the static
Expand All @@ -908,15 +847,13 @@ def outbound_tls_to_s3_endpoint_with_openssl_s_server(self):
backup_fips = _require_fips_container(self)
cluster = self.context.cluster

# `env GODEBUG=...` so the `timeout` prefix added inside
# `_check_outbound_tls_handshake` can exec a real program.
# cmd - command to run inside the FIPS container
cmd = (
f"env GODEBUG=fips140=only {FIPS_BINARY_IN_CONTAINER} "
f"{FIPS_BINARY_IN_CONTAINER} "
f"-c {FIPS_OUTBOUND_S3_CONFIG_PATH} list remote 2>&1" # `2>&1` redirects stderr to stdout.
)

with When("I try each FIPS-approved TLSv1.3 cipher suite on the S3 endpoint"):
with Check("I try each FIPS-approved TLSv1.3 cipher suite on the S3 endpoint"):
for ciphersuite in FIPS_TLS13_APPROVED:
with Check(f"TLSv1.3 ciphersuite {ciphersuite} should be accepted"):
_check_outbound_tls_with_cipher(
Expand All @@ -926,7 +863,7 @@ def outbound_tls_to_s3_endpoint_with_openssl_s_server(self):
ciphersuites=ciphersuite, command=cmd, expected_success=True,
)

with And("I try each FIPS-approved TLSv1.2 cipher on the S3 endpoint"):
with Check("I try each FIPS-approved TLSv1.2 cipher on the S3 endpoint"):
for cipher in FIPS_TLS12_APPROVED:
with Check(f"TLSv1.2 cipher {cipher} should be accepted"):
_check_outbound_tls_with_cipher(
Expand All @@ -936,6 +873,28 @@ def outbound_tls_to_s3_endpoint_with_openssl_s_server(self):
cipher=cipher, command=cmd, expected_success=True,
)

with Check("I try each non-FIPS TLSv1.3 cipher suite on the S3 endpoint"):
for ciphersuite in NON_FIPS_TLS13:
with Check(f"TLSv1.3 ciphersuite {ciphersuite} should be rejected"):
_check_outbound_tls_with_cipher(
cluster=cluster, backup_fips=backup_fips,
aux_name=OPENSSL_S3_FIPS_AUX_NAME,
listen=FIPS_OUTBOUND_S3_TLS_PORT, tls_version="-tls1_3",
ciphersuites=ciphersuite, command=cmd, expected_success=False,
allow_remote_auth_error_as_skip=True,
)

with Check("I try each non-FIPS TLSv1.2 cipher on the S3 endpoint"):
for cipher in NON_FIPS_TLS12_OUTBOUND:
with Check(f"TLSv1.2 cipher {cipher} should be rejected"):
_check_outbound_tls_with_cipher(
cluster=cluster, backup_fips=backup_fips,
aux_name=OPENSSL_S3_FIPS_AUX_NAME,
listen=FIPS_OUTBOUND_S3_TLS_PORT, tls_version="-tls1_2",
cipher=cipher, command=cmd, expected_success=False,
allow_remote_auth_error_as_skip=True,
)


# Startup/core CASTs used during `clickhouse-backup-fips --version`. Forcing
# any of these via `failfipscast` must abort startup deterministically.
Expand Down Expand Up @@ -1040,6 +999,11 @@ def forced_cast_failures(self):
)
else:
with Then(f"`{cast}` was not exercised and startup stayed clean"):
assert result.exitcode == 0, error(
f"first-use CAST `{cast}` exited non-zero unexpectedly.\n{output}"
)

# first-use CASTs abort only when the CAST is actually reached; otherwise startup stays clean without the marker.
assert not marker_present, error(
f"first-use CAST `{cast}` showed "
f"`{FIPS_FAILFIPSCAST_MARKER}` but exited 0.\n{output}"
Expand Down Expand Up @@ -1096,7 +1060,6 @@ def connection_to_fips_clickhouse_with_nonfips_config(self):
):
cmd = (
f"timeout {CLI_CMD_TIMEOUT_SEC} "
f"env GODEBUG=fips140=only "
f"{FIPS_BINARY_IN_CONTAINER} "
f"-c {FIPS_CONNECTIVITY_FIPS_MISCONFIG_PATH} tables 2>&1" # `2>&1` redirects stderr to stdout.
)
Expand Down Expand Up @@ -1138,11 +1101,10 @@ def server_listens_only_on_fips_api_port(self):
backup_fips = _require_fips_container(self)

try:
with Given("clickhouse-backup-fips server is started with GODEBUG=fips140=only"):
with Given("clickhouse-backup-fips server is started"):
backup_fips.start_server(
binary=FIPS_BINARY_IN_CONTAINER,
config=FIPS_TLS_CONFIG_PATH,
extra_env={"GODEBUG": "fips140=only"},
listen_port=FIPS_TLS_LISTEN_PORT,
timeout=60,
)
Expand Down Expand Up @@ -1207,7 +1169,6 @@ def outbound_tls_to_nonfips_clickhouse_with_cipher_profile(self):
with When("I run `clickhouse-backup-fips tables` over secure native TCP 9440"):
cmd = (
f"timeout {CLI_CMD_TIMEOUT_SEC} "
f"env GODEBUG=fips140=only "
f"{FIPS_BINARY_IN_CONTAINER} "
f"-c {FIPS_NONFIPS_CH_TLS_FIPSCIPHER_CONFIG_PATH} tables 2>&1"
)
Expand Down Expand Up @@ -1297,7 +1258,6 @@ def fips_140_3(self):
Scenario(run=clickhouse_backup_fips_version_output, flags=TE)
Scenario(run=clickhouse_backup_fips_version_output_negative_check, flags=TE)
Scenario(run=gofips140_build_flags_present, flags=TE)
Scenario(run=godebug_fips140_modes, flags=TE) # move to regression.py
Scenario(run=connectivity_against_non_fips_clickhouse_server, flags=TE)
Scenario(run=connectivity_against_fips_clickhouse_server, flags=TE)
Scenario(run=fips_integrity_self_test_failure_on_tampered_binary, flags=TE)
Expand Down