From b1350674fbd0a6a8d398620fea5ced216fc8eb62 Mon Sep 17 00:00:00 2001 From: Sri Harsha Date: Wed, 1 Jul 2026 18:58:07 +0000 Subject: [PATCH 1/2] fix Ubuntu2204 HTTPSProxy PrivateDNS CSE exit50 kubelet --- e2e/const.go | 10 +++++++++ e2e/vmss.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++- e2e/vmss_test.go | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 e2e/vmss_test.go diff --git a/e2e/const.go b/e2e/const.go index 81216df302b..f1d9041e626 100644 --- a/e2e/const.go +++ b/e2e/const.go @@ -11,6 +11,16 @@ const ( linuxExtensionExitCodeStr = `Enable failed: failed to execute command: command terminated with exit status=(\d+)` ) +// cseExitCodeOutboundConnFail is the CSE exit code for ERR_OUTBOUND_CONN_FAIL +// (see parts/linux/cloud-init/artifacts/cse_helpers.sh). It indicates the node's +// outbound connectivity preflight check (curl to mcr.microsoft.com, optionally routed +// through the e2e HTTP proxy) failed all retries and the script exited before kubelet +// started. In the e2e environment this is a known low-rate transient infrastructure +// flake rather than a product regression, so the harness retries node provisioning a +// bounded number of times to reduce PR-gate noise. A genuine regression fails on every +// attempt and still surfaces after the retry budget is exhausted. +const cseExitCodeOutboundConnFail = "50" + // test data used across multiple test cases const ( encodedTestCert = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUgvVENDQmVXZ0F3SUJBZ0lRYUJZRTMvTTA4WEhZQ25OVm1jRkJjakFOQmdrcWhraUc5dzBCQVFzRkFEQnkKTVFzd0NRWURWUVFHRXdKVlV6RU9NQXdHQTFVRUNBd0ZWR1Y0WVhNeEVEQU9CZ05WQkFjTUIwaHZkWE4wYjI0eApFVEFQQmdOVkJBb01DRk5UVENCRGIzSndNUzR3TEFZRFZRUUREQ1ZUVTB3dVkyOXRJRVZXSUZOVFRDQkpiblJsCmNtMWxaR2xoZEdVZ1EwRWdVbE5CSUZJek1CNFhEVEl3TURRd01UQXdOVGd6TTFvWERUSXhNRGN4TmpBd05UZ3oKTTFvd2diMHhDekFKQmdOVkJBWVRBbFZUTVE0d0RBWURWUVFJREFWVVpYaGhjekVRTUE0R0ExVUVCd3dIU0c5MQpjM1J2YmpFUk1BOEdBMVVFQ2d3SVUxTk1JRU52Y25BeEZqQVVCZ05WQkFVVERVNVdNakF3T0RFMk1UUXlORE14CkZEQVNCZ05WQkFNTUMzZDNkeTV6YzJ3dVkyOXRNUjB3R3dZRFZRUVBEQlJRY21sMllYUmxJRTl5WjJGdWFYcGgKZEdsdmJqRVhNQlVHQ3lzR0FRUUJnamM4QWdFQ0RBWk9aWFpoWkdFeEV6QVJCZ3NyQmdFRUFZSTNQQUlCQXhNQwpWVk13Z2dFaU1BMEdDU3FHU0liM0RRRUJBUVVBQTRJQkR3QXdnZ0VLQW9JQkFRREhoZVJrYmIxRkNjN3hSS3N0CndLMEpJR2FLWTh0N0piUzJiUTJiNllJSkRnbkh1SVlIcUJyQ1VWNzlvZWxpa2tva1JrRnZjdnBhS2luRkhEUUgKVXBXRUk2UlVFUlltU0NnM084V2k0MnVPY1YyQjVaYWJtWENrd2R4WTVFY2w1MUJiTThVbkdkb0FHYmRObWlSbQpTbVRqY3MrbGhNeGc0ZkZZNmxCcGlFVkZpR1VqR1JSKzYxUjY3THo2VTRLSmVMTmNDbTA3UXdGWUtCbXBpMDhnCmR5Z1N2UmRVdzU1Sm9wcmVkaitWR3RqVWtCNGhGVDRHUVgvZ2h0NjlSbHF6Lys4dTBkRVFraHVVdXVjcnFhbG0KU0d5NDNIUndCZkRLRndZZVdNN0NQTWQ1ZS9kTyt0MDh0OFBianpWVFR2NWhRRENzRVlJVjJUN0FGSTlTY054TQpraDcvQWdNQkFBR2pnZ05CTUlJRFBUQWZCZ05WSFNNRUdEQVdnQlMvd1ZxSC95ajZRVDM5dDAva0hhK2dZVmdwCnZUQi9CZ2dyQmdFRkJRY0JBUVJ6TUhFd1RRWUlLd1lCQlFVSE1BS0dRV2gwZEhBNkx5OTNkM2N1YzNOc0xtTnYKYlM5eVpYQnZjMmwwYjNKNUwxTlRUR052YlMxVGRXSkRRUzFGVmkxVFUwd3RVbE5CTFRRd09UWXRVak11WTNKMApNQ0FHQ0NzR0FRVUZCekFCaGhSb2RIUndPaTh2YjJOemNITXVjM05zTG1OdmJUQWZCZ05WSFJFRUdEQVdnZ3QzCmQzY3VjM05zTG1OdmJZSUhjM05zTG1OdmJUQmZCZ05WSFNBRVdEQldNQWNHQldlQkRBRUJNQTBHQ3lxRWFBR0cKOW5jQ0JRRUJNRHdHRENzR0FRUUJncWt3QVFNQkJEQXNNQ29HQ0NzR0FRVUZCd0lCRmg1b2RIUndjem92TDNkMwpkeTV6YzJ3dVkyOXRMM0psY0c5emFYUnZjbmt3SFFZRFZSMGxCQll3RkFZSUt3WUJCUVVIQXdJR0NDc0dBUVVGCkJ3TUJNRWdHQTFVZEh3UkJNRDh3UGFBN29EbUdOMmgwZEhBNkx5OWpjbXh6TG5OemJDNWpiMjB2VTFOTVkyOXQKTFZOMVlrTkJMVVZXTFZOVFRDMVNVMEV0TkRBNU5pMVNNeTVqY213d0hRWURWUjBPQkJZRUZBREFGVUlhenc1cgpaSUhhcG5SeElVbnB3K0dMTUE0R0ExVWREd0VCL3dRRUF3SUZvRENDQVgwR0Npc0dBUVFCMW5rQ0JBSUVnZ0Z0CkJJSUJhUUZuQUhjQTlseVVMOUYzTUNJVVZCZ0lNSlJXanVOTkV4a3p2OThNTHlBTHpFN3haT01BQUFGeE0waG8KYndBQUJBTUFTREJHQWlFQTZ4ZWxpTlI4R2svNjNwWWRuUy92T3gvQ2pwdEVNRXY4OVdXaDEvdXJXSUVDSVFEeQpCcmVIVTI1RHp3dWtRYVJRandXNjU1WkxrcUNueGJ4UVdSaU9lbWo5SkFCMUFKUWd2QjZPMVkxc2lITWZnb3NpCkxBM1IyazFlYkUrVVBXSGJUaTlZVGFMQ0FBQUJjVE5JYU53QUFBUURBRVl3UkFJZ0dSRTR3emFiTlJkRDhrcS8KdkZQM3RRZTJobTB4NW5YdWxvd2g0SWJ3M2xrQ0lGWWIvM2xTRHBsUzdBY1I0citYcFd0RUtTVEZXSm1OQ1JiYwpYSnVyMlJHQkFIVUE3c0NWN28xeVpBK1M0OE81RzhjU28ybHFDWHRMYWhvVU9PWkhzc3Z0eGZrQUFBRnhNMGhvCjh3QUFCQU1BUmpCRUFpQjZJdmJvV3NzM1I0SXRWd2plYmw3RDN5b0ZhWDBORGgyZFdoaGd3Q3hySHdJZ0NmcTcKb2NNQzV0KzFqaTVNNXhhTG1QQzRJK1dYM0kvQVJrV1N5aU83SVFjd0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dJQgpBQ2V1dXI0UW51anFtZ3VTckhVM21oZitjSm9kelRRTnFvNHRkZStQRDEvZUZkWUFFTHU4eEYrMEF0N3hKaVBZCmk1Ukt3aWx5UDU2diszaVkyVDlsdzdTOFRKMDQxVkxoYUlLcDE0TXpTVXpSeWVvT0FzSjdRQURNQ2xIS1VEbEgKVVUycE51bzg4WTZpZ292VDNic253Sk5pRVFOcXltU1NZaGt0dzB0YWR1b3FqcVhuMDZnc1Zpb1dUVkRYeXNkNQpxRXg0dDZzSWdJY01tMjZZSDF2SnBDUUVoS3BjMnkwN2dSa2tsQlpSdE1qVGh2NGNYeXlNWDd1VGNkVDdBSkJQCnVlaWZDb1YyNUp4WHVvOGQ1MTM5Z3dQMUJBZTdJQlZQeDJ1N0tOL1V5T1hkWm13TWYvVG1GR3dEZENmc3lIZi8KWnNCMndMSG96VFlvQVZtUTlGb1UxSkxnY1ZpdnFKK3ZObEJoSFhobHhNZE4wajgwUjlOejZFSWdsUWplSzNPOApJL2NGR20vQjgrNDJoT2xDSWQ5WmR0bmRKY1JKVmppMHdEMHF3ZXZDYWZBOWpKbEh2L2pzRStJOVV6NmNwQ3loCnN3K2xyRmR4VWdxVTU4YXhxZUs4OUZSK05vNHEwSUlPK0ppMXJKS3I5bmtTQjBCcVhvelZuRTFZQi9LTHZkSXMKdVlaSnVxYjJwS2t1K3p6VDZnVXdIVVRadkJpTk90WEw0Tnh3Yy9LVDdXek9TZDJ3UDEwUUk4REtnNHZmaU5EcwpIV21CMWM0S2ppNmdPZ0E1dVNVemFHbXEvdjRWbmNLNVVyK245TGJmbmZMYzI4SjVmdC9Hb3Rpbk15RGszaWFyCkYxMFlscWNPbWVYMXVGbUtiZGkvWG9yR2xrQ29NRjNURHg4cm1wOURCaUIvCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0=" //nolint:lll diff --git a/e2e/vmss.go b/e2e/vmss.go index 79bc05af748..5446bd67c9b 100644 --- a/e2e/vmss.go +++ b/e2e/vmss.go @@ -74,13 +74,63 @@ func compileAKSNodeController(ctx context.Context, arch string) (*os.File, error return f, nil } +// maxOutboundCSERetries bounds how many times node provisioning is retried when the +// CSE outbound connectivity preflight check fails (ERR_OUTBOUND_CONN_FAIL / exit 50). +// This is a known transient e2e-infrastructure flake; a genuine product regression +// fails on every attempt and still surfaces once the budget is exhausted. +const maxOutboundCSERetries = 2 + func ConfigureAndCreateVMSS(ctx context.Context, s *Scenario) (*ScenarioVM, error) { - vm, err := CreateVMSSWithRetry(ctx, s) + var vm *ScenarioVM + var err error + for attempt := 0; ; attempt++ { + vm, err = CreateVMSSWithRetry(ctx, s) + if err == nil { + break + } + // Known transient e2e-infra flake: the CSE outbound connectivity preflight check + // (curl mcr.microsoft.com, optionally via the e2e proxy) intermittently fails all + // retries and exits ERR_OUTBOUND_CONN_FAIL (50) before kubelet starts. Recreate the + // node a bounded number of times to reduce PR-gate noise without masking real + // regressions, which fail consistently and survive the retry budget. + if attempt >= maxOutboundCSERetries || s.IsWindows() || config.Config.KeepVMSS || !isTransientOutboundCSEFailure(err) { + break + } + toolkit.Logf(ctx, "CSE failed with ERR_OUTBOUND_CONN_FAIL (exit %s) on VMSS %q: known transient e2e outbound flake, recreating node (attempt %d/%d)", cseExitCodeOutboundConnFail, s.Runtime.VMSSName, attempt+1, maxOutboundCSERetries) + deleteVMSSAndWait(ctx, s) + } + skipTestIfSKUNotAvailableErr(s.T, err) return vm, err } +// isTransientOutboundCSEFailure reports whether a failed provisioning attempt was caused by +// the Linux CSE exiting with ERR_OUTBOUND_CONN_FAIL. Azure embeds the full CSE status +// (including "ExitCode": "50") in the VMExtensionProvisioningError returned by the VMSS +// create operation, so we match it directly rather than re-querying the instance view. +func isTransientOutboundCSEFailure(err error) bool { + return err != nil && strings.Contains(err.Error(), `"ExitCode": "`+cseExitCodeOutboundConnFail+`"`) +} + +// deleteVMSSAndWait synchronously deletes the scenario's VMSS so the same name can be +// safely reused on the next provisioning attempt. Unlike deleteVMSS (fire-and-forget at +// test cleanup), this waits for the delete to complete to avoid a create/delete conflict. +func deleteVMSSAndWait(ctx context.Context, s *Scenario) { + ctx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Minute) + defer cancel() + poller, err := config.Azure.VMSS.BeginDelete(ctx, *s.Runtime.Cluster.Model.Properties.NodeResourceGroup, s.Runtime.VMSSName, &armcompute.VirtualMachineScaleSetsClientBeginDeleteOptions{ + ForceDeletion: to.Ptr(true), + }) + if err != nil { + s.T.Logf("failed to begin delete of vmss %q for retry: %s", s.Runtime.VMSSName, err) + return + } + if _, err := poller.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions); err != nil { + s.T.Logf("failed to wait for delete of vmss %q for retry: %s", s.Runtime.VMSSName, err) + } +} + // CustomDataWithHack is similar to nodeconfigutils.CustomData, but it uses a hack to run new aks-node-controller binary. // Original aks-node-controller isn't run because it fails systemd check validating aks-node-controller-config.json exists // (check aks-node-controller.service for details). diff --git a/e2e/vmss_test.go b/e2e/vmss_test.go new file mode 100644 index 00000000000..dfc557292a9 --- /dev/null +++ b/e2e/vmss_test.go @@ -0,0 +1,53 @@ +package e2e + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/require" +) + +// TestCSEExitCodeOutboundConnFail pins the exit code constant to the value emitted by +// ERR_OUTBOUND_CONN_FAIL in parts/linux/cloud-init/artifacts/cse_helpers.sh. If the +// product error code changes, this test forces the harness mitigation to be updated. +func TestCSEExitCodeOutboundConnFail(t *testing.T) { + require.Equal(t, "50", cseExitCodeOutboundConnFail) +} + +// TestIsTransientOutboundCSEFailure verifies the bounded-retry classifier only matches a +// VMExtensionProvisioningError whose embedded CSE status reports ERR_OUTBOUND_CONN_FAIL, +// and ignores other failures so genuine regressions still surface. +func TestIsTransientOutboundCSEFailure(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + { + name: "nil error is not a transient outbound failure", + err: nil, + want: false, + }, + { + // Real-world payload shape from Test_Ubuntu2204_HTTPSProxy_PrivateDNS. + name: "outbound exit 50 embedded in extension provisioning error", + err: errors.New(`VMExtensionProvisioningError: [stdout] { "ExitCode": "50", "Output": "+ exit 50" } [stderr]`), + want: true, + }, + { + name: "different CSE exit code is not retried", + err: errors.New(`VMExtensionProvisioningError: [stdout] { "ExitCode": "51" } [stderr]`), + want: false, + }, + { + name: "unrelated azure error is not retried", + err: errors.New("AllocationFailed: not enough capacity"), + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.want, isTransientOutboundCSEFailure(tt.err)) + }) + } +} From abcea6b8c380d8d7bbe728b019d2310472766649 Mon Sep 17 00:00:00 2001 From: Sri Harsha Date: Wed, 1 Jul 2026 20:40:23 +0000 Subject: [PATCH 2/2] e2e: detect CSE exit50 outbound flake via instance view and retry --- e2e/vmss.go | 53 ++++++++++++++++++++++++++----- e2e/vmss_test.go | 83 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 106 insertions(+), 30 deletions(-) diff --git a/e2e/vmss.go b/e2e/vmss.go index 5446bd67c9b..bd839831c68 100644 --- a/e2e/vmss.go +++ b/e2e/vmss.go @@ -93,10 +93,18 @@ func ConfigureAndCreateVMSS(ctx context.Context, s *Scenario) (*ScenarioVM, erro // retries and exits ERR_OUTBOUND_CONN_FAIL (50) before kubelet starts. Recreate the // node a bounded number of times to reduce PR-gate noise without masking real // regressions, which fail consistently and survive the retry budget. - if attempt >= maxOutboundCSERetries || s.IsWindows() || config.Config.KeepVMSS || !isTransientOutboundCSEFailure(err) { + if attempt >= maxOutboundCSERetries || s.IsWindows() || config.Config.KeepVMSS { break } - toolkit.Logf(ctx, "CSE failed with ERR_OUTBOUND_CONN_FAIL (exit %s) on VMSS %q: known transient e2e outbound flake, recreating node (attempt %d/%d)", cseExitCodeOutboundConnFail, s.Runtime.VMSSName, attempt+1, maxOutboundCSERetries) + // The VMExtensionProvisioningError returned by the create operation does not reliably + // embed the CSE status JSON, so classify the failure from the extension instance view + // (the same source getCustomScriptExtensionStatus parses) rather than string-matching + // the ARM error. Only the outbound preflight exit code is treated as retryable. + exitCode, ok := getLinuxCSEExitCode(ctx, s) + if !ok || exitCode != cseExitCodeOutboundConnFail { + break + } + toolkit.Logf(ctx, "CSE failed with ERR_OUTBOUND_CONN_FAIL (exit %s) on VMSS %q: known transient e2e outbound flake, recreating node (attempt %d/%d)", exitCode, s.Runtime.VMSSName, attempt+1, maxOutboundCSERetries) deleteVMSSAndWait(ctx, s) } @@ -105,12 +113,41 @@ func ConfigureAndCreateVMSS(ctx context.Context, s *Scenario) (*ScenarioVM, erro return vm, err } -// isTransientOutboundCSEFailure reports whether a failed provisioning attempt was caused by -// the Linux CSE exiting with ERR_OUTBOUND_CONN_FAIL. Azure embeds the full CSE status -// (including "ExitCode": "50") in the VMExtensionProvisioningError returned by the VMSS -// create operation, so we match it directly rather than re-querying the instance view. -func isTransientOutboundCSEFailure(err error) bool { - return err != nil && strings.Contains(err.Error(), `"ExitCode": "`+cseExitCodeOutboundConnFail+`"`) +// getLinuxCSEExitCode queries the VMSS instance view and returns the Linux CSE exit code +// parsed from the CustomScript extension status. It reports ok=false when no parseable CSE +// exit code is available (e.g. Windows, a non-CSE failure, or the instance view is not yet +// populated). This is the reliable source of the exit code because the ARM provisioning +// error does not consistently carry the full CSE status payload. +func getLinuxCSEExitCode(ctx context.Context, s *Scenario) (string, bool) { + ctx, cancel := context.WithTimeout(context.WithoutCancel(ctx), time.Minute) + defer cancel() + pager := config.Azure.VMSSVM.NewListPager(*s.Runtime.Cluster.Model.Properties.NodeResourceGroup, s.Runtime.VMSSName, &armcompute.VirtualMachineScaleSetVMsClientListOptions{ + Expand: to.Ptr("instanceView"), + }) + for pager.More() { + page, err := pager.NextPage(ctx) + if err != nil { + return "", false + } + for _, vmssVM := range page.Value { + if vmssVM.Properties == nil || vmssVM.Properties.InstanceView == nil { + continue + } + for _, extension := range vmssVM.Properties.InstanceView.Extensions { + for _, status := range extension.Statuses { + if status == nil { + continue + } + cseStatus, err := parseLinuxCSEMessage(*status) + if err != nil || cseStatus == nil || cseStatus.ExitCode == "" { + continue + } + return cseStatus.ExitCode, true + } + } + } + } + return "", false } // deleteVMSSAndWait synchronously deletes the scenario's VMSS so the same name can be diff --git a/e2e/vmss_test.go b/e2e/vmss_test.go index dfc557292a9..efc7aa7a11d 100644 --- a/e2e/vmss_test.go +++ b/e2e/vmss_test.go @@ -1,9 +1,10 @@ package e2e import ( - "errors" "testing" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v7" "github.com/stretchr/testify/require" ) @@ -14,40 +15,78 @@ func TestCSEExitCodeOutboundConnFail(t *testing.T) { require.Equal(t, "50", cseExitCodeOutboundConnFail) } -// TestIsTransientOutboundCSEFailure verifies the bounded-retry classifier only matches a -// VMExtensionProvisioningError whose embedded CSE status reports ERR_OUTBOUND_CONN_FAIL, -// and ignores other failures so genuine regressions still surface. -func TestIsTransientOutboundCSEFailure(t *testing.T) { +// TestParseLinuxCSEMessageOutboundExitCode verifies that parseLinuxCSEMessage extracts the +// outbound-connectivity exit code from a real CustomScript extension instance-view status. +// getLinuxCSEExitCode relies on this parsing to classify the retryable e2e flake, so a +// change to the message format must be reflected here. +func TestParseLinuxCSEMessageOutboundExitCode(t *testing.T) { tests := []struct { - name string - err error - want bool + name string + code string + message string + wantExitCode string + wantErr bool }{ { - name: "nil error is not a transient outbound failure", - err: nil, - want: false, + name: "well-formed CSE json with outbound exit code", + code: "ProvisioningState/failed/0", + message: `Enable failed: [stdout] { "ExitCode": "50", "Output": "+ exit 50" } [stderr]`, + wantExitCode: "50", }, { - // Real-world payload shape from Test_Ubuntu2204_HTTPSProxy_PrivateDNS. - name: "outbound exit 50 embedded in extension provisioning error", - err: errors.New(`VMExtensionProvisioningError: [stdout] { "ExitCode": "50", "Output": "+ exit 50" } [stderr]`), - want: true, + name: "unparsable body falls back to extension exit status", + code: "ProvisioningState/failed/0", + message: `Enable failed: failed to execute command: command terminated with exit status=50 [stdout]not-json[stderr]`, + wantExitCode: "50", }, { - name: "different CSE exit code is not retried", - err: errors.New(`VMExtensionProvisioningError: [stdout] { "ExitCode": "51" } [stderr]`), - want: false, + name: "well-formed CSE json with non-outbound exit code", + code: "ProvisioningState/failed/0", + message: `Enable failed: [stdout] { "ExitCode": "51", "Output": "+ exit 51" } [stderr]`, + wantExitCode: "51", }, { - name: "unrelated azure error is not retried", - err: errors.New("AllocationFailed: not enough capacity"), - want: false, + // Real Test_Ubuntu2204_HTTPSProxy_PrivateDNS/default failure: the outer extension + // wrapper and the CSE status both report 50. + name: "real outbound flake, outer exit 50 and cse exit 50", + code: "ProvisioningState/failed/0", + message: "failed to execute command: command terminated with exit status=50\n[stdout]\n" + + `{ "ExitCode": "50", "Output": "Processing manual pages under /usr/local/man...\n++ date\n+ echo 'man-db finished updates'\n+ exit 50", "Error": "", "ExecDuration": "155", "BootDatapoints": { "KubeletStartTime": "n/a" } }` + + "\n\n[stderr]\ndate: invalid date 'n/a'\n", + wantExitCode: "50", + }, + { + // Real Test_Ubuntu2204_HTTPSProxy_PrivateDNS/scriptless_nbc failure: the outer + // extension wrapper reports exit status=1, but the CSE status reports 50. The + // classifier must read the CSE ExitCode field, not the outer wrapper. + name: "real outbound flake, outer exit 1 but cse exit 50", + code: "ProvisioningState/failed/0", + message: "failed to execute command: command terminated with exit status=1\n[stdout]\n" + + `{ "ExitCode": "50", "Output": "man-db finished updates\n+ exit 50", "Error": "", "ExecDuration": "70", "BootDatapoints": { "KubeletStartTime": "n/a" } }` + + "\n\n[stderr]\ndate\n", + wantExitCode: "50", + }, + { + name: "no parsable body", + code: "ProvisioningState/failed/0", + message: `Enable failed with no parsable body`, + wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - require.Equal(t, tt.want, isTransientOutboundCSEFailure(tt.err)) + status := armcompute.InstanceViewStatus{ + Code: to.Ptr(tt.code), + Message: to.Ptr(tt.message), + } + cseStatus, err := parseLinuxCSEMessage(status) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + require.NotNil(t, cseStatus) + require.Equal(t, tt.wantExitCode, cseStatus.ExitCode) }) } }