@@ -92,6 +92,12 @@ func (c *sshConnectivity) init() error {
9292 }
9393 var err error
9494 var sshClient * ssh.Client
95+ // startTime is the time at which we began attempting to connect; it bounds how long authentication failures are retried
96+ startTime := time .Now ()
97+ // authRetryTimeout is the duration to retry authentication errors, allowing time for
98+ // user-data scripts to complete SSH configuration on newly provisioned VMs
99+ authRetryTimeout := 5 * time .Minute
100+
95101 // Retry if we are unable to create a client as the VM could still be executing the steps in its user data
96102 err = wait .PollImmediate (time .Minute , retry .Timeout , func () (bool , error ) {
97103 sshClient , err = ssh .Dial ("tcp" , c .ipAddress + ":" + sshPort , config )
@@ -100,7 +106,13 @@ func (c *sshConnectivity) init() error {
100106 }
101107 c .log .V (1 ).Info ("SSH dial" , "IP Address" , c .ipAddress , "error" , err )
102108 if strings .Contains (err .Error (), "unable to authenticate" ) {
103- // Authentication failure is a special case that must be handled differently
109+ // Retry authentication failure as the VM's user-data script may still be configuring SSH.
110+ elapsed := time .Since (startTime )
111+ if elapsed < authRetryTimeout {
112+ c .log .V (1 ).Info ("authentication failed, retrying as VM may still be executing user-data script" ,
113+ "elapsed" , elapsed , "timeout" , authRetryTimeout )
114+ return false , nil
115+ }
104116 return false , newAuthErr (err )
105117 }
106118 return false , nil
0 commit comments