Skip to content

Commit 8ed1230

Browse files
committed
Return generated instance names from autoscaling GCE client
When there are multiple `createInstances` requests sent in a sequence, the next request must be aware of the instance names used by the previous request. Without this change sending a sequence of create instances requests would result in the instance name duplication error.
1 parent a62fc19 commit 8ed1230

File tree

5 files changed

+97
-12
lines changed

5 files changed

+97
-12
lines changed

cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ type AutoscalingGceClient interface {
142142
// modifying resources
143143
ResizeMig(GceRef, int64) error
144144
DeleteInstances(migRef GceRef, instances []GceRef) error
145-
CreateInstances(GceRef, string, int64, []string) error
145+
CreateInstances(GceRef, string, int64, []string) ([]string, error)
146146

147147
// WaitForOperation can be used to poll GCE operations until completion/timeout using WAIT calls.
148148
// Calling this is normally not needed when interacting with the client, other methods should call it internally.
@@ -294,24 +294,27 @@ func (client *autoscalingGceClientV1) ResizeMig(migRef GceRef, size int64) error
294294
return client.WaitForOperation(op.Name, op.OperationType, migRef.Project, migRef.Zone)
295295
}
296296

297-
func (client *autoscalingGceClientV1) CreateInstances(migRef GceRef, baseName string, delta int64, existingInstanceProviderIds []string) error {
297+
func (client *autoscalingGceClientV1) CreateInstances(migRef GceRef, baseName string, delta int64, existingInstanceProviderIds []string) ([]string, error) {
298298
registerRequest("instance_group_managers", "create_instances")
299299
ctx, cancel := context.WithTimeout(context.Background(), client.operationPerCallTimeout)
300300
defer cancel()
301301
req := gce.InstanceGroupManagersCreateInstancesRequest{}
302302
instanceNames := instanceIdsToNamesMap(existingInstanceProviderIds)
303303
req.Instances = make([]*gce.PerInstanceConfig, 0, delta)
304-
for i := int64(0); i < delta; i++ {
304+
createdIds := make([]string, delta)
305+
for i := range delta {
305306
newInstanceName := generateInstanceName(baseName, instanceNames)
306307
instanceNames[newInstanceName] = true
307308
req.Instances = append(req.Instances, &gce.PerInstanceConfig{Name: newInstanceName})
309+
ref := GceRef{migRef.Project, migRef.Zone, newInstanceName}
310+
createdIds[i] = ref.ToProviderId()
308311
}
309312

310313
op, err := client.gceService.InstanceGroupManagers.CreateInstances(migRef.Project, migRef.Zone, migRef.Name, &req).Context(ctx).Do()
311314
if err != nil {
312-
return err
315+
return nil, err
313316
}
314-
return client.WaitForOperation(op.Name, op.OperationType, migRef.Project, migRef.Zone)
317+
return createdIds, client.WaitForOperation(op.Name, op.OperationType, migRef.Project, migRef.Zone)
315318
}
316319

317320
func instanceIdsToNamesMap(instanceProviderIds []string) map[string]bool {

cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"net/http"
2424
"os"
2525
"regexp"
26+
"strings"
2627
"testing"
2728
"time"
2829

@@ -34,6 +35,7 @@ import (
3435
"github.com/google/go-cmp/cmp/cmpopts"
3536
"github.com/stretchr/testify/assert"
3637
"github.com/stretchr/testify/mock"
38+
"github.com/stretchr/testify/require"
3739
gce_api "google.golang.org/api/compute/v1"
3840
)
3941

@@ -697,7 +699,8 @@ func TestAutoscalingClientTimeouts(t *testing.T) {
697699
}{
698700
"CreateInstances_ContextTimeout": {
699701
clientFunc: func(client *autoscalingGceClientV1) error {
700-
return client.CreateInstances(GceRef{}, "", 0, nil)
702+
_, err := client.CreateInstances(GceRef{}, "", 0, nil)
703+
return err
701704
},
702705
operationPerCallTimeout: &instantTimeout,
703706
},
@@ -764,7 +767,8 @@ func TestAutoscalingClientTimeouts(t *testing.T) {
764767
},
765768
"CreateInstances_HttpClientTimeout": {
766769
clientFunc: func(client *autoscalingGceClientV1) error {
767-
return client.CreateInstances(GceRef{}, "", 0, nil)
770+
_, err := client.CreateInstances(GceRef{}, "", 0, nil)
771+
return err
768772
},
769773
httpTimeout: instantTimeout,
770774
},
@@ -896,6 +900,27 @@ func TestAutoscalingClientTimeouts(t *testing.T) {
896900
}
897901
}
898902

903+
func TestCreateInstances(t *testing.T) {
904+
server := test_util.NewHttpServerMock()
905+
defer server.Close()
906+
b, err := json.Marshal(gce_api.Operation{
907+
Name: "operation-2505728466148-216f5197",
908+
})
909+
assert.NoError(t, err)
910+
server.On("handle", "/projects/project1/zones/us-central1-b/instanceGroupManagers/igm1/createInstances").Return(string(b)).Times(1)
911+
server.On("handle", "/projects/project1/zones/us-central1-b/operations/operation-2505728466148-216f5197/wait").Return(operationDoneResponse).Once()
912+
client := newTestAutoscalingGceClientWithTimeout(t, "project", server.URL, "", time.Second)
913+
migRef := GceRef{Project: "project1", Zone: "us-central1-b", Name: "igm1"}
914+
createdIds, err := client.CreateInstances(migRef, migRef.Name, 10, nil)
915+
assert.NoError(t, err)
916+
assert.Len(t, createdIds, 10, "Expected 10 instance names in result")
917+
for _, id := range createdIds {
918+
createdRef, _ := GceRefFromProviderId(id)
919+
prefixed := strings.HasPrefix(createdRef.Name, migRef.Name+"-")
920+
require.Truef(t, prefixed, "Expected node name \"%v\" to be prefixed with \"%v\"", createdRef.Name, migRef.Name)
921+
}
922+
}
923+
899924
func TestFetchAllInstances(t *testing.T) {
900925
igm1 := "projects/893226960234/zones/zones/instanceGroupManagers/test-igm1-grp"
901926
igm2 := "projects/893226960234/zones/zones/instanceGroupManagers/test-igm2-grp"

cluster-autoscaler/cloudprovider/gce/gce_manager.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ const (
5656
migAutoDiscovererKeyPrefix = "namePrefix"
5757
migAutoDiscovererKeyMinNodes = "min"
5858
migAutoDiscovererKeyMaxNodes = "max"
59+
createInstancesRequestLimit = 1000
5960
)
6061

6162
var (
@@ -332,16 +333,32 @@ func (m *gceManagerImpl) CreateInstances(mig Mig, delta int64) error {
332333
if err != nil {
333334
return err
334335
}
335-
instancesNames := make([]string, 0, len(instances))
336+
instanceIds := make([]string, 0, len(instances)+int(delta))
336337
for _, ins := range instances {
337-
instancesNames = append(instancesNames, ins.Id)
338+
instanceIds = append(instanceIds, ins.Id)
338339
}
339340
baseName, err := m.migInfoProvider.GetMigBasename(mig.GceRef())
340341
if err != nil {
341342
return fmt.Errorf("can't upscale %s: failed to collect BaseInstanceName: %w", mig.GceRef(), err)
342343
}
343344
m.cache.InvalidateMigTargetSize(mig.GceRef())
344-
return m.GceService.CreateInstances(mig.GceRef(), baseName, delta, instancesNames)
345+
totalReqs := int((delta + createInstancesRequestLimit - 1) / createInstancesRequestLimit)
346+
remaining := delta
347+
for i := 0; i < totalReqs; i++ {
348+
increment := min(remaining, createInstancesRequestLimit)
349+
if totalReqs > 1 {
350+
klog.Infof("Sending chunked GCE createInstances request. Request: %d/%d RequestSize: %v", i+1, totalReqs, increment)
351+
}
352+
ids, err := m.GceService.CreateInstances(mig.GceRef(), baseName, increment, instanceIds)
353+
if err != nil {
354+
return err
355+
}
356+
remaining -= increment
357+
for _, id := range ids {
358+
instanceIds = append(instanceIds, id)
359+
}
360+
}
361+
return nil
345362
}
346363

347364
func (m *gceManagerImpl) forceRefresh() error {

cluster-autoscaler/cloudprovider/gce/gce_manager_test.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1520,6 +1520,46 @@ func TestAppendInstances(t *testing.T) {
15201520
mock.AssertExpectationsForObjects(t, server)
15211521
}
15221522

1523+
func TestCreateInstancesWithMultipleRequests(t *testing.T) {
1524+
server := NewHttpServerMock()
1525+
defer server.Close()
1526+
g := newTestGceManager(t, server.URL, false)
1527+
mig := setupTestDefaultPool(g, true)
1528+
server.On("handle", "/projects/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool/listManagedInstances").Return(buildListInstanceGroupManagersResponse(
1529+
buildListInstanceGroupManagersResponsePart(defaultPoolMigName, zoneB, 3),
1530+
)).Once()
1531+
1532+
tests := []struct {
1533+
delta int
1534+
wantRequests int
1535+
}{
1536+
{
1537+
delta: 100,
1538+
wantRequests: 1,
1539+
},
1540+
{
1541+
delta: 1000,
1542+
wantRequests: 1,
1543+
},
1544+
{
1545+
delta: 1001,
1546+
wantRequests: 2,
1547+
},
1548+
{
1549+
delta: 3000,
1550+
wantRequests: 3,
1551+
},
1552+
}
1553+
for _, tt := range tests {
1554+
t.Run(fmt.Sprintf("delta=%v", tt.delta), func(t *testing.T) {
1555+
server.On("handle", fmt.Sprintf("/projects/project1/zones/us-central1-b/instanceGroupManagers/%v/createInstances", mig.gceRef.Name)).Return(createInstancesResponse).Times(tt.wantRequests)
1556+
server.On("handle", "/projects/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32/wait").Return(createInstancesOperationResponse).Times(tt.wantRequests)
1557+
err := g.CreateInstances(mig, int64(tt.delta))
1558+
assert.NoError(t, err)
1559+
})
1560+
}
1561+
}
1562+
15231563
func TestGetMigOptions(t *testing.T) {
15241564
defaultOptions := &config.NodeGroupAutoscalingOptions{
15251565
ScaleDownUtilizationThreshold: 0.1,

cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,8 @@ func (client *mockAutoscalingGceClient) DeleteInstances(_ GceRef, _ []GceRef) er
195195
return nil
196196
}
197197

198-
func (client *mockAutoscalingGceClient) CreateInstances(_ GceRef, _ string, _ int64, _ []string) error {
199-
return nil
198+
func (client *mockAutoscalingGceClient) CreateInstances(_ GceRef, _ string, _ int64, _ []string) ([]string, error) {
199+
return nil, nil
200200
}
201201

202202
func (client *mockAutoscalingGceClient) WaitForOperation(_, _, _, _ string) error {

0 commit comments

Comments
 (0)