From 2b4fa1217d7ededc7ee13a49053ea83b074c3dba Mon Sep 17 00:00:00 2001 From: pawel siwek Date: Thu, 7 Mar 2024 11:52:09 +0000 Subject: [PATCH] Add listManagedInstancesResults to GceCache. --- .../gce/autoscaling_gce_client.go | 15 ++ cluster-autoscaler/cloudprovider/gce/cache.go | 74 ++++++--- .../cloudprovider/gce/cache_test.go | 26 ++++ .../cloudprovider/gce/gce_manager.go | 1 + .../cloudprovider/gce/gce_manager_test.go | 11 +- .../cloudprovider/gce/mig_info_provider.go | 28 ++++ .../gce/mig_info_provider_test.go | 146 +++++++++++++++--- 7 files changed, 247 insertions(+), 54 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go index 9120596d6c13..12d88fb183c5 100644 --- a/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go +++ b/cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go @@ -110,6 +110,7 @@ type AutoscalingGceClient interface { FetchAvailableCpuPlatforms() (map[string][]string, error) FetchReservations() ([]*gce.Reservation, error) FetchReservationsInProject(projectId string) ([]*gce.Reservation, error) + FetchListManagedInstancesResults(migRef GceRef) (string, error) // modifying resources ResizeMig(GceRef, int64) error @@ -234,6 +235,20 @@ func (client *autoscalingGceClientV1) FetchMigBasename(migRef GceRef) (string, e return igm.BaseInstanceName, nil } +func (client *autoscalingGceClientV1) FetchListManagedInstancesResults(migRef GceRef) (string, error) { + registerRequest("instance_group_managers", "get") + igm, err := client.gceService.InstanceGroupManagers.Get(migRef.Project, migRef.Zone, migRef.Name).Fields("listManagedInstancesResults").Do() + if err != nil { + if err, ok := err.(*googleapi.Error); ok { + if err.Code == http.StatusNotFound { + return "", errors.NewAutoscalerError(errors.NodeGroupDoesNotExistError, "%s", err.Error()) + } + } + return "", err + } + return igm.ListManagedInstancesResults, nil +} + func (client *autoscalingGceClientV1) ResizeMig(migRef GceRef, size int64) error { registerRequest("instance_group_managers", "resize") op, err := client.gceService.InstanceGroupManagers.Resize(migRef.Project, migRef.Zone, migRef.Name, size).Do() diff --git a/cluster-autoscaler/cloudprovider/gce/cache.go b/cluster-autoscaler/cloudprovider/gce/cache.go index 28e37848b0ee..1d46e4f353d5 100644 --- a/cluster-autoscaler/cloudprovider/gce/cache.go +++ b/cluster-autoscaler/cloudprovider/gce/cache.go @@ -56,36 +56,38 @@ type GceCache struct { cacheMutex sync.Mutex // Cache content. - migs map[GceRef]Mig - instances map[GceRef][]GceInstance - instancesUpdateTime map[GceRef]time.Time - instancesToMig map[GceRef]GceRef - instancesFromUnknownMig map[GceRef]bool - resourceLimiter *cloudprovider.ResourceLimiter - autoscalingOptionsCache map[GceRef]map[string]string - machinesCache map[MachineTypeKey]MachineType - migTargetSizeCache map[GceRef]int64 - migBaseNameCache map[GceRef]string - instanceTemplateNameCache map[GceRef]string - instanceTemplatesCache map[GceRef]*gce.InstanceTemplate - kubeEnvCache map[GceRef]KubeEnv + migs map[GceRef]Mig + instances map[GceRef][]GceInstance + instancesUpdateTime map[GceRef]time.Time + instancesToMig map[GceRef]GceRef + instancesFromUnknownMig map[GceRef]bool + resourceLimiter *cloudprovider.ResourceLimiter + autoscalingOptionsCache map[GceRef]map[string]string + machinesCache map[MachineTypeKey]MachineType + migTargetSizeCache map[GceRef]int64 + migBaseNameCache map[GceRef]string + listManagedInstancesResultsCache map[GceRef]string + instanceTemplateNameCache map[GceRef]string + instanceTemplatesCache map[GceRef]*gce.InstanceTemplate + kubeEnvCache map[GceRef]KubeEnv } // NewGceCache creates empty GceCache. func NewGceCache() *GceCache { return &GceCache{ - migs: map[GceRef]Mig{}, - instances: map[GceRef][]GceInstance{}, - instancesUpdateTime: map[GceRef]time.Time{}, - instancesToMig: map[GceRef]GceRef{}, - instancesFromUnknownMig: map[GceRef]bool{}, - autoscalingOptionsCache: map[GceRef]map[string]string{}, - machinesCache: map[MachineTypeKey]MachineType{}, - migTargetSizeCache: map[GceRef]int64{}, - migBaseNameCache: map[GceRef]string{}, - instanceTemplateNameCache: map[GceRef]string{}, - instanceTemplatesCache: map[GceRef]*gce.InstanceTemplate{}, - kubeEnvCache: map[GceRef]KubeEnv{}, + migs: map[GceRef]Mig{}, + instances: map[GceRef][]GceInstance{}, + instancesUpdateTime: map[GceRef]time.Time{}, + instancesToMig: map[GceRef]GceRef{}, + instancesFromUnknownMig: map[GceRef]bool{}, + autoscalingOptionsCache: map[GceRef]map[string]string{}, + machinesCache: map[MachineTypeKey]MachineType{}, + migTargetSizeCache: map[GceRef]int64{}, + migBaseNameCache: map[GceRef]string{}, + listManagedInstancesResultsCache: map[GceRef]string{}, + instanceTemplateNameCache: map[GceRef]string{}, + instanceTemplatesCache: map[GceRef]*gce.InstanceTemplate{}, + kubeEnvCache: map[GceRef]KubeEnv{}, } } @@ -515,3 +517,25 @@ func (gc *GceCache) InvalidateAllMigBasenames() { defer gc.cacheMutex.Unlock() gc.migBaseNameCache = make(map[GceRef]string) } + +// SetListManagedInstancesResults sets listManagedInstancesResults for a given mig in cache +func (gc *GceCache) SetListManagedInstancesResults(migRef GceRef, listManagedInstancesResults string) { + gc.cacheMutex.Lock() + defer gc.cacheMutex.Unlock() + gc.listManagedInstancesResultsCache[migRef] = listManagedInstancesResults +} + +// GetListManagedInstancesResults gets listManagedInstancesResults for a given mig from cache. +func (gc *GceCache) GetListManagedInstancesResults(migRef GceRef) (string, bool) { + gc.cacheMutex.Lock() + defer gc.cacheMutex.Unlock() + listManagedInstancesResults, found := gc.listManagedInstancesResultsCache[migRef] + return listManagedInstancesResults, found +} + +// InvalidateAllListManagedInstancesResults invalidates all listManagedInstancesResults entries. +func (gc *GceCache) InvalidateAllListManagedInstancesResults() { + gc.cacheMutex.Lock() + defer gc.cacheMutex.Unlock() + gc.listManagedInstancesResultsCache = make(map[GceRef]string) +} diff --git a/cluster-autoscaler/cloudprovider/gce/cache_test.go b/cluster-autoscaler/cloudprovider/gce/cache_test.go index 4996664243bc..abefc1d86386 100644 --- a/cluster-autoscaler/cloudprovider/gce/cache_test.go +++ b/cluster-autoscaler/cloudprovider/gce/cache_test.go @@ -87,3 +87,29 @@ func TestMachineCache(t *testing.T) { }) } } + +func TestListManagedInstancesResultsCache(t *testing.T) { + checkInCache := func(c *GceCache, migRef GceRef, expectedResults string) { + result, found := c.GetListManagedInstancesResults(migRef) + if !found { + t.Errorf("Results not found for MIG ref: %s", migRef.String()) + } + if result != expectedResults { + t.Errorf("Expected results %s for MIG ref: %s, but got: %s", expectedResults, migRef.String(), result) + } + } + migRef := GceRef{ + Project: "project", + Zone: "us-test1", + Name: "mig", + } + c := NewGceCache() + c.SetListManagedInstancesResults(migRef, "PAGINATED") + checkInCache(c, migRef, "PAGINATED") + c.SetListManagedInstancesResults(migRef, "PAGELESS") + checkInCache(c, migRef, "PAGELESS") + c.InvalidateAllListManagedInstancesResults() + if cacheSize := len(c.listManagedInstancesResultsCache); cacheSize > 0 { + t.Errorf("Expected listManagedInstancesResultsCache to be empty, but it still contains %d entries", cacheSize) + } +} diff --git a/cluster-autoscaler/cloudprovider/gce/gce_manager.go b/cluster-autoscaler/cloudprovider/gce/gce_manager.go index df92294e57fd..3ba1778269b2 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_manager.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_manager.go @@ -300,6 +300,7 @@ func (m *gceManagerImpl) Refresh() error { m.cache.InvalidateAllMigInstances() m.cache.InvalidateAllMigTargetSizes() m.cache.InvalidateAllMigBasenames() + m.cache.InvalidateAllListManagedInstancesResults() m.cache.InvalidateAllMigInstanceTemplateNames() if m.lastRefresh.Add(refreshInterval).After(time.Now()) { return nil diff --git a/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go b/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go index 287d5bba8721..b4ddba0b4b87 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go @@ -343,11 +343,12 @@ func newTestGceManager(t *testing.T, testServerURL string, regional bool) *gceMa {"us-central1-c", "n1-standard-1"}: {Name: "n1-standard-1", CPU: 1, Memory: 1}, {"us-central1-f", "n1-standard-1"}: {Name: "n1-standard-1", CPU: 1, Memory: 1}, }, - migTargetSizeCache: map[GceRef]int64{}, - instanceTemplateNameCache: map[GceRef]string{}, - instanceTemplatesCache: map[GceRef]*gce.InstanceTemplate{}, - kubeEnvCache: map[GceRef]KubeEnv{}, - migBaseNameCache: map[GceRef]string{}, + migTargetSizeCache: map[GceRef]int64{}, + instanceTemplateNameCache: map[GceRef]string{}, + instanceTemplatesCache: map[GceRef]*gce.InstanceTemplate{}, + kubeEnvCache: map[GceRef]KubeEnv{}, + migBaseNameCache: map[GceRef]string{}, + listManagedInstancesResultsCache: map[GceRef]string{}, } migLister := NewMigLister(cache) manager := &gceManagerImpl{ diff --git a/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go b/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go index ffdd4e18a6b6..bd7c61c2ef8e 100644 --- a/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go +++ b/cluster-autoscaler/cloudprovider/gce/mig_info_provider.go @@ -52,6 +52,8 @@ type MigInfoProvider interface { // For custom machines cpu and memory information is based on parsing // machine name. For standard types it's retrieved from GCE API. GetMigMachineType(migRef GceRef) (MachineType, error) + // Returns the pagination behavior of the listManagedInstances API method for a given MIG ref + GetListManagedInstancesResults(migRef GceRef) (string, error) } type timeProvider interface { @@ -356,6 +358,7 @@ func (c *cachingMigInfoProvider) fillMigInfoCache() error { if registeredMigRefs[zoneMigRef] { c.cache.SetMigTargetSize(zoneMigRef, zoneMig.TargetSize) c.cache.SetMigBasename(zoneMigRef, zoneMig.BaseInstanceName) + c.cache.SetListManagedInstancesResults(zoneMigRef, zoneMig.ListManagedInstancesResults) templateUrl, err := url.Parse(zoneMig.InstanceTemplate) if err == nil { @@ -411,3 +414,28 @@ func (c *cachingMigInfoProvider) GetMigMachineType(migRef GceRef) (MachineType, } return machine, nil } + +func (c *cachingMigInfoProvider) GetListManagedInstancesResults(migRef GceRef) (string, error) { + c.migInfoMutex.Lock() + defer c.migInfoMutex.Unlock() + + listManagedInstancesResults, found := c.cache.GetListManagedInstancesResults(migRef) + if found { + return listManagedInstancesResults, nil + } + + err := c.fillMigInfoCache() + listManagedInstancesResults, found = c.cache.GetListManagedInstancesResults(migRef) + if err == nil && found { + return listManagedInstancesResults, nil + } + + // fallback to querying for a single mig + listManagedInstancesResults, err = c.gceClient.FetchListManagedInstancesResults(migRef) + if err != nil { + c.migLister.HandleMigIssue(migRef, err) + return "", err + } + c.cache.SetListManagedInstancesResults(migRef, listManagedInstancesResults) + return listManagedInstancesResults, nil +} diff --git a/cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go b/cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go index b0bd51bdf50a..a7e9e311766f 100644 --- a/cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go +++ b/cluster-autoscaler/cloudprovider/gce/mig_info_provider_test.go @@ -30,13 +30,14 @@ import ( ) var ( - errFetchMig = errors.New("fetch migs error") - errFetchMigInstances = errors.New("fetch mig instances error") - errFetchMigTargetSize = errors.New("fetch mig target size error") - errFetchMigBaseName = errors.New("fetch mig basename error") - errFetchMigTemplateName = errors.New("fetch mig template name error") - errFetchMigTemplate = errors.New("fetch mig template error") - errFetchMachineType = errors.New("fetch machine type error") + errFetchMig = errors.New("fetch migs error") + errFetchMigInstances = errors.New("fetch mig instances error") + errFetchMigTargetSize = errors.New("fetch mig target size error") + errFetchMigBaseName = errors.New("fetch mig basename error") + errFetchMigTemplateName = errors.New("fetch mig template name error") + errFetchMigTemplate = errors.New("fetch mig template error") + errFetchListManagedInstancesResults = errors.New("fetch ListManagedInstancesResults error") + errFetchMachineType = errors.New("fetch machine type error") mig = &gceMig{ gceRef: GceRef{ @@ -48,13 +49,14 @@ var ( ) type mockAutoscalingGceClient struct { - fetchMigs func(string) ([]*gce.InstanceGroupManager, error) - fetchMigTargetSize func(GceRef) (int64, error) - fetchMigBasename func(GceRef) (string, error) - fetchMigInstances func(GceRef) ([]GceInstance, error) - fetchMigTemplateName func(GceRef) (string, error) - fetchMigTemplate func(GceRef, string) (*gce.InstanceTemplate, error) - fetchMachineType func(string, string) (*gce.MachineType, error) + fetchMigs func(string) ([]*gce.InstanceGroupManager, error) + fetchMigTargetSize func(GceRef) (int64, error) + fetchMigBasename func(GceRef) (string, error) + fetchMigInstances func(GceRef) ([]GceInstance, error) + fetchMigTemplateName func(GceRef) (string, error) + fetchMigTemplate func(GceRef, string) (*gce.InstanceTemplate, error) + fetchMachineType func(string, string) (*gce.MachineType, error) + fetchListManagedInstancesResults func(GceRef) (string, error) } func (client *mockAutoscalingGceClient) FetchMachineType(zone, machineName string) (*gce.MachineType, error) { @@ -77,6 +79,10 @@ func (client *mockAutoscalingGceClient) FetchMigBasename(migRef GceRef) (string, return client.fetchMigBasename(migRef) } +func (client *mockAutoscalingGceClient) FetchListManagedInstancesResults(migRef GceRef) (string, error) { + return client.fetchListManagedInstancesResults(migRef) +} + func (client *mockAutoscalingGceClient) FetchMigInstances(migRef GceRef) ([]GceInstance, error) { return client.fetchMigInstances(migRef) } @@ -703,7 +709,7 @@ func TestGetMigBasename(t *testing.T) { expectedBasename: basename, }, { - name: "target size from cache fill", + name: "basename from cache fill", cache: emptyCache(), fetchMigs: fetchMigsConst([]*gce.InstanceGroupManager{instanceGroupManager}), expectedBasename: basename, @@ -760,6 +766,87 @@ func TestGetMigBasename(t *testing.T) { } } +func TestGetListManagedInstancesResults(t *testing.T) { + results := "PAGELESS" + instanceGroupManager := &gce.InstanceGroupManager{ + Zone: mig.GceRef().Zone, + Name: mig.GceRef().Name, + ListManagedInstancesResults: results, + } + testCases := []struct { + name string + cache *GceCache + fetchMigs func(string) ([]*gce.InstanceGroupManager, error) + fetchResults func(GceRef) (string, error) + expectedResults string + expectedErr error + }{ + { + name: "results in cache", + cache: &GceCache{ + migs: map[GceRef]Mig{mig.GceRef(): mig}, + listManagedInstancesResultsCache: map[GceRef]string{mig.GceRef(): results}, + }, + expectedResults: results, + }, + { + name: "results from cache fill", + cache: emptyCache(), + fetchMigs: fetchMigsConst([]*gce.InstanceGroupManager{instanceGroupManager}), + expectedResults: results, + }, + { + name: "cache fill without mig, fallback success", + cache: emptyCache(), + fetchMigs: fetchMigsConst([]*gce.InstanceGroupManager{}), + fetchResults: fetchListManagedInstancesResultsConst(results), + expectedResults: results, + }, + { + name: "cache fill failure, fallback success", + cache: emptyCache(), + fetchMigs: fetchMigsFail, + fetchResults: fetchListManagedInstancesResultsConst(results), + expectedResults: results, + }, + { + name: "cache fill without mig, fallback failure", + cache: emptyCache(), + fetchMigs: fetchMigsConst([]*gce.InstanceGroupManager{}), + fetchResults: fetchListManagedInstancesResultsFail, + expectedErr: errFetchListManagedInstancesResults, + }, + { + name: "cache fill failure, fallback failure", + cache: emptyCache(), + fetchMigs: fetchMigsFail, + fetchResults: fetchListManagedInstancesResultsFail, + expectedErr: errFetchListManagedInstancesResults, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + client := &mockAutoscalingGceClient{ + fetchMigs: tc.fetchMigs, + fetchListManagedInstancesResults: tc.fetchResults, + } + migLister := NewMigLister(tc.cache) + provider := NewCachingMigInfoProvider(tc.cache, migLister, client, mig.GceRef().Project, 1, 0*time.Second) + + results, err := provider.GetListManagedInstancesResults(mig.GceRef()) + cachedResults, found := tc.cache.GetListManagedInstancesResults(mig.GceRef()) + + assert.Equal(t, tc.expectedErr, err) + assert.Equal(t, tc.expectedErr == nil, found) + if tc.expectedErr == nil { + assert.Equal(t, tc.expectedResults, results) + assert.Equal(t, tc.expectedResults, cachedResults) + } + }) + } +} + func TestGetMigInstanceTemplateName(t *testing.T) { templateName := "template-name" instanceGroupManager := &gce.InstanceGroupManager{ @@ -785,7 +872,7 @@ func TestGetMigInstanceTemplateName(t *testing.T) { expectedTemplateName: templateName, }, { - name: "target size from cache fill", + name: "template name from cache fill", cache: emptyCache(), fetchMigs: fetchMigsConst([]*gce.InstanceGroupManager{instanceGroupManager}), expectedTemplateName: templateName, @@ -1284,14 +1371,15 @@ func (f *fakeTime) Now() time.Time { func emptyCache() *GceCache { return &GceCache{ - migs: map[GceRef]Mig{mig.GceRef(): mig}, - instances: make(map[GceRef][]GceInstance), - instancesUpdateTime: make(map[GceRef]time.Time), - migTargetSizeCache: make(map[GceRef]int64), - migBaseNameCache: make(map[GceRef]string), - instanceTemplateNameCache: make(map[GceRef]string), - instanceTemplatesCache: make(map[GceRef]*gce.InstanceTemplate), - instancesFromUnknownMig: make(map[GceRef]bool), + migs: map[GceRef]Mig{mig.GceRef(): mig}, + instances: make(map[GceRef][]GceInstance), + instancesUpdateTime: make(map[GceRef]time.Time), + migTargetSizeCache: make(map[GceRef]int64), + migBaseNameCache: make(map[GceRef]string), + listManagedInstancesResultsCache: make(map[GceRef]string), + instanceTemplateNameCache: make(map[GceRef]string), + instanceTemplatesCache: make(map[GceRef]*gce.InstanceTemplate), + instancesFromUnknownMig: make(map[GceRef]bool), } } @@ -1368,6 +1456,16 @@ func fetchMigTemplateConst(template *gce.InstanceTemplate) func(GceRef, string) } } +func fetchListManagedInstancesResultsFail(_ GceRef) (string, error) { + return "", errFetchListManagedInstancesResults +} + +func fetchListManagedInstancesResultsConst(listManagedInstancesResults string) func(GceRef) (string, error) { + return func(GceRef) (string, error) { + return listManagedInstancesResults, nil + } +} + func fetchMachineTypeFail(_, _ string) (*gce.MachineType, error) { return nil, errFetchMachineType }