@@ -17,16 +17,20 @@ limitations under the License.
1717package azure
1818
1919import (
20+ "errors"
2021 "fmt"
2122 "math/rand"
23+ "net/http"
2224 "sort"
2325 "strings"
2426 "sync"
2527 "time"
2628
27- "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
28- "github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2017-05-10/resources"
29- azStorage "github.com/Azure/azure-sdk-for-go/storage"
29+ "github.com/Azure/azure-sdk-for-go/sdk/azcore"
30+ "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
31+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v7"
32+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources/v2"
33+ "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
3034 "k8s.io/utils/ptr"
3135
3236 apiv1 "k8s.io/api/core/v1"
@@ -82,10 +86,10 @@ func (as *AgentPool) initialize() error {
8286 ctx , cancel := getContextWithCancel ()
8387 defer cancel ()
8488
85- template , err := as .manager .azClient .deploymentClient .ExportTemplate (ctx , as .manager .config .ResourceGroup , as .manager .config .Deployment )
89+ template , err := as .manager .azClient .deploymentClient .ExportTemplate (ctx , as .manager .config .ResourceGroup , as .manager .config .Deployment , nil )
8690 if err != nil {
8791 klog .Errorf ("deploymentClient.ExportTemplate(%s, %s) failed: %v" , as .manager .config .ResourceGroup , as .manager .config .Deployment , err )
88- return err . Error ()
92+ return err
8993 }
9094
9195 as .template = template .Template .(map [string ]interface {})
@@ -136,10 +140,10 @@ func (as *AgentPool) Id() string {
136140 return as .Name
137141}
138142
// getVMsFromCache returns the virtual machines that the manager's azureCache
// holds under this pool's name (as.Name). If the cache has no entry for the
// pool, it returns an empty slice together with a "could not find VMs" error.
//
// This hunk only changes the element type, from compute.VirtualMachine to
// armcompute.VirtualMachine; the lookup logic is unchanged.
139- func (as * AgentPool ) getVMsFromCache () ([]compute .VirtualMachine , error ) {
143+ func (as * AgentPool ) getVMsFromCache () ([]armcompute .VirtualMachine , error ) {
140144 allVMs := as .manager .azureCache .getVirtualMachines ()
// NOTE(review): the map is indexed twice (here and in the final return); a
// single comma-ok lookup would be enough.
141145 if _ , exists := allVMs [as .Name ]; ! exists {
142- return []compute .VirtualMachine {}, fmt .Errorf ("could not find VMs with poolName: %s" , as .Name )
146+ return []armcompute .VirtualMachine {}, fmt .Errorf ("could not find VMs with poolName: %s" , as .Name )
143147 }
144148 return allVMs [as .Name ], nil
145149}
@@ -157,7 +161,7 @@ func (as *AgentPool) GetVMIndexes() ([]int, map[int]string, error) {
157161 indexes := make ([]int , 0 )
158162 indexToVM := make (map [int ]string )
159163 for _ , instance := range instances {
160- index , err := GetVMNameIndex (instance .StorageProfile .OsDisk . OsType , * instance .Name )
164+ index , err := GetVMNameIndex (ptr . Deref ( instance .Properties . StorageProfile .OSDisk . OSType , armcompute . OperatingSystemTypesLinux ) , * instance .Name )
161165 if err != nil {
162166 return nil , nil , err
163167 }
@@ -211,27 +215,28 @@ func (as *AgentPool) TargetSize() (int, error) {
211215 return int (size ), nil
212216}
213217
// getAllSucceededAndFailedDeployments lists the deployments in the configured
// resource group and returns only those whose provisioning state is
// "Succeeded" or "Failed". Deployments with nil Properties or a nil
// ProvisioningState are skipped.
//
// The added (+) lines replace the single List call with a pager loop
// (NewListByResourceGroupPager / More / NextPage). The first page error is
// logged and returned as-is, and any deployments collected so far are dropped.
//
// NOTE(review): the added version returns a nil slice when nothing matches
// (declared with var, only appended to), whereas the removed version returned
// an empty non-nil slice from make. Callers that only use len/range are
// unaffected; confirm none compare against nil.
214- func (as * AgentPool ) getAllSucceededAndFailedDeployments () ([]resources .DeploymentExtended , error ) {
218+ func (as * AgentPool ) getAllSucceededAndFailedDeployments () ([]armresources .DeploymentExtended , error ) {
215219 ctx , cancel := getContextWithCancel ()
216220 defer cancel ()
217221
218- allDeployments , rerr := as .manager .azClient .deploymentClient .List (ctx , as .manager .config .ResourceGroup )
219- if rerr != nil {
220- klog .Errorf ("getAllSucceededAndFailedDeployments: failed to list deployments with error: %v" , rerr .Error ())
221- return nil , rerr .Error ()
222- }
223-
224- result := make ([]resources.DeploymentExtended , 0 )
225- for _ , deployment := range allDeployments {
226- if deployment .Properties == nil || deployment .Properties .ProvisioningState == nil {
227- continue
222+ var deployments []armresources.DeploymentExtended
223+ pager := as .manager .azClient .deploymentClient .NewListByResourceGroupPager (as .manager .config .ResourceGroup , nil )
224+ for pager .More () {
225+ page , rerr := pager .NextPage (ctx )
226+ if rerr != nil {
227+ klog .Errorf ("getAllSucceededAndFailedDeployments: failed to list deployments with error: %v" , rerr .Error ())
228+ return nil , rerr
228229 }
229- if * deployment .Properties .ProvisioningState == "Succeeded" || * deployment .Properties .ProvisioningState == "Failed" {
230- result = append (result , deployment )
// NOTE(review): page.Value elements are pointers (they are dereferenced with
// *deployment below). The loop reads deployment.Properties without checking
// deployment itself for nil; presumably the SDK never yields nil elements,
// TODO confirm or add a guard.
230+ for _ , deployment := range page .Value {
231+ if deployment .Properties == nil || deployment .Properties .ProvisioningState == nil {
232+ continue
233+ }
234+ if * deployment .Properties .ProvisioningState == "Succeeded" || * deployment .Properties .ProvisioningState == "Failed" {
235+ deployments = append (deployments , * deployment )
236+ }
231237 }
232238 }
233-
234- return result , rerr .Error ()
239+ return deployments , nil
235240}
236241
237242// deleteOutdatedDeployments keeps the newest deployments in the resource group and deletes others,
@@ -256,7 +261,7 @@ func (as *AgentPool) deleteOutdatedDeployments() (err error) {
256261 }
257262
258263 sort .Slice (deployments , func (i , j int ) bool {
259- return deployments [i ].Properties .Timestamp .Time . After (deployments [j ].Properties .Timestamp . Time )
264+ return deployments [i ].Properties .Timestamp .After (* deployments [j ].Properties .Timestamp )
260265 })
261266
262267 toBeDeleted := deployments [as .manager .config .MaxDeploymentsCount :]
@@ -266,10 +271,18 @@ func (as *AgentPool) deleteOutdatedDeployments() (err error) {
266271
267272 errList := make ([]error , 0 )
268273 for _ , deployment := range toBeDeleted {
269- klog .V (4 ).Infof ("deleteOutdatedDeployments: starts deleting outdated deployment (%s)" , * deployment .Name )
270- rerr := as .manager .azClient .deploymentClient .Delete (ctx , as .manager .config .ResourceGroup , * deployment .Name )
274+ klog .V (4 ).Infof ("deleteOutdatedDeployments: start deleting outdated deployment (%s)" , * deployment .Name )
275+ poller , rerr := as .manager .azClient .deploymentClient .BeginDelete (ctx , as .manager .config .ResourceGroup , * deployment .Name , nil )
271276 if rerr != nil {
272- errList = append (errList , rerr .Error ())
277+ klog .Errorf ("deleteOutdatedDeployments: failed to begin deleting deployment (%s) with error: %v" , * deployment .Name , rerr .Error ())
278+ errList = append (errList , rerr )
279+ continue
280+ }
281+ _ , rerr = poller .PollUntilDone (ctx , & runtime.PollUntilDoneOptions {Frequency : 30 * time .Second })
282+ if rerr != nil {
283+ klog .Errorf ("deleteOutdatedDeployments: failed to delete deployment (%s) with error: %v" , * deployment .Name , rerr .Error ())
284+ errList = append (errList , rerr )
285+ continue
273286 }
274287 }
275288
@@ -313,26 +326,39 @@ func (as *AgentPool) IncreaseSize(delta int) error {
313326 if highestUsedIndex != 0 {
314327 countForTemplate += highestUsedIndex + 1 - curSize
315328 }
316- as .parameters [as .Name + "Count" ] = map [string ]int {"value" : countForTemplate }
317- as .parameters [as .Name + "Offset" ] = map [string ]int {"value" : highestUsedIndex + 1 }
329+ as .parameters [as .Name + "Count" ] = map [string ]interface {}{"value" : countForTemplate }
330+ as .parameters [as .Name + "Offset" ] = map [string ]interface {}{"value" : highestUsedIndex + 1 }
331+
332+ // Convert parameters to DeploymentParameter format
333+ deploymentParams := make (map [string ]* armresources.DeploymentParameter , len (as .parameters ))
334+ for key , value := range as .parameters {
335+ deploymentParams [key ] = & armresources.DeploymentParameter {
336+ Value : value ,
337+ }
338+ }
318339
319340 newDeploymentName := fmt .Sprintf ("cluster-autoscaler-%d" , rand .New (rand .NewSource (time .Now ().UnixNano ())).Int31 ())
320- newDeployment := resources .Deployment {
321- Properties : & resources .DeploymentProperties {
322- Template : & as .template ,
323- Parameters : & as . parameters ,
324- Mode : resources . Incremental ,
341+ newDeployment := armresources .Deployment {
342+ Properties : & armresources .DeploymentProperties {
343+ Template : as .template ,
344+ Parameters : deploymentParams ,
345+ Mode : ptr . To ( armresources . DeploymentModeIncremental ) ,
325346 },
326347 }
327348 ctx , cancel := getContextWithCancel ()
328349 defer cancel ()
329350 klog .V (3 ).Infof ("Waiting for deploymentClient.CreateOrUpdate(%s, %s, %v)" , as .manager .config .ResourceGroup , newDeploymentName , newDeployment )
330- rerr := as .manager .azClient .deploymentClient .CreateOrUpdate (ctx , as .manager .config .ResourceGroup , newDeploymentName , newDeployment , "" )
351+ poller , rerr := as .manager .azClient .deploymentClient .BeginCreateOrUpdate (ctx , as .manager .config .ResourceGroup , newDeploymentName , newDeployment , nil )
352+ if rerr != nil {
353+ klog .Errorf ("deploymentClient.BeginCreateOrUpdate for deployment %q failed: %v" , newDeploymentName , rerr .Error ())
354+ return rerr
355+ }
356+ resp , rerr := poller .PollUntilDone (ctx , & runtime.PollUntilDoneOptions {Frequency : 30 * time .Second })
331357 if rerr != nil {
332358 klog .Errorf ("deploymentClient.CreateOrUpdate for deployment %q failed: %v" , newDeploymentName , rerr .Error ())
333- return rerr . Error ()
359+ return rerr
334360 }
335- klog .V (3 ).Infof ("deploymentClient.CreateOrUpdate(%s, %s, %v) success" , as .manager .config .ResourceGroup , newDeploymentName , newDeployment )
361+ klog .V (3 ).Infof ("deploymentClient.CreateOrUpdate(%s, %s, %v) success" , as .manager .config .ResourceGroup , * resp . DeploymentExtended . Name , resp . DeploymentExtended )
336362
337363 // Update cache after scale success.
338364 as .curSize = int64 (expectedSize )
@@ -515,50 +541,64 @@ func (as *AgentPool) deleteBlob(accountName, vhdContainer, vhdBlob string) error
515541 ctx , cancel := getContextWithCancel ()
516542 defer cancel ()
517543
518- storageKeysResult , rerr := as .manager .azClient .storageAccountsClient .ListKeys (ctx , as .manager .config .SubscriptionID , as . manager . config . ResourceGroup , accountName )
544+ storageKeysResult , rerr := as .manager .azClient .storageAccountsClient .ListKeys (ctx , as .manager .config .ResourceGroup , accountName , nil )
519545 if rerr != nil {
520- return rerr .Error ()
546+ return rerr
547+ }
548+
549+ keys := storageKeysResult .Keys
550+ if len (keys ) == 0 {
551+ return fmt .Errorf ("no storage keys found for account %s" , accountName )
521552 }
522553
523- keys := * storageKeysResult .Keys
524- client , err := azStorage .NewBasicClientOnSovereignCloud (accountName , ptr .Deref (keys [0 ].Value , "" ), as .manager .env )
554+ // Construct service URL using the storage account endpoint
555+ serviceURL := fmt .Sprintf ("https://%s.blob.%s" , accountName , as .manager .env .StorageEndpointSuffix )
556+
557+ // Create a SharedKeyCredential
558+ credential , err := azblob .NewSharedKeyCredential (accountName , ptr .Deref (keys [0 ].Value , "" ))
525559 if err != nil {
526- return err
560+ return fmt . Errorf ( "failed to create shared key credential: %w" , err )
527561 }
528562
529- bs := client .GetBlobService ()
530- containerRef := bs .GetContainerReference (vhdContainer )
531- blobRef := containerRef .GetBlobReference (vhdBlob )
563+ // Create a service client
564+ serviceClient , err := azblob .NewClientWithSharedKeyCredential (serviceURL , credential , nil )
565+ if err != nil {
566+ return fmt .Errorf ("failed to create service client: %w" , err )
567+ }
532568
533- return blobRef .Delete (& azStorage.DeleteBlobOptions {})
569+ // Delete the blob
570+ _ , err = serviceClient .DeleteBlob (ctx , vhdContainer , vhdBlob , nil )
571+ return err
534572}
535573
536574// deleteVirtualMachine deletes a VM and any associated OS disk
537575func (as * AgentPool ) deleteVirtualMachine (name string ) error {
538576 ctx , cancel := getContextWithCancel ()
539577 defer cancel ()
540578
541- vm , rerr := as .manager .azClient .virtualMachinesClient .Get (ctx , as .manager .config .ResourceGroup , name , "" )
579+ vm , rerr := as .manager .azClient .virtualMachinesClient .Get (ctx , as .manager .config .ResourceGroup , name , nil )
542580 if rerr != nil {
543- if exists , _ := checkResourceExistsFromRetryError (rerr ); ! exists {
581+ // Check if it's a 404 error indicating resource doesn't exist
582+ var respErr * azcore.ResponseError
583+ if errors .As (rerr , & respErr ) && respErr .StatusCode == http .StatusNotFound {
544584 klog .V (2 ).Infof ("VirtualMachine %s/%s has already been removed" , as .manager .config .ResourceGroup , name )
545585 return nil
546586 }
547587
548588 klog .Errorf ("failed to get VM: %s/%s: %s" , as .manager .config .ResourceGroup , name , rerr .Error ())
549- return rerr . Error ()
589+ return rerr
550590 }
551591
552- vhd := vm .VirtualMachineProperties .StorageProfile .OsDisk .Vhd
553- managedDisk := vm .VirtualMachineProperties .StorageProfile .OsDisk .ManagedDisk
592+ vhd := vm .Properties .StorageProfile .OSDisk .Vhd
593+ managedDisk := vm .Properties .StorageProfile .OSDisk .ManagedDisk
554594 if vhd == nil && managedDisk == nil {
555595 klog .Errorf ("failed to get a valid os disk URI for VM: %s/%s" , as .manager .config .ResourceGroup , name )
556596 return fmt .Errorf ("os disk does not have a VHD URI" )
557597 }
558598
559- osDiskName := vm .VirtualMachineProperties .StorageProfile .OsDisk .Name
599+ osDiskName := vm .Properties .StorageProfile .OSDisk .Name
560600 var nicName string
561- nicID := (* vm .VirtualMachineProperties .NetworkProfile .NetworkInterfaces )[0 ].ID
601+ nicID := (vm .Properties .NetworkProfile .NetworkInterfaces )[0 ].ID
562602 if nicID == nil {
563603 klog .Warningf ("NIC ID is not set for VM (%s/%s)" , as .manager .config .ResourceGroup , name )
564604 } else {
@@ -574,22 +614,32 @@ func (as *AgentPool) deleteVirtualMachine(name string) error {
574614 defer deleteCancel ()
575615
576616 klog .Infof ("waiting for VirtualMachine deletion: %s/%s" , as .manager .config .ResourceGroup , name )
577- rerr = as .manager .azClient .virtualMachinesClient .Delete (deleteCtx , as .manager .config .ResourceGroup , name )
578- _ , realErr := checkResourceExistsFromRetryError (rerr )
579- if realErr != nil {
580- return realErr
617+ poller , rerr := as .manager .azClient .virtualMachinesClient .BeginDelete (deleteCtx , as .manager .config .ResourceGroup , name , nil )
618+ if rerr != nil {
619+ klog .Errorf ("failed to begin deleting VM: %s/%s: %s" , as .manager .config .ResourceGroup , name , rerr .Error ())
620+ return rerr
621+ }
622+ _ , rerr = poller .PollUntilDone (deleteCtx , & runtime.PollUntilDoneOptions {Frequency : 30 * time .Second })
623+ if rerr != nil {
624+ klog .Errorf ("failed to delete VM: %s/%s: %s" , as .manager .config .ResourceGroup , name , rerr .Error ())
625+ return rerr
581626 }
582627 klog .V (2 ).Infof ("VirtualMachine %s/%s removed" , as .manager .config .ResourceGroup , name )
583628
584629 if len (nicName ) > 0 {
585630 klog .Infof ("deleting nic: %s/%s" , as .manager .config .ResourceGroup , nicName )
586631 interfaceCtx , interfaceCancel := getContextWithCancel ()
587632 defer interfaceCancel ()
588- rerr := as .manager .azClient .interfacesClient .Delete (interfaceCtx , as .manager .config .ResourceGroup , nicName )
589- klog .Infof ("waiting for nic deletion: %s/%s" , as .manager .config .ResourceGroup , nicName )
590- _ , realErr := checkResourceExistsFromRetryError (rerr )
591- if realErr != nil {
592- return realErr
633+
634+ poller , rerr := as .manager .azClient .interfacesClient .BeginDelete (interfaceCtx , as .manager .config .ResourceGroup , nicName , nil )
635+ if rerr != nil {
636+ klog .Errorf ("failed to begin deleting nic: %s/%s: %s" , as .manager .config .ResourceGroup , nicName , rerr .Error ())
637+ return rerr
638+ }
639+ _ , rerr = poller .PollUntilDone (interfaceCtx , & runtime.PollUntilDoneOptions {Frequency : 30 * time .Second })
640+ if rerr != nil {
641+ klog .Errorf ("failed to delete nic: %s/%s: %s" , as .manager .config .ResourceGroup , nicName , rerr .Error ())
642+ return rerr
593643 }
594644 klog .V (2 ).Infof ("interface %s/%s removed" , as .manager .config .ResourceGroup , nicName )
595645 }
@@ -617,10 +667,15 @@ func (as *AgentPool) deleteVirtualMachine(name string) error {
617667 klog .Infof ("deleting managed disk: %s/%s" , as .manager .config .ResourceGroup , * osDiskName )
618668 disksCtx , disksCancel := getContextWithCancel ()
619669 defer disksCancel ()
620- rerr := as .manager .azClient .disksClient .Delete (disksCtx , as .manager .config .SubscriptionID , as .manager .config .ResourceGroup , * osDiskName )
621- _ , realErr := checkResourceExistsFromRetryError (rerr )
622- if realErr != nil {
623- return realErr
670+ poller , rerr := as .manager .azClient .disksClient .BeginDelete (disksCtx , as .manager .config .ResourceGroup , * osDiskName , nil )
671+ if rerr != nil {
672+ klog .Errorf ("failed to begin deleting managed disk: %s/%s: %s" , as .manager .config .ResourceGroup , * osDiskName , rerr .Error ())
673+ return rerr
674+ }
675+ _ , rerr = poller .PollUntilDone (disksCtx , & runtime.PollUntilDoneOptions {Frequency : 30 * time .Second })
676+ if rerr != nil {
677+ klog .Errorf ("failed to delete managed disk: %s/%s: %s" , as .manager .config .ResourceGroup , * osDiskName , rerr .Error ())
678+ return rerr
624679 }
625680 klog .V (2 ).Infof ("disk %s/%s removed" , as .manager .config .ResourceGroup , * osDiskName )
626681 }
0 commit comments