Skip to content

Commit e454240

Browse files
committed
refact/lb/sg: isolate sg deletion fragments to be reused
Isolating security group deletion fragments from EnsureLoadBalancerDeleted to buildSecurityGroupsToDelete and deleteSecurityGroupsWithBackoff, so the envaluation criteria and backof deletion can be reused in future implementations, i.e. NLB with Security Groups.
1 parent f4871ba commit e454240

File tree

1 file changed

+141
-97
lines changed

1 file changed

+141
-97
lines changed

pkg/providers/v1/aws.go

Lines changed: 141 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import (
4444
v1 "k8s.io/api/core/v1"
4545
"k8s.io/apimachinery/pkg/types"
4646
"k8s.io/apimachinery/pkg/util/sets"
47+
"k8s.io/apimachinery/pkg/util/wait"
4748
"k8s.io/client-go/informers"
4849
informercorev1 "k8s.io/client-go/informers/core/v1"
4950
clientset "k8s.io/client-go/kubernetes"
@@ -2837,6 +2838,139 @@ func (c *Cloud) updateInstanceSecurityGroupsForLoadBalancer(ctx context.Context,
28372838
return nil
28382839
}
28392840

2841+
// deleteSecurityGroupsWithBackoff deletes a list of security group IDs with retries and exponential backoff.
2842+
// The function attempts to delete each security group in the provided list, handling potential dependency violations
2843+
// caused by resources still being associated with the security groups (e.g., load balancers in the process of deletion).
2844+
//
2845+
// Parameters:
2846+
// - `ctx`: The context for the operation.
2847+
// - `svcName`: The name of the service associated with the security groups.
2848+
// - `securityGroupIDs`: A map of security group IDs to be deleted.
2849+
//
2850+
// Behavior:
2851+
// - If the list of security group IDs is empty, the function returns immediately.
2852+
// - The function retries deletion for up to 10 minutes, with an initial backoff of 5 seconds that doubles with each retry.
2853+
// - Dependency violations are logged and ignored, allowing retries until the timeout is reached.
2854+
// - If all security groups are successfully deleted, the function exits.
2855+
// - If the timeout is reached and some security groups remain, an error is returned.
2856+
//
2857+
// Returns:
2858+
// - `error`: An error if any security groups could not be deleted within the timeout period.
2859+
func (c *Cloud) deleteSecurityGroupsWithBackoff(ctx context.Context, svcName string, securityGroupIDs map[string]struct{}) error {
2860+
if len(securityGroupIDs) == 0 {
2861+
return nil
2862+
}
2863+
err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 10*time.Minute, true, func(ctx context.Context) (bool, error) {
2864+
for securityGroupID := range securityGroupIDs {
2865+
_, err := c.ec2.DeleteSecurityGroup(ctx, &ec2.DeleteSecurityGroupInput{
2866+
GroupId: &securityGroupID,
2867+
})
2868+
if err == nil {
2869+
delete(securityGroupIDs, securityGroupID)
2870+
continue
2871+
}
2872+
ignore := false
2873+
var ae smithy.APIError
2874+
if errors.As(err, &ae) {
2875+
if ae.ErrorCode() == "DependencyViolation" {
2876+
klog.V(2).Infof("Ignoring DependencyViolation while deleting load-balancer security group (%s), assuming because LB is in process of deleting", securityGroupID)
2877+
ignore = true
2878+
}
2879+
}
2880+
if !ignore {
2881+
return true, fmt.Errorf("error while deleting load balancer security group (%s): %q", securityGroupID, err)
2882+
}
2883+
}
2884+
2885+
if len(securityGroupIDs) == 0 {
2886+
klog.V(2).Info("Deleted all security groups for load balancer: ", svcName)
2887+
return true, nil
2888+
}
2889+
2890+
klog.V(2).Infof("Waiting for load-balancer %q to delete so we can delete security groups: %v", svcName, securityGroupIDs)
2891+
return false, nil
2892+
})
2893+
if err != nil {
2894+
ids := []string{}
2895+
for id := range securityGroupIDs {
2896+
ids = append(ids, id)
2897+
}
2898+
return fmt.Errorf("could not delete security groups %v for Load Balancer %q: %w", strings.Join(ids, ","), svcName, err)
2899+
}
2900+
return nil
2901+
}
2902+
2903+
// buildSecurityGroupsToDelete evaluates all deletion criteria and creates a list of valid security group IDs to be deleted.
2904+
// It returns two maps:
2905+
// - `securityGroupIDs`: A map of security group IDs that are eligible for deletion.
2906+
// - `taggedLBSecurityGroups`: A map of security group IDs that are tagged and associated with the load balancer.
2907+
// The function filters security groups based on the following criteria:
2908+
// - Excludes security groups defined in the Cloud Configuration.
2909+
// - Excludes security groups with no cluster tags.
2910+
// - Excludes security groups annotated with `service.beta.kubernetes.io/aws-load-balancer-security-groups` or
2911+
// `service.beta.kubernetes.io/aws-load-balancer-extra-security-groups`.
2912+
//
2913+
// Parameters:
2914+
// - `ctx`: The context for the operation.
2915+
// - `service`: The Kubernetes service object.
2916+
// - `lbSecurityGroups`: A list of security group IDs associated with the load balancer.
2917+
// Returns:
2918+
// - `securityGroupIDs`: A map of security group IDs to be deleted.
2919+
// - `taggedLBSecurityGroups`: A map of tagged security group IDs.
2920+
// - `error`: An error if the operation fails.
2921+
func (c *Cloud) buildSecurityGroupsToDelete(ctx context.Context, service *v1.Service, lbSecurityGroups []string) (map[string]struct{}, map[string]struct{}, error) {
2922+
securityGroupIDs := map[string]struct{}{}
2923+
taggedLBSecurityGroups := map[string]struct{}{}
2924+
2925+
describeRequest := &ec2.DescribeSecurityGroupsInput{}
2926+
describeRequest.Filters = []ec2types.Filter{
2927+
newEc2Filter("group-id", lbSecurityGroups...),
2928+
}
2929+
response, err := c.ec2.DescribeSecurityGroups(ctx, describeRequest)
2930+
if err != nil {
2931+
return nil, nil, fmt.Errorf("error querying security groups for ELB: %q", err)
2932+
}
2933+
2934+
annotatedSgSet := map[string]bool{}
2935+
annotatedSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerSecurityGroups])
2936+
annotatedExtraSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerExtraSecurityGroups])
2937+
annotatedSgsList = append(annotatedSgsList, annotatedExtraSgsList...)
2938+
2939+
for _, sg := range annotatedSgsList {
2940+
annotatedSgSet[sg] = true
2941+
}
2942+
2943+
for _, sg := range response {
2944+
sgID := aws.ToString(sg.GroupId)
2945+
2946+
if sgID == c.cfg.Global.ElbSecurityGroup {
2947+
//We don't want to delete a security group that was defined in the Cloud Configuration.
2948+
continue
2949+
}
2950+
if sgID == "" {
2951+
klog.Warningf("Ignoring empty security group in %s", service.Name)
2952+
continue
2953+
}
2954+
2955+
if !c.tagging.hasClusterTag(sg.Tags) {
2956+
klog.Warningf("Ignoring security group with no cluster tag in %s", service.Name)
2957+
continue
2958+
} else {
2959+
taggedLBSecurityGroups[sgID] = struct{}{}
2960+
}
2961+
2962+
// This is an extra protection of deletion of non provisioned Security Group which is annotated with `service.beta.kubernetes.io/aws-load-balancer-security-groups`.
2963+
if _, ok := annotatedSgSet[sgID]; ok {
2964+
klog.Warningf("Ignoring security group with annotation `service.beta.kubernetes.io/aws-load-balancer-security-groups` or service.beta.kubernetes.io/aws-load-balancer-extra-security-groups in %s", service.Name)
2965+
continue
2966+
}
2967+
2968+
securityGroupIDs[sgID] = struct{}{}
2969+
}
2970+
2971+
return securityGroupIDs, taggedLBSecurityGroups, nil
2972+
}
2973+
28402974
// EnsureLoadBalancerDeleted implements LoadBalancer.EnsureLoadBalancerDeleted.
28412975
func (c *Cloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName string, service *v1.Service) error {
28422976
if isLBExternal(service.Annotations) {
@@ -2908,57 +3042,13 @@ func (c *Cloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName strin
29083042
// if the load balancer security group is being deleted.
29093043
securityGroupIDs := map[string]struct{}{}
29103044
taggedLBSecurityGroups := map[string]struct{}{}
2911-
{
2912-
// Delete the security group(s) for the load balancer
2913-
// Note that this is annoying: the load balancer disappears from the API immediately, but it is still
2914-
// deleting in the background. We get a DependencyViolation until the load balancer has deleted itself
2915-
2916-
var loadBalancerSGs = lb.SecurityGroups
2917-
2918-
describeRequest := &ec2.DescribeSecurityGroupsInput{}
2919-
describeRequest.Filters = []ec2types.Filter{
2920-
newEc2Filter("group-id", loadBalancerSGs...),
2921-
}
2922-
response, err := c.ec2.DescribeSecurityGroups(ctx, describeRequest)
2923-
if err != nil {
2924-
return fmt.Errorf("error querying security groups for ELB: %q", err)
2925-
}
2926-
annotatedSgSet := map[string]bool{}
2927-
annotatedSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerSecurityGroups])
2928-
annotatedExtraSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerExtraSecurityGroups])
2929-
annotatedSgsList = append(annotatedSgsList, annotatedExtraSgsList...)
2930-
2931-
for _, sg := range annotatedSgsList {
2932-
annotatedSgSet[sg] = true
2933-
}
2934-
2935-
for _, sg := range response {
2936-
sgID := aws.ToString(sg.GroupId)
29373045

2938-
if sgID == c.cfg.Global.ElbSecurityGroup {
2939-
//We don't want to delete a security group that was defined in the Cloud Configuration.
2940-
continue
2941-
}
2942-
if sgID == "" {
2943-
klog.Warningf("Ignoring empty security group in %s", service.Name)
2944-
continue
2945-
}
2946-
2947-
if !c.tagging.hasClusterTag(sg.Tags) {
2948-
klog.Warningf("Ignoring security group with no cluster tag in %s", service.Name)
2949-
continue
2950-
} else {
2951-
taggedLBSecurityGroups[sgID] = struct{}{}
2952-
}
2953-
2954-
// This is an extra protection of deletion of non provisioned Security Group which is annotated with `service.beta.kubernetes.io/aws-load-balancer-security-groups`.
2955-
if _, ok := annotatedSgSet[sgID]; ok {
2956-
klog.Warningf("Ignoring security group with annotation `service.beta.kubernetes.io/aws-load-balancer-security-groups` or service.beta.kubernetes.io/aws-load-balancer-extra-security-groups in %s", service.Name)
2957-
continue
2958-
}
2959-
2960-
securityGroupIDs[sgID] = struct{}{}
2961-
}
3046+
// Delete the security group(s) for the load balancer
3047+
// Note that this is annoying: the load balancer disappears from the API immediately, but it is still
3048+
// deleting in the background. We get a DependencyViolation until the load balancer has deleted itself
3049+
securityGroupIDs, taggedLBSecurityGroups, err = c.buildSecurityGroupsToDelete(ctx, service, lb.SecurityGroups)
3050+
if err != nil {
3051+
return fmt.Errorf("unable to build security groups to delete: %w", err)
29623052
}
29633053

29643054
{
@@ -2993,53 +3083,7 @@ func (c *Cloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName strin
29933083
}
29943084
}
29953085

2996-
{
2997-
2998-
// Loop through and try to delete them
2999-
timeoutAt := time.Now().Add(time.Second * 600)
3000-
for {
3001-
for securityGroupID := range securityGroupIDs {
3002-
request := &ec2.DeleteSecurityGroupInput{}
3003-
request.GroupId = &securityGroupID
3004-
_, err := c.ec2.DeleteSecurityGroup(ctx, request)
3005-
if err == nil {
3006-
delete(securityGroupIDs, securityGroupID)
3007-
} else {
3008-
ignore := false
3009-
var ae smithy.APIError
3010-
if errors.As(err, &ae) {
3011-
if ae.ErrorCode() == "DependencyViolation" {
3012-
klog.V(2).Infof("Ignoring DependencyViolation while deleting load-balancer security group (%s), assuming because LB is in process of deleting", securityGroupID)
3013-
ignore = true
3014-
}
3015-
}
3016-
if !ignore {
3017-
return fmt.Errorf("error while deleting load balancer security group (%s): %q", securityGroupID, err)
3018-
}
3019-
}
3020-
}
3021-
3022-
if len(securityGroupIDs) == 0 {
3023-
klog.V(2).Info("Deleted all security groups for load balancer: ", service.Name)
3024-
break
3025-
}
3026-
3027-
if time.Now().After(timeoutAt) {
3028-
ids := []string{}
3029-
for id := range securityGroupIDs {
3030-
ids = append(ids, id)
3031-
}
3032-
3033-
return fmt.Errorf("timed out deleting ELB: %s. Could not delete security groups %v", service.Name, strings.Join(ids, ","))
3034-
}
3035-
3036-
klog.V(2).Info("Waiting for load-balancer to delete so we can delete security groups: ", service.Name)
3037-
3038-
time.Sleep(10 * time.Second)
3039-
}
3040-
}
3041-
3042-
return nil
3086+
return c.deleteSecurityGroupsWithBackoff(ctx, service.Name, securityGroupIDs)
30433087
}
30443088

30453089
// UpdateLoadBalancer implements LoadBalancer.UpdateLoadBalancer

0 commit comments

Comments
 (0)