Skip to content

Commit 55d583b

Browse files
committed
refact/lb/sg: isolate sg deletion fragments to be reused
Isolate the security group deletion fragments of EnsureLoadBalancerDeleted into buildSecurityGroupsToDelete and deleteSecurityGroupsWithBackoff, so that the evaluation criteria and the backoff deletion can be reused in future implementations, e.g. NLB with Security Groups.
1 parent 7b15fbd commit 55d583b

File tree

1 file changed

+141
-97
lines changed

1 file changed

+141
-97
lines changed

pkg/providers/v1/aws.go

Lines changed: 141 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ import (
5050
v1 "k8s.io/api/core/v1"
5151
"k8s.io/apimachinery/pkg/types"
5252
"k8s.io/apimachinery/pkg/util/sets"
53+
"k8s.io/apimachinery/pkg/util/wait"
5354
"k8s.io/client-go/informers"
5455
informercorev1 "k8s.io/client-go/informers/core/v1"
5556
clientset "k8s.io/client-go/kubernetes"
@@ -2872,6 +2873,139 @@ func (c *Cloud) updateInstanceSecurityGroupsForLoadBalancer(ctx context.Context,
28722873
return nil
28732874
}
28742875

2876+
// deleteSecurityGroupsWithBackoff deletes a list of security group IDs with retries and exponential backoff.
2877+
// The function attempts to delete each security group in the provided list, handling potential dependency violations
2878+
// caused by resources still being associated with the security groups (e.g., load balancers in the process of deletion).
2879+
//
2880+
// Parameters:
2881+
// - `ctx`: The context for the operation.
2882+
// - `svcName`: The name of the service associated with the security groups.
2883+
// - `securityGroupIDs`: A map of security group IDs to be deleted.
2884+
//
2885+
// Behavior:
2886+
// - If the list of security group IDs is empty, the function returns immediately.
2887+
// - The function retries deletion for up to 10 minutes, with an initial backoff of 5 seconds that doubles with each retry.
2888+
// - Dependency violations are logged and ignored, allowing retries until the timeout is reached.
2889+
// - If all security groups are successfully deleted, the function exits.
2890+
// - If the timeout is reached and some security groups remain, an error is returned.
2891+
//
2892+
// Returns:
2893+
// - `error`: An error if any security groups could not be deleted within the timeout period.
2894+
func (c *Cloud) deleteSecurityGroupsWithBackoff(ctx context.Context, svcName string, securityGroupIDs map[string]struct{}) error {
2895+
if len(securityGroupIDs) == 0 {
2896+
return nil
2897+
}
2898+
err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 10*time.Minute, true, func(ctx context.Context) (bool, error) {
2899+
for securityGroupID := range securityGroupIDs {
2900+
_, err := c.ec2.DeleteSecurityGroup(ctx, &ec2.DeleteSecurityGroupInput{
2901+
GroupId: &securityGroupID,
2902+
})
2903+
if err == nil {
2904+
delete(securityGroupIDs, securityGroupID)
2905+
continue
2906+
}
2907+
ignore := false
2908+
var ae smithy.APIError
2909+
if errors.As(err, &ae) {
2910+
if ae.ErrorCode() == "DependencyViolation" {
2911+
klog.V(2).Infof("Ignoring DependencyViolation while deleting load-balancer security group (%s), assuming because LB is in process of deleting", securityGroupID)
2912+
ignore = true
2913+
}
2914+
}
2915+
if !ignore {
2916+
return true, fmt.Errorf("error while deleting load balancer security group (%s): %q", securityGroupID, err)
2917+
}
2918+
}
2919+
2920+
if len(securityGroupIDs) == 0 {
2921+
klog.V(2).Info("Deleted all security groups for load balancer: ", svcName)
2922+
return true, nil
2923+
}
2924+
2925+
klog.V(2).Infof("Waiting for load-balancer %q to delete so we can delete security groups: %v", svcName, securityGroupIDs)
2926+
return false, nil
2927+
})
2928+
if err != nil {
2929+
ids := []string{}
2930+
for id := range securityGroupIDs {
2931+
ids = append(ids, id)
2932+
}
2933+
return fmt.Errorf("timed out deleting ELB: %s. Could not delete security groups %v", svcName, strings.Join(ids, ","))
2934+
}
2935+
return nil
2936+
}
2937+
2938+
// buildSecurityGroupsToDelete evaluates all deletion criteria and creates a list of valid security group IDs to be deleted.
2939+
// It returns two maps:
2940+
// - `securityGroupIDs`: A map of security group IDs that are eligible for deletion.
2941+
// - `taggedLBSecurityGroups`: A map of security group IDs that are tagged and associated with the load balancer.
2942+
// The function filters security groups based on the following criteria:
2943+
// - Excludes security groups defined in the Cloud Configuration.
2944+
// - Excludes security groups with no cluster tags.
2945+
// - Excludes security groups annotated with `service.beta.kubernetes.io/aws-load-balancer-security-groups` or
2946+
// `service.beta.kubernetes.io/aws-load-balancer-extra-security-groups`.
2947+
//
2948+
// Parameters:
2949+
// - `ctx`: The context for the operation.
2950+
// - `service`: The Kubernetes service object.
2951+
// - `lbSecurityGroups`: A list of security group IDs associated with the load balancer.
2952+
// Returns:
2953+
// - `securityGroupIDs`: A map of security group IDs to be deleted.
2954+
// - `taggedLBSecurityGroups`: A map of tagged security group IDs.
2955+
// - `error`: An error if the operation fails.
2956+
func (c *Cloud) buildSecurityGroupsToDelete(ctx context.Context, service *v1.Service, lbSecurityGroups []string) (map[string]struct{}, map[string]struct{}, error) {
2957+
securityGroupIDs := map[string]struct{}{}
2958+
taggedLBSecurityGroups := map[string]struct{}{}
2959+
2960+
describeRequest := &ec2.DescribeSecurityGroupsInput{}
2961+
describeRequest.Filters = []ec2types.Filter{
2962+
newEc2Filter("group-id", lbSecurityGroups...),
2963+
}
2964+
response, err := c.ec2.DescribeSecurityGroups(ctx, describeRequest)
2965+
if err != nil {
2966+
return nil, nil, fmt.Errorf("error querying security groups for ELB: %q", err)
2967+
}
2968+
2969+
annotatedSgSet := map[string]bool{}
2970+
annotatedSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerSecurityGroups])
2971+
annotatedExtraSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerExtraSecurityGroups])
2972+
annotatedSgsList = append(annotatedSgsList, annotatedExtraSgsList...)
2973+
2974+
for _, sg := range annotatedSgsList {
2975+
annotatedSgSet[sg] = true
2976+
}
2977+
2978+
for _, sg := range response {
2979+
sgID := aws.StringValue(sg.GroupId)
2980+
2981+
if sgID == c.cfg.Global.ElbSecurityGroup {
2982+
//We don't want to delete a security group that was defined in the Cloud Configuration.
2983+
continue
2984+
}
2985+
if sgID == "" {
2986+
klog.Warningf("Ignoring empty security group in %s", service.Name)
2987+
continue
2988+
}
2989+
2990+
if !c.tagging.hasClusterTag(sg.Tags) {
2991+
klog.Warningf("Ignoring security group with no cluster tag in %s", service.Name)
2992+
continue
2993+
} else {
2994+
taggedLBSecurityGroups[sgID] = struct{}{}
2995+
}
2996+
2997+
// This is an extra protection of deletion of non provisioned Security Group which is annotated with `service.beta.kubernetes.io/aws-load-balancer-security-groups`.
2998+
if _, ok := annotatedSgSet[sgID]; ok {
2999+
klog.Warningf("Ignoring security group with annotation `service.beta.kubernetes.io/aws-load-balancer-security-groups` or service.beta.kubernetes.io/aws-load-balancer-extra-security-groups in %s", service.Name)
3000+
continue
3001+
}
3002+
3003+
securityGroupIDs[sgID] = struct{}{}
3004+
}
3005+
3006+
return securityGroupIDs, taggedLBSecurityGroups, nil
3007+
}
3008+
28753009
// EnsureLoadBalancerDeleted implements LoadBalancer.EnsureLoadBalancerDeleted.
28763010
func (c *Cloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName string, service *v1.Service) error {
28773011
if isLBExternal(service.Annotations) {
@@ -2943,57 +3077,13 @@ func (c *Cloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName strin
29433077
// if the load balancer security group is being deleted.
29443078
securityGroupIDs := map[string]struct{}{}
29453079
taggedLBSecurityGroups := map[string]struct{}{}
2946-
{
2947-
// Delete the security group(s) for the load balancer
2948-
// Note that this is annoying: the load balancer disappears from the API immediately, but it is still
2949-
// deleting in the background. We get a DependencyViolation until the load balancer has deleted itself
2950-
2951-
var loadBalancerSGs = lb.SecurityGroups
2952-
2953-
describeRequest := &ec2.DescribeSecurityGroupsInput{}
2954-
describeRequest.Filters = []ec2types.Filter{
2955-
newEc2Filter("group-id", loadBalancerSGs...),
2956-
}
2957-
response, err := c.ec2.DescribeSecurityGroups(ctx, describeRequest)
2958-
if err != nil {
2959-
return fmt.Errorf("error querying security groups for ELB: %q", err)
2960-
}
2961-
annotatedSgSet := map[string]bool{}
2962-
annotatedSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerSecurityGroups])
2963-
annotatedExtraSgsList := getSGListFromAnnotation(service.Annotations[ServiceAnnotationLoadBalancerExtraSecurityGroups])
2964-
annotatedSgsList = append(annotatedSgsList, annotatedExtraSgsList...)
2965-
2966-
for _, sg := range annotatedSgsList {
2967-
annotatedSgSet[sg] = true
2968-
}
2969-
2970-
for _, sg := range response {
2971-
sgID := aws.StringValue(sg.GroupId)
2972-
2973-
if sgID == c.cfg.Global.ElbSecurityGroup {
2974-
//We don't want to delete a security group that was defined in the Cloud Configuration.
2975-
continue
2976-
}
2977-
if sgID == "" {
2978-
klog.Warningf("Ignoring empty security group in %s", service.Name)
2979-
continue
2980-
}
2981-
2982-
if !c.tagging.hasClusterTag(sg.Tags) {
2983-
klog.Warningf("Ignoring security group with no cluster tag in %s", service.Name)
2984-
continue
2985-
} else {
2986-
taggedLBSecurityGroups[sgID] = struct{}{}
2987-
}
29883080

2989-
// This is an extra protection of deletion of non provisioned Security Group which is annotated with `service.beta.kubernetes.io/aws-load-balancer-security-groups`.
2990-
if _, ok := annotatedSgSet[sgID]; ok {
2991-
klog.Warningf("Ignoring security group with annotation `service.beta.kubernetes.io/aws-load-balancer-security-groups` or service.beta.kubernetes.io/aws-load-balancer-extra-security-groups in %s", service.Name)
2992-
continue
2993-
}
2994-
2995-
securityGroupIDs[sgID] = struct{}{}
2996-
}
3081+
// Delete the security group(s) for the load balancer
3082+
// Note that this is annoying: the load balancer disappears from the API immediately, but it is still
3083+
// deleting in the background. We get a DependencyViolation until the load balancer has deleted itself
3084+
securityGroupIDs, taggedLBSecurityGroups, err = c.buildSecurityGroupsToDelete(ctx, service, lb.SecurityGroups)
3085+
if err != nil {
3086+
return fmt.Errorf("unable to build security groups to delete: %w", err)
29973087
}
29983088

29993089
{
@@ -3028,53 +3118,7 @@ func (c *Cloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName strin
30283118
}
30293119
}
30303120

3031-
{
3032-
3033-
// Loop through and try to delete them
3034-
timeoutAt := time.Now().Add(time.Second * 600)
3035-
for {
3036-
for securityGroupID := range securityGroupIDs {
3037-
request := &ec2.DeleteSecurityGroupInput{}
3038-
request.GroupId = &securityGroupID
3039-
_, err := c.ec2.DeleteSecurityGroup(ctx, request)
3040-
if err == nil {
3041-
delete(securityGroupIDs, securityGroupID)
3042-
} else {
3043-
ignore := false
3044-
var ae smithy.APIError
3045-
if errors.As(err, &ae) {
3046-
if ae.ErrorCode() == "DependencyViolation" {
3047-
klog.V(2).Infof("Ignoring DependencyViolation while deleting load-balancer security group (%s), assuming because LB is in process of deleting", securityGroupID)
3048-
ignore = true
3049-
}
3050-
}
3051-
if !ignore {
3052-
return fmt.Errorf("error while deleting load balancer security group (%s): %q", securityGroupID, err)
3053-
}
3054-
}
3055-
}
3056-
3057-
if len(securityGroupIDs) == 0 {
3058-
klog.V(2).Info("Deleted all security groups for load balancer: ", service.Name)
3059-
break
3060-
}
3061-
3062-
if time.Now().After(timeoutAt) {
3063-
ids := []string{}
3064-
for id := range securityGroupIDs {
3065-
ids = append(ids, id)
3066-
}
3067-
3068-
return fmt.Errorf("timed out deleting ELB: %s. Could not delete security groups %v", service.Name, strings.Join(ids, ","))
3069-
}
3070-
3071-
klog.V(2).Info("Waiting for load-balancer to delete so we can delete security groups: ", service.Name)
3072-
3073-
time.Sleep(10 * time.Second)
3074-
}
3075-
}
3076-
3077-
return nil
3121+
return c.deleteSecurityGroupsWithBackoff(ctx, service.Name, securityGroupIDs)
30783122
}
30793123

30803124
// UpdateLoadBalancer implements LoadBalancer.UpdateLoadBalancer

0 commit comments

Comments
 (0)