@@ -29,6 +29,7 @@ import (
29
29
"time"
30
30
31
31
"github.com/aws/aws-sdk-go-v2/aws"
32
+ "github.com/aws/aws-sdk-go-v2/service/ec2"
32
33
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
33
34
34
35
elb "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing"
@@ -1082,24 +1083,24 @@ func (c *Cloud) ensureLoadBalancer(ctx context.Context, namespacedName types.Nam
1082
1083
1083
1084
{
1084
1085
// Sync security groups
1085
- expected := sets .New [ string ] (securityGroupIDs ... )
1086
- actual := stringSetFromList (loadBalancer .SecurityGroups )
1086
+ expected := sets .New (securityGroupIDs ... )
1087
+ actual := sets . New (loadBalancer .SecurityGroups ... )
1087
1088
1088
1089
if ! expected .Equal (actual ) {
1089
1090
// This call just replaces the security groups, unlike e.g. subnets (!)
1090
- request := & elb.ApplySecurityGroupsToLoadBalancerInput {}
1091
- request .LoadBalancerName = aws .String (loadBalancerName )
1092
- if securityGroupIDs == nil {
1093
- request .SecurityGroups = nil
1094
- } else {
1095
- request .SecurityGroups = securityGroupIDs
1096
- }
1097
- klog .V (2 ).Info ("Applying updated security groups to load balancer" )
1098
- _ , err := c .elb .ApplySecurityGroupsToLoadBalancer (ctx , request )
1099
- if err != nil {
1091
+ klog .V (2 ).Infof ("Applying updated security groups to load balancer %q" , loadBalancerName )
1092
+ if _ , err := c .elb .ApplySecurityGroupsToLoadBalancer (ctx , & elb.ApplySecurityGroupsToLoadBalancerInput {
1093
+ LoadBalancerName : aws .String (loadBalancerName ),
1094
+ SecurityGroups : securityGroupIDs ,
1095
+ }); err != nil {
1100
1096
return nil , fmt .Errorf ("error applying AWS loadbalancer security groups: %q" , err )
1101
1097
}
1102
1098
dirty = true
1099
+
1100
+ // Ensure the replaced security groups are removed from AWS when owned by the controller.
1101
+ if sgsNotRemoved , errs := c .removeOwnedSecurityGroups (ctx , loadBalancerName , actual .UnsortedList ()); len (errs ) > 0 {
1102
+ return nil , fmt .Errorf ("error removing owned security groups %v: %v" , sgsNotRemoved , errs )
1103
+ }
1103
1104
}
1104
1105
}
1105
1106
@@ -1704,3 +1705,173 @@ func ValidateHealthCheck(s *elbtypes.HealthCheck) error {
1704
1705
1705
1706
return nil
1706
1707
}
1708
+
1709
+ // isOwnedSecurityGroup checks if the security group is owned by the controller
1710
+ // by checking if the security group has the cluster ownership tag
1711
+ // (kubernetes.io/cluster/<clusterID>=owned).
1712
+ //
1713
+ // Parameters:
1714
+ // - `ctx`: The context for the operation.
1715
+ // - `securityGroupID`: The ID of the security group to check.
1716
+ //
1717
+ // Returns:
1718
+ // - `bool`: True if the security group is owned by the controller, false otherwise.
1719
+ // - `error`: An error if the security group cannot be retrieved, is not found,
1720
+ // or if multiple security groups are found with the same ID (unexpected).
1721
+ //
1722
+ // Note: This function calls the AWS DescribeSecurityGroups API to retrieve
1723
+ // the security group and check its tags.
1724
+ func (c * Cloud ) isOwnedSecurityGroup (ctx context.Context , securityGroupID string ) (bool , error ) {
1725
+ groups , err := c .ec2 .DescribeSecurityGroups (ctx , & ec2.DescribeSecurityGroupsInput {
1726
+ GroupIds : []string {securityGroupID },
1727
+ })
1728
+ if err != nil {
1729
+ return false , fmt .Errorf ("error retrieving security group %q: %w" , securityGroupID , err )
1730
+ }
1731
+ if len (groups ) == 0 {
1732
+ return false , fmt .Errorf ("security group %q not found" , securityGroupID )
1733
+ }
1734
+ if len (groups ) != 1 {
1735
+ // This should not be possible - ids should be unique
1736
+ return false , fmt .Errorf ("multiple security groups found with same id %q" , securityGroupID )
1737
+ }
1738
+ return c .tagging .hasClusterTagOwned (groups [0 ].Tags ), nil
1739
+ }
1740
+
1741
+ // removeOwnedSecurityGroups removes the owned/managed security groups from AWS.
1742
+ // It revokes ingress rules that reference the security groups to be removed,
1743
+ // then deletes the security groups that are owned by the controller.
1744
+ // This is used when updating load balancer security groups to clean up orphaned ones.
1745
+ //
1746
+ // Parameters:
1747
+ // - `ctx`: The context for the operation.
1748
+ // - `loadBalancerName`: The name of the load balancer (used for logging and deletion operations).
1749
+ // - `securityGroups`: The list of security group IDs to process for removal.
1750
+ //
1751
+ // Returns:
1752
+ // - `[]string`: Security group IDs that could not be removed due to errors.
1753
+ // - `[]error`: Collection of all errors encountered during the removal process.
1754
+ //
1755
+ // Behavior:
1756
+ // - For each security group, checks if it is owned by the controller using cluster tags.
1757
+ // - Identifies and revokes all ingress rules in other security groups that reference the target SG.
1758
+ // - Only deletes security groups that are owned by the controller (have cluster ownership tags).
1759
+ // - Non-owned security groups have their references revoked but are not deleted.
1760
+ // - Processing continues for all security groups even if some operations fail.
1761
+ // - Collects and returns all security groups that could not be fully processed.
1762
+ func (c * Cloud ) removeOwnedSecurityGroups (ctx context.Context , loadBalancerName string , securityGroups []string ) ([]string , []error ) {
1763
+ groupsNotRemoved := []string {}
1764
+ errs := []error {}
1765
+ for _ , sg := range securityGroups {
1766
+ // Ensure security group is owned by the controller
1767
+ isOwned , err := c .isOwnedSecurityGroup (ctx , sg )
1768
+ if err != nil {
1769
+ groupsNotRemoved = append (groupsNotRemoved , sg )
1770
+ errs = append (errs , fmt .Errorf ("unable to validate if security group %q is owned by the controller: %w" , sg , err ))
1771
+ continue
1772
+ }
1773
+
1774
+ // build a list of security groups referencing the SG to be removed
1775
+ _ , groupsLinkedPermissions , err := c .buildSecurityGroupRuleReferences (ctx , sg )
1776
+ if err != nil {
1777
+ groupsNotRemoved = append (groupsNotRemoved , sg )
1778
+ errs = append (errs , fmt .Errorf ("error building security group rule references for %q: %w" , sg , err ))
1779
+ continue
1780
+ }
1781
+
1782
+ // revoke the rules references to the security group to be removed
1783
+ for sgTarget , sgPerms := range groupsLinkedPermissions {
1784
+ klog .Infof ("revoking security group ingress for %q" , aws .ToString (sgTarget .GroupId ))
1785
+ if _ , err := c .ec2 .RevokeSecurityGroupIngress (ctx , & ec2.RevokeSecurityGroupIngressInput {
1786
+ GroupId : sgTarget .GroupId ,
1787
+ IpPermissions : sgPerms .List (),
1788
+ }); err != nil {
1789
+ groupsNotRemoved = append (groupsNotRemoved , sg )
1790
+ errs = append (errs , fmt .Errorf ("error revoking security group ingress rules from %q: %w" , aws .ToString (sgTarget .GroupId ), err ))
1791
+ continue
1792
+ }
1793
+ }
1794
+
1795
+ // leave the loop if there are errors while revoking the rules references to the security group to be removed
1796
+ if len (errs ) > 0 {
1797
+ klog .Warningf ("one or more errors while revoking security group ingress rules, skipping %q deletion" , groupsNotRemoved )
1798
+ continue
1799
+ }
1800
+
1801
+ // if the security group is not owned by the controller, skip the SG removal lifecycle
1802
+ if ! isOwned {
1803
+ msg := fmt .Sprintf ("security group %q is not owned by the controller, skipping remove lifecycle after update" , sg )
1804
+ klog .Warningf ("%s" , msg )
1805
+ groupsNotRemoved = append (groupsNotRemoved , sg )
1806
+ errs = append (errs , fmt .Errorf ("%s" , msg ))
1807
+ continue
1808
+ }
1809
+
1810
+ // delete the owned/managed security group
1811
+ klog .Infof ("deleting security group %q" , sg )
1812
+ sgMap := make (map [string ]struct {})
1813
+ sgMap [sg ] = struct {}{}
1814
+ if err := c .deleteSecurityGroupsWithBackoff (ctx , loadBalancerName , sgMap ); err != nil {
1815
+ msg := fmt .Sprintf ("error deleting security group %q: %v" , sg , err )
1816
+ klog .Warningf ("%s" , msg )
1817
+ groupsNotRemoved = append (groupsNotRemoved , sg )
1818
+ errs = append (errs , fmt .Errorf ("error deleting security group %q: %w" , sg , err ))
1819
+ }
1820
+ }
1821
+ return groupsNotRemoved , errs
1822
+ }
1823
+
1824
+ // buildSecurityGroupRuleReferences finds all security groups that have ingress rules
1825
+ // referencing the specified security group ID, and categorizes them based on cluster tagging.
1826
+ // This is used to identify dependencies before removing a security group.
1827
+ //
1828
+ // Parameters:
1829
+ // - ctx: The context for the request.
1830
+ // - sgID: The ID of the security group to find references for.
1831
+ //
1832
+ // Returns:
1833
+ // - map[*ec2types.SecurityGroup]bool: All security groups with ingress rules referencing sgID, mapped to their cluster tag status (true/false).
1834
+ // - map[*ec2types.SecurityGroup]IPPermissionSet: Only cluster-tagged security groups mapped to their ingress rules that reference sgID.
1835
+ // - error: An error if the AWS DescribeSecurityGroups API call fails.
1836
+ //
1837
+ // Behavior:
1838
+ // - Queries AWS for all security groups with ingress rules referencing the target sgID.
1839
+ // - For each found security group, records its cluster tag status in the first map.
1840
+ // - Only processes ingress rules for cluster-tagged security groups; non-cluster-tagged ones are skipped with a warning.
1841
+ // - Collects the specific ingress rules that reference the target security group from cluster-tagged SGs only.
1842
+ // - Non-cluster-tagged security groups appear in the first map (with false value) but are excluded from the second map.
1843
+ func (c * Cloud ) buildSecurityGroupRuleReferences (ctx context.Context , sgID string ) (map [* ec2types.SecurityGroup ]bool , map [* ec2types.SecurityGroup ]IPPermissionSet , error ) {
1844
+ groupsWithTags := make (map [* ec2types.SecurityGroup ]bool )
1845
+ groupsLinkedPermissions := make (map [* ec2types.SecurityGroup ]IPPermissionSet )
1846
+ sgsOut , err := c .ec2 .DescribeSecurityGroups (ctx , & ec2.DescribeSecurityGroupsInput {
1847
+ Filters : []ec2types.Filter {
1848
+ newEc2Filter ("ip-permission.group-id" , sgID ),
1849
+ },
1850
+ })
1851
+ if err != nil {
1852
+ return groupsWithTags , groupsLinkedPermissions , fmt .Errorf ("error querying security groups for ELB: %q" , err )
1853
+ }
1854
+
1855
+ // Iterate over the security groups and build the maps.
1856
+ for _ , sg := range sgsOut {
1857
+ // Save cluster tags
1858
+ groupsWithTags [& sg ] = c .tagging .hasClusterTag (sg .Tags )
1859
+ if ! c .tagging .hasClusterTag (sg .Tags ) {
1860
+ klog .Warningf ("security group %q is not cluster tagged, skip removing reference rule list to remove %q" , aws .ToString (sg .GroupId ), sgID )
1861
+ continue
1862
+ }
1863
+ // Save group linked rules
1864
+ groupsLinkedPermissions [& sg ] = NewIPPermissionSet ()
1865
+ for _ , rule := range sg .IpPermissions {
1866
+ if rule .UserIdGroupPairs != nil {
1867
+ for _ , pair := range rule .UserIdGroupPairs {
1868
+ if pair .GroupId != nil && * pair .GroupId == sgID {
1869
+ groupsLinkedPermissions [& sg ].Insert (rule )
1870
+ }
1871
+ }
1872
+ }
1873
+ }
1874
+
1875
+ }
1876
+ return groupsWithTags , groupsLinkedPermissions , nil
1877
+ }
0 commit comments