Skip to content

Commit 03f9775

Browse files
committed
fix: remove CLB security group on update from managed to BYO SG
This change fixes the leak security group of an owned/managed (created by controller) when BYO SG (annotation) is added to the service after it is created (update)
1 parent 12af74b commit 03f9775

File tree

3 files changed

+825
-12
lines changed

3 files changed

+825
-12
lines changed

pkg/providers/v1/aws_loadbalancer.go

Lines changed: 170 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"time"
3030

3131
"github.com/aws/aws-sdk-go-v2/aws"
32+
"github.com/aws/aws-sdk-go-v2/service/ec2"
3233
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
3334

3435
elb "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing"
@@ -1082,24 +1083,24 @@ func (c *Cloud) ensureLoadBalancer(ctx context.Context, namespacedName types.Nam
10821083

10831084
{
10841085
// Sync security groups
1085-
expected := sets.New[string](securityGroupIDs...)
1086-
actual := stringSetFromList(loadBalancer.SecurityGroups)
1086+
expected := sets.New(securityGroupIDs...)
1087+
actual := sets.New(loadBalancer.SecurityGroups...)
10871088

10881089
if !expected.Equal(actual) {
10891090
// This call just replaces the security groups, unlike e.g. subnets (!)
1090-
request := &elb.ApplySecurityGroupsToLoadBalancerInput{}
1091-
request.LoadBalancerName = aws.String(loadBalancerName)
1092-
if securityGroupIDs == nil {
1093-
request.SecurityGroups = nil
1094-
} else {
1095-
request.SecurityGroups = securityGroupIDs
1096-
}
1097-
klog.V(2).Info("Applying updated security groups to load balancer")
1098-
_, err := c.elb.ApplySecurityGroupsToLoadBalancer(ctx, request)
1099-
if err != nil {
1091+
klog.V(2).Infof("Applying updated security groups to load balancer %q", loadBalancerName)
1092+
if _, err := c.elb.ApplySecurityGroupsToLoadBalancer(ctx, &elb.ApplySecurityGroupsToLoadBalancerInput{
1093+
LoadBalancerName: aws.String(loadBalancerName),
1094+
SecurityGroups: securityGroupIDs,
1095+
}); err != nil {
11001096
return nil, fmt.Errorf("error applying AWS loadbalancer security groups: %q", err)
11011097
}
11021098
dirty = true
1099+
1100+
// Ensure the replaced security groups are removed from AWS when owned by the controller.
1101+
if sgsNotRemoved, errs := c.removeOwnedSecurityGroups(ctx, loadBalancerName, actual.UnsortedList()); len(errs) > 0 {
1102+
return nil, fmt.Errorf("error removing owned security groups %v: %v", sgsNotRemoved, errs)
1103+
}
11031104
}
11041105
}
11051106

@@ -1704,3 +1705,160 @@ func ValidateHealthCheck(s *elbtypes.HealthCheck) error {
17041705

17051706
return nil
17061707
}
1708+
1709+
// isOwnedSecurityGroup checks if the security group is owned by the controller
1710+
// by checking if the security group has the cluster tag and the value is "owned".
1711+
//
1712+
// Parameters:
1713+
// - `ctx`: The context for the operation.
1714+
// - `securityGroupID`: The ID of the security group to check.
1715+
//
1716+
// Returns:
1717+
// - `bool`: True if the security group is owned by the controller, false otherwise.
1718+
func (c *Cloud) isOwnedSecurityGroup(ctx context.Context, securityGroupID string) (bool, error) {
1719+
groups, err := c.ec2.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
1720+
GroupIds: []string{securityGroupID},
1721+
})
1722+
if err != nil {
1723+
return false, fmt.Errorf("error retrieving security group %q: %w", securityGroupID, err)
1724+
}
1725+
if len(groups) == 0 {
1726+
return false, fmt.Errorf("security group %q not found", securityGroupID)
1727+
}
1728+
if len(groups) != 1 {
1729+
// This should not be possible - ids should be unique
1730+
return false, fmt.Errorf("multiple security groups found with same id %q", securityGroupID)
1731+
}
1732+
return c.tagging.hasClusterTagOwned(groups[0].Tags), nil
1733+
}
1734+
1735+
// removeOwnedSecurityGroups removes the owned/managed security groups from AWS.
1736+
// It revokes the ingress rules references to the security group to be removed
1737+
// and then deletes the security group.
1738+
// It is used to remove the security groups from the ELB when the owned SGs of an ELB is updated.
1739+
//
1740+
// Parameters:
1741+
// - `ctx`: The context for the operation.
1742+
// - `loadBalancerName`: The name of the load balancer.
1743+
// - `securityGroups`: The list of security group IDs to remove.
1744+
//
1745+
// Returns:
1746+
// - `error`: An error if any issue occurs while removing the owned security groups.
1747+
//
1748+
// Behavior:
1749+
// - For each security group in the list, it checks if it is owned by the controller.
1750+
// - It builds a list of security groups referencing the SG to be removed.
1751+
// - It revokes the rules references to the security group to be removed.
1752+
// - If it is owned by the controller, it deletes the security group.
1753+
func (c *Cloud) removeOwnedSecurityGroups(ctx context.Context, loadBalancerName string, securityGroups []string) ([]string, []error) {
1754+
groupsNotRemoved := []string{}
1755+
errs := []error{}
1756+
for _, sg := range securityGroups {
1757+
// Ensure security group is owned by the controller
1758+
isOwned, err := c.isOwnedSecurityGroup(ctx, sg)
1759+
if err != nil {
1760+
groupsNotRemoved = append(groupsNotRemoved, sg)
1761+
errs = append(errs, fmt.Errorf("unable to validate if security group %q is owned by the controller: %w", sg, err))
1762+
continue
1763+
}
1764+
1765+
// build a list of security groups referencing the SG to be removed
1766+
_, groupsLinkedPermissions, err := c.buildSecurityGroupRuleReferences(ctx, sg)
1767+
if err != nil {
1768+
groupsNotRemoved = append(groupsNotRemoved, sg)
1769+
errs = append(errs, fmt.Errorf("error building security group rule references for %q: %w", sg, err))
1770+
continue
1771+
}
1772+
1773+
// revoke the rules references to the security group to be removed
1774+
for sgTarget, sgPerms := range groupsLinkedPermissions {
1775+
klog.Infof("revoking security group ingress for %q", aws.ToString(sgTarget.GroupId))
1776+
if _, err := c.ec2.RevokeSecurityGroupIngress(ctx, &ec2.RevokeSecurityGroupIngressInput{
1777+
GroupId: sgTarget.GroupId,
1778+
IpPermissions: sgPerms.List(),
1779+
}); err != nil {
1780+
groupsNotRemoved = append(groupsNotRemoved, sg)
1781+
errs = append(errs, fmt.Errorf("error revoking security group ingress rules from %q: %w", aws.ToString(sgTarget.GroupId), err))
1782+
continue
1783+
}
1784+
}
1785+
1786+
// leave the loop if there are errors while revoking the rules references to the security group to be removed
1787+
if len(errs) > 0 {
1788+
klog.Warningf("one or more errors while revoking security group ingress rules, skipping %q deletion", groupsNotRemoved)
1789+
continue
1790+
}
1791+
1792+
// if the security group is not owned by the controller, skip the SG removal lifecycle
1793+
if !isOwned {
1794+
msg := fmt.Sprintf("security group %q is not owned by the controller, skipping remove lifecycle after update", sg)
1795+
klog.Warningf("%s", msg)
1796+
groupsNotRemoved = append(groupsNotRemoved, sg)
1797+
errs = append(errs, fmt.Errorf("%s", msg))
1798+
continue
1799+
}
1800+
1801+
// delete the owned/managed security group
1802+
klog.Infof("deleting security group %q", sg)
1803+
sgMap := make(map[string]struct{})
1804+
sgMap[sg] = struct{}{}
1805+
if err := c.deleteSecurityGroupsWithBackoff(ctx, loadBalancerName, sgMap); err != nil {
1806+
msg := fmt.Sprintf("error deleting security group %q: %v", sg, err)
1807+
klog.Warningf("%s", msg)
1808+
groupsNotRemoved = append(groupsNotRemoved, sg)
1809+
errs = append(errs, fmt.Errorf("error deleting security group %q: %w", sg, err))
1810+
}
1811+
}
1812+
return groupsNotRemoved, errs
1813+
}
1814+
1815+
// buildSecurityGroupRuleReferences builds a map of security groups with cluster tags and a map of security groups with linked permissions.
1816+
//
1817+
// Parameters:
1818+
// - ctx: The context for the request.
1819+
// - sgID: The ID of the security group to build references for.
1820+
//
1821+
// Returns:
1822+
// - map[*ec2types.SecurityGroup]bool: A map of security groups with cluster tags.
1823+
// - map[*ec2types.SecurityGroup]IPPermissionSet: A map of security groups with linked permissions.
1824+
// - error: An error if any issue occurs while building the security group rule references.
1825+
//
1826+
// Behavior:
1827+
// - Queries security group rules linked to the given ID.
1828+
// - Filters security group rules linked to the given ID.
1829+
// - Saves the rules to the groupsLinkedPermissions map.
1830+
// - Saves the cluster tags to the groupsWithTags map.
1831+
func (c *Cloud) buildSecurityGroupRuleReferences(ctx context.Context, sgID string) (map[*ec2types.SecurityGroup]bool, map[*ec2types.SecurityGroup]IPPermissionSet, error) {
1832+
groupsWithTags := make(map[*ec2types.SecurityGroup]bool)
1833+
groupsLinkedPermissions := make(map[*ec2types.SecurityGroup]IPPermissionSet)
1834+
sgsOut, err := c.ec2.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
1835+
Filters: []ec2types.Filter{
1836+
newEc2Filter("ip-permission.group-id", sgID),
1837+
},
1838+
})
1839+
if err != nil {
1840+
return groupsWithTags, groupsLinkedPermissions, fmt.Errorf("error querying security groups for ELB: %q", err)
1841+
}
1842+
1843+
// Iterate over the security groups and build the maps.
1844+
for _, sg := range sgsOut {
1845+
groupsLinkedPermissions[&sg] = NewIPPermissionSet()
1846+
if !c.tagging.hasClusterTag(sg.Tags) {
1847+
klog.Warningf("security group %q is not cluster tagged, skip removing reference rule list to remove %q", aws.ToString(sg.GroupId), sgID)
1848+
continue
1849+
}
1850+
// Save group linked rules
1851+
for _, rule := range sg.IpPermissions {
1852+
if rule.UserIdGroupPairs != nil {
1853+
for _, pair := range rule.UserIdGroupPairs {
1854+
if pair.GroupId != nil && *pair.GroupId == sgID {
1855+
groupsLinkedPermissions[&sg].Insert(rule)
1856+
}
1857+
}
1858+
}
1859+
}
1860+
// Save cluster tags
1861+
groupsWithTags[&sg] = c.tagging.hasClusterTag(sg.Tags)
1862+
}
1863+
return groupsWithTags, groupsLinkedPermissions, nil
1864+
}

0 commit comments

Comments
 (0)