diff --git a/api/v1beta1/azurecluster_webhook.go b/api/v1beta1/azurecluster_webhook.go index def1503c053..816d1a0afdd 100644 --- a/api/v1beta1/azurecluster_webhook.go +++ b/api/v1beta1/azurecluster_webhook.go @@ -169,6 +169,43 @@ func (*AzureClusterWebhook) ValidateUpdate(_ context.Context, oldRaw, newObj run allErrs = append(allErrs, err) } + // Validate availability zones are immutable for load balancers + if c.Spec.NetworkSpec.APIServerLB != nil && old.Spec.NetworkSpec.APIServerLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + c.Spec.NetworkSpec.APIServerLB.AvailabilityZones, + old.Spec.NetworkSpec.APIServerLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "apiServerLB", "availabilityZones"), + c.Spec.NetworkSpec.APIServerLB.AvailabilityZones, + "field is immutable")) + } + } + + if c.Spec.NetworkSpec.NodeOutboundLB != nil && old.Spec.NetworkSpec.NodeOutboundLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + c.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones, + old.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "nodeOutboundLB", "availabilityZones"), + c.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones, + "field is immutable")) + } + } + + if c.Spec.NetworkSpec.ControlPlaneOutboundLB != nil && old.Spec.NetworkSpec.ControlPlaneOutboundLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + c.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones, + old.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "controlPlaneOutboundLB", "availabilityZones"), + c.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones, + "field is immutable")) + } + } + allErrs = append(allErrs, c.validateSubnetUpdate(old)...) 
if len(allErrs) == 0 { diff --git a/api/v1beta1/types.go b/api/v1beta1/types.go index 3423976cbdd..d20c1f9539c 100644 --- a/api/v1beta1/types.go +++ b/api/v1beta1/types.go @@ -364,6 +364,14 @@ type LoadBalancerSpec struct { // BackendPool describes the backend pool of the load balancer. // +optional BackendPool BackendPool `json:"backendPool,omitempty"` + // AvailabilityZones is a list of availability zones for the load balancer. + // When specified for an internal load balancer, the frontend IP configuration + // will be zone-redundant across the specified zones. + // For public load balancers, this should be set on the associated public IP addresses instead. + // +optional + // +listType=set + // +kubebuilder:validation:MaxItems=3 + AvailabilityZones []string `json:"availabilityZones,omitempty"` LoadBalancerClassSpec `json:",inline"` } diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 71792a2ce24..a281b35dc05 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -3441,6 +3441,11 @@ func (in *LoadBalancerSpec) DeepCopyInto(out *LoadBalancerSpec) { **out = **in } out.BackendPool = in.BackendPool + if in.AvailabilityZones != nil { + in, out := &in.AvailabilityZones, &out.AvailabilityZones + *out = make([]string, len(*in)) + copy(*out, *in) + } in.LoadBalancerClassSpec.DeepCopyInto(&out.LoadBalancerClassSpec) } diff --git a/azure/scope/cluster.go b/azure/scope/cluster.go index c9760b5ad7e..0c00daceb45 100644 --- a/azure/scope/cluster.go +++ b/azure/scope/cluster.go @@ -267,6 +267,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.APIServerLB().IdleTimeoutInMinutes, AdditionalTags: s.AdditionalTags(), AdditionalPorts: s.AdditionalAPIServerLBPorts(), + AvailabilityZones: s.APIServerLB().AvailabilityZones, } if s.APIServerLB().FrontendIPs != nil { @@ -301,6 +302,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { 
IdleTimeoutInMinutes: s.APIServerLB().IdleTimeoutInMinutes, AdditionalTags: s.AdditionalTags(), AdditionalPorts: s.AdditionalAPIServerLBPorts(), + AvailabilityZones: s.APIServerLB().AvailabilityZones, } privateIPFound := false @@ -348,6 +350,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.NodeOutboundLB().IdleTimeoutInMinutes, Role: infrav1.NodeOutboundRole, AdditionalTags: s.AdditionalTags(), + AvailabilityZones: s.NodeOutboundLB().AvailabilityZones, }) } @@ -369,6 +372,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.ControlPlaneOutboundLB().IdleTimeoutInMinutes, Role: infrav1.ControlPlaneOutboundRole, AdditionalTags: s.AdditionalTags(), + AvailabilityZones: s.ControlPlaneOutboundLB().AvailabilityZones, }) } diff --git a/azure/services/loadbalancers/loadbalancers_test.go b/azure/services/loadbalancers/loadbalancers_test.go index 3a61789a507..f6233a6a120 100644 --- a/azure/services/loadbalancers/loadbalancers_test.go +++ b/azure/services/loadbalancers/loadbalancers_test.go @@ -111,6 +111,30 @@ var ( APIServerPort: 6443, } + fakeInternalAPILBSpecWithZones = LBSpec{ + Name: "my-private-lb", + ResourceGroup: "my-rg", + SubscriptionID: "123", + ClusterName: "my-cluster", + Location: "my-location", + Role: infrav1.APIServerRole, + Type: infrav1.Internal, + SKU: infrav1.SKUStandard, + SubnetName: "my-cp-subnet", + BackendPoolName: "my-private-lb-backendPool", + IdleTimeoutInMinutes: ptr.To[int32](4), + AvailabilityZones: []string{"1", "2", "3"}, + FrontendIPConfigs: []infrav1.FrontendIP{ + { + Name: "my-private-lb-frontEnd", + FrontendIPClass: infrav1.FrontendIPClass{ + PrivateIPAddress: "10.0.0.10", + }, + }, + }, + APIServerPort: 6443, + } + fakeNodeOutboundLBSpec = LBSpec{ Name: "my-cluster", ResourceGroup: "my-rg", diff --git a/azure/services/loadbalancers/spec.go b/azure/services/loadbalancers/spec.go index 5fd3cedf9f2..625fc010b4e 100644 --- a/azure/services/loadbalancers/spec.go 
+++ b/azure/services/loadbalancers/spec.go @@ -48,6 +48,7 @@ type LBSpec struct { IdleTimeoutInMinutes *int32 AdditionalTags map[string]string AdditionalPorts []infrav1.LoadBalancerPort + AvailabilityZones []string } // ResourceName returns the name of the load balancer. @@ -167,6 +168,16 @@ func (s *LBSpec) Parameters(_ context.Context, existing interface{}) (parameters func getFrontendIPConfigs(lbSpec LBSpec) ([]*armnetwork.FrontendIPConfiguration, []*armnetwork.SubResource) { frontendIPConfigurations := make([]*armnetwork.FrontendIPConfiguration, 0) frontendIDs := make([]*armnetwork.SubResource, 0) + + // Convert availability zones to []*string for Azure SDK + var zones []*string + if len(lbSpec.AvailabilityZones) > 0 { + zones = make([]*string, len(lbSpec.AvailabilityZones)) + for i, zone := range lbSpec.AvailabilityZones { + zones[i] = ptr.To(zone) + } + } + for _, ipConfig := range lbSpec.FrontendIPConfigs { var properties armnetwork.FrontendIPConfigurationPropertiesFormat if lbSpec.Type == infrav1.Internal { @@ -187,6 +198,7 @@ func getFrontendIPConfigs(lbSpec LBSpec) ([]*armnetwork.FrontendIPConfiguration, frontendIPConfigurations = append(frontendIPConfigurations, &armnetwork.FrontendIPConfiguration{ Properties: &properties, Name: ptr.To(ipConfig.Name), + Zones: zones, }) frontendIDs = append(frontendIDs, &armnetwork.SubResource{ ID: ptr.To(azure.FrontendIPConfigID(lbSpec.SubscriptionID, lbSpec.ResourceGroup, lbSpec.Name, ipConfig.Name)), diff --git a/azure/services/loadbalancers/spec_test.go b/azure/services/loadbalancers/spec_test.go index 9e75779a7c1..14f2f2a3082 100644 --- a/azure/services/loadbalancers/spec_test.go +++ b/azure/services/loadbalancers/spec_test.go @@ -178,6 +178,22 @@ func TestParameters(t *testing.T) { }, expectedError: "", }, + { + name: "internal load balancer with availability zones", + spec: &fakeInternalAPILBSpecWithZones, + existing: nil, + expect: func(g *WithT, result interface{}) { + 
g.Expect(result).To(BeAssignableToTypeOf(armnetwork.LoadBalancer{})) + lb := result.(armnetwork.LoadBalancer) + // Verify zones are set on frontend IP configuration + g.Expect(lb.Properties.FrontendIPConfigurations).To(HaveLen(1)) + g.Expect(lb.Properties.FrontendIPConfigurations[0].Zones).To(HaveLen(3)) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[0]).To(Equal("1")) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[1]).To(Equal("2")) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[2]).To(Equal("3")) + }, + expectedError: "", + }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml index 1edca6b7df9..29df5d71e00 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml @@ -689,6 +689,17 @@ spec: description: APIServerLB is the configuration for the control-plane load balancer. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. + When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. + For public load balancers, this should be set on the associated public IP addresses instead. + items: + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. @@ -772,6 +783,17 @@ spec: ControlPlaneOutboundLB is the configuration for the control-plane outbound load balancer. This is different from APIServerLB, and is used only in private clusters (optionally) for enabling outbound traffic. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. 
+ When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. + For public load balancers, this should be set on the associated public IP addresses instead. + items: + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. @@ -854,6 +876,17 @@ spec: description: NodeOutboundLB is the configuration for the node outbound load balancer. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. + When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. + For public load balancers, this should be set on the associated public IP addresses instead. + items: + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. 
diff --git a/docs/book/src/SUMMARY.md b/docs/book/src/SUMMARY.md index b437fc7b3e3..a4f14b92d0e 100644 --- a/docs/book/src/SUMMARY.md +++ b/docs/book/src/SUMMARY.md @@ -35,6 +35,7 @@ - [Externally managed Azure infrastructure](./self-managed/externally-managed-azure-infrastructure.md) - [Failure Domains](./self-managed/failure-domains.md) - [Flatcar](./self-managed/flatcar.md) + - [Load Balancer Zone Redundancy](./self-managed/load-balancer-zone-redundancy.md) - [GPU-enabled Clusters](./self-managed/gpu.md) - [IPv6](./self-managed/ipv6.md) - [Machine Pools (VMSS)](./self-managed/machinepools.md) diff --git a/docs/book/src/self-managed/load-balancer-zone-redundancy.md b/docs/book/src/self-managed/load-balancer-zone-redundancy.md new file mode 100644 index 00000000000..8e4ea121fe9 --- /dev/null +++ b/docs/book/src/self-managed/load-balancer-zone-redundancy.md @@ -0,0 +1,294 @@ +# Load Balancer Zone Redundancy + +## Zone Redundancy for Load Balancers in Azure + +Azure Load Balancers can be configured as zone-redundant to ensure high availability across multiple availability zones within a region. A zone-redundant load balancer distributes traffic across all zones, providing resilience against zone failures. + +**Key concepts:** +- Zone redundancy for load balancers is configured through the **frontend IP configuration** +- For **internal load balancers**, zones are set directly on the frontend IP configuration +- For **public load balancers**, zones are inherited from the zone configuration of the public IP address +- **Zones are immutable** - once created, they cannot be changed, added, or removed + +Full details can be found in the [Azure Load Balancer reliability documentation](https://learn.microsoft.com/azure/reliability/reliability-load-balancer). + +## Configuring Zone-Redundant Load Balancers + +CAPZ exposes the `availabilityZones` field on load balancer specifications to enable zone redundancy. 
+ +### Internal Load Balancers + +For internal load balancers (such as a private API server), you can configure availability zones directly on the load balancer spec: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Internal + availabilityZones: + - "1" + - "2" + - "3" +``` + +This configuration creates a zone-redundant internal load balancer with frontend IPs distributed across zones 1, 2, and 3. + +### Public Load Balancers + +For public load balancers, zone redundancy is primarily controlled by the public IP addresses. However, you can still set `availabilityZones` on the load balancer for consistency: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Public + availabilityZones: + - "1" + - "2" + - "3" +``` + +> **Note**: For public load balancers, ensure that the associated public IP addresses are also zone-redundant for complete zone redundancy. 
+ +### Node Outbound Load Balancer + +You can also configure zone redundancy for node outbound load balancers: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: westus2 + networkSpec: + nodeOutboundLB: + type: Public + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: node-outbound-ip + publicIP: + name: node-outbound-publicip +``` + +### Control Plane Outbound Load Balancer + +For clusters with private API servers, you can configure the control plane outbound load balancer: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Internal + availabilityZones: + - "1" + - "2" + - "3" + controlPlaneOutboundLB: + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: controlplane-outbound-ip + publicIP: + name: controlplane-outbound-publicip +``` + +## Complete Example: Highly Available Cluster + +Here's a complete example of a highly available cluster with zone-redundant load balancers: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ha-cluster + namespace: default +spec: + location: eastus + resourceGroup: ha-cluster-rg + networkSpec: + # Zone-redundant internal API server load balancer + apiServerLB: + type: Internal + name: ha-cluster-internal-lb + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: api-server-internal-ip + privateIP: "10.0.0.100" + + # Zone-redundant control plane outbound load balancer + controlPlaneOutboundLB: + name: ha-cluster-cp-outbound-lb + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: cp-outbound-ip + publicIP: + name: cp-outbound-publicip + + # Zone-redundant node outbound load balancer + nodeOutboundLB: + name: ha-cluster-node-outbound-lb + availabilityZones: + - 
"1" + - "2" + - "3" + frontendIPs: + - name: node-outbound-ip + publicIP: + name: node-outbound-publicip + + # Custom VNet configuration + vnet: + name: ha-cluster-vnet + cidrBlocks: + - "10.0.0.0/16" + + subnets: + - name: control-plane-subnet + role: control-plane + cidrBlocks: + - "10.0.0.0/24" + - name: node-subnet + role: node + cidrBlocks: + - "10.0.1.0/24" +``` + +## Important Considerations + +### Immutability + +Once a load balancer is created with availability zones, the zone configuration **cannot be changed**. This is an Azure platform limitation. To change zones, you must: + +1. Delete the load balancer +2. Recreate it with the new zone configuration + +> **Warning**: Changing load balancer zones requires recreating the cluster's load balancers, which will cause service interruption. + +### Region Support + +Not all Azure regions support availability zones. Before configuring zone-redundant load balancers, verify that your target region supports zones: + +```bash +az vm list-skus -l <location> --zone -o table +``` + +### Standard SKU Requirement + +Zone-redundant load balancers require the **Standard SKU**. CAPZ uses Standard SKU by default, so no additional configuration is needed. + +### Backend Pool Placement + +For optimal high availability: +- Spread your control plane nodes across all availability zones +- Spread your worker nodes across all availability zones +- Ensure backend pool members exist in the same zones as the load balancer + +See the [Failure Domains](failure-domains.md) documentation for details on distributing VMs across zones. + +## Migration from Non-Zone-Redundant Load Balancers + +If you have an existing cluster without zone-redundant load balancers, migration requires careful planning: + +### For New Clusters + +When creating a new cluster, simply include the `availabilityZones` field in your `AzureCluster` specification from the start. + +### For Existing Clusters + +**Migration is not straightforward** because: +1. 
Azure does not allow modifying zones on existing load balancers +2. CAPZ's webhook validation prevents zone changes to enforce this immutability +3. Load balancer recreation requires cluster downtime + +**Recommended approach for existing clusters:** +1. Create a new cluster with zone-redundant configuration +2. Migrate workloads to the new cluster +3. Decommission the old cluster + +**Alternative for development/test clusters:** +1. Delete the `AzureCluster` resource (this will delete the infrastructure) +2. Recreate the `AzureCluster` with `availabilityZones` configured +3. Reconcile the cluster + +> **Important**: The alternative approach causes significant downtime and should only be used in non-production environments. + +## Troubleshooting + +### Load Balancer Not Zone-Redundant + +If your load balancer is not zone-redundant despite configuration: + +1. **Verify the zones are set in spec:** + ```bash + kubectl get azurecluster <cluster-name> -o jsonpath='{.spec.networkSpec.apiServerLB.availabilityZones}' + ``` + +2. **Check the Azure load balancer frontend configuration:** + ```bash + az network lb frontend-ip show \ + --lb-name <lb-name> \ + --name <frontend-ip-name> \ + --resource-group <resource-group> \ + --query zones + ``` + +3. **Verify the region supports zones:** + ```bash + az vm list-skus -l <location> --zone -o table | grep -i standardsku + ``` + +### Validation Errors + +If you encounter validation errors when updating `availabilityZones`: + +``` +field is immutable +``` + +This is expected behavior. Zones cannot be modified after creation. You must recreate the load balancer with the desired configuration. + +## Best Practices + +1. **Enable zone redundancy from the start** when creating new clusters in zone-capable regions +2. **Use all available zones** in the region (typically 3 zones) for maximum resilience +3. **Spread backend pools** across all zones configured on the load balancer +4. **Monitor zone health** and be prepared to handle zone failures +5. 
**Test failover scenarios** to ensure your cluster can survive zone outages +6. **Document your zone configuration** for disaster recovery procedures + +## Related Documentation + +- [Failure Domains](failure-domains.md) - Configure VMs across availability zones +- [API Server Endpoint](api-server-endpoint.md) - API server load balancer configuration +- [Azure Load Balancer Reliability](https://learn.microsoft.com/azure/reliability/reliability-load-balancer) - Azure official documentation diff --git a/templates/cluster-template-private.yaml b/templates/cluster-template-private.yaml index 786b6d52fc2..759fe70d84f 100644 --- a/templates/cluster-template-private.yaml +++ b/templates/cluster-template-private.yaml @@ -32,6 +32,10 @@ spec: location: ${AZURE_LOCATION} networkSpec: apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" name: ${CLUSTER_NAME}-internal-lb type: Internal controlPlaneOutboundLB: diff --git a/templates/flavors/private/patches/private-lb.yaml b/templates/flavors/private/patches/private-lb.yaml index 76e1539df2a..a2933e29963 100644 --- a/templates/flavors/private/patches/private-lb.yaml +++ b/templates/flavors/private/patches/private-lb.yaml @@ -7,6 +7,10 @@ spec: apiServerLB: name: ${CLUSTER_NAME}-internal-lb type: Internal + availabilityZones: + - "1" + - "2" + - "3" nodeOutboundLB: frontendIPsCount: 1 controlPlaneOutboundLB: diff --git a/templates/test/ci/cluster-template-prow-private.yaml b/templates/test/ci/cluster-template-prow-private.yaml index 26910fdcb43..47d6e99cc7a 100644 --- a/templates/test/ci/cluster-template-prow-private.yaml +++ b/templates/test/ci/cluster-template-prow-private.yaml @@ -49,6 +49,10 @@ spec: location: ${AZURE_LOCATION} networkSpec: apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" frontendIPs: - name: ${CLUSTER_NAME}-internal-lb-frontend privateIP: ${AZURE_INTERNAL_LB_IP} diff --git a/test/e2e/azure_test.go b/test/e2e/azure_test.go index 97eff3cfebf..e895ef2f20d 100644 --- a/test/e2e/azure_test.go +++ 
b/test/e2e/azure_test.go
@@ -27,6 +27,8 @@ import (
 	"strings"
 	"time"
 
+	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
 	"github.com/Azure/azure-service-operator/v2/pkg/common/config"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -37,6 +39,8 @@ import (
 	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
 	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
 )
 
 var _ = Describe("Workload cluster creation", func() {
@@ -1427,5 +1431,108 @@ var _ = Describe("Workload cluster creation", func() {
 		})
 	})
 
+	Context("Creating a cluster with zone-redundant load balancers [OPTIONAL]", func() {
+		It("with zone-redundant API server, node outbound, and control plane outbound load balancers", func() {
+			clusterName = getClusterName(clusterNamePrefix, "lb-zones")
+
+			// Set up zone-redundant load balancer configuration
+			Expect(os.Setenv("EXP_APISERVER_ILB", "true")).To(Succeed())
+			Expect(os.Setenv("AZURE_INTERNAL_LB_PRIVATE_IP", "40.0.0.100")).To(Succeed())
+			Expect(os.Setenv("AZURE_VNET_CIDR", "40.0.0.0/8")).To(Succeed())
+			Expect(os.Setenv("AZURE_CP_SUBNET_CIDR", "40.0.0.0/16")).To(Succeed())
+			Expect(os.Setenv("AZURE_NODE_SUBNET_CIDR", "40.1.0.0/16")).To(Succeed())
+			Expect(os.Setenv("AZURE_LB_ZONES", "1,2,3")).To(Succeed())
+
+			clusterctl.ApplyClusterTemplateAndWait(ctx, createApplyClusterTemplateInput(
+				specName,
+				withFlavor("apiserver-ilb"),
+				withNamespace(namespace.Name),
+				withClusterName(clusterName),
+				withControlPlaneMachineCount(3),
+				withWorkerMachineCount(2),
+				withControlPlaneInterval(specName, "wait-control-plane-ha"),
+				withControlPlaneWaiters(clusterctl.ControlPlaneWaiters{
+					WaitForControlPlaneInitialized: EnsureControlPlaneInitialized,
+				}),
+				withPostMachinesProvisioned(func() {
+					EnsureDaemonsets(ctx, func() DaemonsetsSpecInput {
+						return DaemonsetsSpecInput{
+							BootstrapClusterProxy: bootstrapClusterProxy,
+							Namespace:             namespace,
+							ClusterName:           clusterName,
+						}
+					})
+				}),
+			), result)
+
+			By("Verifying load balancer zones are configured correctly in Azure", func() {
+				expectedZones := []string{"1", "2", "3"}
+
+				subscriptionID := getSubscriptionID(Default)
+				cred, err := azidentity.NewDefaultAzureCredential(nil)
+				Expect(err).NotTo(HaveOccurred())
+
+				mgmtClient := bootstrapClusterProxy.GetClient()
+				Expect(mgmtClient).NotTo(BeNil())
+
+				azureCluster := &infrav1.AzureCluster{}
+				err = mgmtClient.Get(ctx, client.ObjectKey{
+					Namespace: namespace.Name,
+					Name:      clusterName,
+				}, azureCluster)
+				Expect(err).NotTo(HaveOccurred())
+
+				resourceGroupName := azureCluster.Spec.ResourceGroup
+				Expect(resourceGroupName).NotTo(BeEmpty())
+
+				lbClient, err := armnetwork.NewLoadBalancersClient(subscriptionID, cred, nil)
+				Expect(err).NotTo(HaveOccurred())
+
+				// verifyLBZones checks one load balancer: the AzureCluster spec must list
+				// expectedZones, and every frontend IP configuration of the Azure load
+				// balancer must eventually report exactly those zones.
+				verifyLBZones := func(lbKind, lbName string, specZones []string) {
+					Expect(specZones).To(Equal(expectedZones),
+						"%s should have zones configured in AzureCluster spec", lbKind)
+
+					Eventually(func(g Gomega) {
+						lb, err := lbClient.Get(ctx, resourceGroupName, lbName, nil)
+						g.Expect(err).NotTo(HaveOccurred())
+						g.Expect(lb.Properties).NotTo(BeNil())
+						g.Expect(lb.Properties.FrontendIPConfigurations).NotTo(BeEmpty())
+
+						for _, frontendIP := range lb.Properties.FrontendIPConfigurations {
+							g.Expect(frontendIP.Zones).NotTo(BeNil(), "Frontend IP should have zones configured")
+							g.Expect(frontendIP.Zones).To(HaveLen(len(expectedZones)), "Frontend IP should have %d zones", len(expectedZones))
+
+							zonesMap := make(map[string]bool)
+							for _, zone := range frontendIP.Zones {
+								if zone != nil {
+									zonesMap[*zone] = true
+								}
+							}
+							for _, expectedZone := range expectedZones {
+								g.Expect(zonesMap[expectedZone]).To(BeTrue(), "Zone %s should be configured", expectedZone)
+							}
+						}
+					}, retryableOperationTimeout, retryableOperationSleepBetweenRetries).Should(Succeed())
+				}
+
+				// A load balancer that is absent from the spec is skipped.
+				if lbSpec := azureCluster.Spec.NetworkSpec.APIServerLB; lbSpec != nil {
+					verifyLBZones("APIServerLB", lbSpec.Name, lbSpec.AvailabilityZones)
+				}
+				if lbSpec := azureCluster.Spec.NetworkSpec.NodeOutboundLB; lbSpec != nil {
+					verifyLBZones("NodeOutboundLB", lbSpec.Name, lbSpec.AvailabilityZones)
+				}
+				if lbSpec := azureCluster.Spec.NetworkSpec.ControlPlaneOutboundLB; lbSpec != nil {
+					verifyLBZones("ControlPlaneOutboundLB", lbSpec.Name, lbSpec.AvailabilityZones)
+				}
+			})
+
+			By("PASSED!")
+		})
+	})
+
 	// TODO: add a same test as above for a windows cluster
 })