Skip to content

Commit

Permalink
🐛elbv2: wait for LB active state instead of resolving DNS name
Browse files Browse the repository at this point in the history
Using DNS name resolution as a way to check the load balancer is working
can cause problems that are dependent on the host running CAPA. In some
systems, the DNS resolution can fail with very large TTLs cached DNS
responses, causing very long provisioning times.

Instead of DNS resolution, let's use the AWS API to check for the load
balancer "active" state. Waiting for resolvable DNS names should be left
for the clients to do.
  • Loading branch information
r4f4 committed Sep 24, 2024
1 parent c2c96dd commit a880668
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 33 deletions.
8 changes: 0 additions & 8 deletions controllers/awscluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package controllers
import (
"context"
"fmt"
"net"
"time"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -288,13 +287,6 @@ func (r *AWSClusterReconciler) reconcileLoadBalancer(clusterScope *scope.Cluster
return &retryAfterDuration, nil
}

clusterScope.Debug("Looking up IP address for DNS", "dns", awsCluster.Status.Network.APIServerELB.DNSName)
if _, err := net.LookupIP(awsCluster.Status.Network.APIServerELB.DNSName); err != nil {
clusterScope.Error(err, "failed to get IP address for dns name", "dns", awsCluster.Status.Network.APIServerELB.DNSName)
conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.WaitForDNSNameResolveReason, clusterv1.ConditionSeverityInfo, "")
clusterScope.Info("Waiting on API server ELB DNS name to resolve")
return &retryAfterDuration, nil
}
conditions.MarkTrue(awsCluster, infrav1.LoadBalancerReadyCondition)

awsCluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{
Expand Down
25 changes: 0 additions & 25 deletions controllers/awscluster_controller_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,31 +395,6 @@ func TestAWSClusterReconcileOperations(t *testing.T) {
g.Expect(err).To(BeNil())
expectAWSClusterConditions(g, cs.AWSCluster, []conditionAssertion{{infrav1.LoadBalancerReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitForDNSNameReason}})
})
t.Run("Should fail AWSCluster create with LoadBalancer reconcile failure with WaitForDNSNameResolve condition as false", func(t *testing.T) {
g := NewWithT(t)
awsCluster := getAWSCluster("test", "test")
runningCluster := func() {
networkSvc.EXPECT().ReconcileNetwork().Return(nil)
sgSvc.EXPECT().ReconcileSecurityGroups().Return(nil)
ec2Svc.EXPECT().ReconcileBastion().Return(nil)
elbSvc.EXPECT().ReconcileLoadbalancers().Return(nil)
}
csClient := setup(t, &awsCluster)
defer teardown()
runningCluster()
cs, err := scope.NewClusterScope(
scope.ClusterScopeParams{
Client: csClient,
Cluster: &clusterv1.Cluster{},
AWSCluster: &awsCluster,
},
)
awsCluster.Status.Network.APIServerELB.DNSName = "test-apiserver.us-east-1.aws"
g.Expect(err).To(BeNil())
_, err = reconciler.reconcileNormal(cs)
g.Expect(err).To(BeNil())
expectAWSClusterConditions(g, cs.AWSCluster, []conditionAssertion{{infrav1.LoadBalancerReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitForDNSNameResolveReason}})
})
})
})
t.Run("Reconcile delete AWSCluster", func(t *testing.T) {
Expand Down
3 changes: 3 additions & 0 deletions controllers/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,9 @@ func mockedCreateLBV2Calls(t *testing.T, m *mocks.MockELBV2APIMockRecorder) {
LoadBalancerArn: lbArn,
SecurityGroups: aws.StringSlice([]string{"sg-apiserver-lb"}),
})).MaxTimes(1)
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
LoadBalancerArns: []*string{lbArn},
})).MaxTimes(1)
}

func mockedDescribeTargetGroupsCall(t *testing.T, m *mocks.MockELBV2APIMockRecorder) {
Expand Down
11 changes: 11 additions & 0 deletions pkg/cloud/services/elb/loadbalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,17 @@ func (s *Service) reconcileV2LB(lbSpec *infrav1.AWSLoadBalancerSpec) error {
return err
}

wReq := &elbv2.DescribeLoadBalancersInput{
LoadBalancerArns: aws.StringSlice([]string{lb.ARN}),
}
s.scope.Debug("Waiting for LB to become active", "api-server-lb-name", lb.Name)
waitStart := time.Now()
if err := s.ELBV2Client.WaitUntilLoadBalancerAvailableWithContext(context.TODO(), wReq); err != nil {
s.scope.Error(err, "failed to wait for LB to become available", "time", time.Since(waitStart))
return err
}
s.scope.Debug("LB reports active state", "api-server-lb-name", lb.Name, "time", time.Since(waitStart))

// set up the type for later processing
lb.LoadBalancerType = lbSpec.LoadBalancerType
if lb.IsManaged(s.scope.Name()) {
Expand Down
13 changes: 13 additions & 0 deletions pkg/cloud/services/elb/loadbalancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2275,6 +2275,9 @@ func TestReconcileV2LB(t *testing.T) {
},
nil,
)
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
LoadBalancerArns: aws.StringSlice([]string{elbArn}),
})).Return(nil)
},
check: func(t *testing.T, lb *infrav1.LoadBalancer, err error) {
t.Helper()
Expand Down Expand Up @@ -2476,6 +2479,10 @@ func TestReconcileV2LB(t *testing.T) {
LoadBalancerArn: aws.String(elbArn),
Subnets: []*string{},
}).Return(&elbv2.SetSubnetsOutput{}, nil)

m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
LoadBalancerArns: aws.StringSlice([]string{elbArn}),
})).Return(nil)
},
check: func(t *testing.T, lb *infrav1.LoadBalancer, err error) {
t.Helper()
Expand Down Expand Up @@ -2658,6 +2665,12 @@ func TestReconcileLoadbalancers(t *testing.T) {
},
nil,
)
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
LoadBalancerArns: aws.StringSlice([]string{elbArn}),
})).Return(nil)
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
LoadBalancerArns: aws.StringSlice([]string{secondElbArn}),
})).Return(nil)
},
check: func(t *testing.T, firstLB *infrav1.LoadBalancer, secondLB *infrav1.LoadBalancer, err error) {
t.Helper()
Expand Down

0 comments on commit a880668

Please sign in to comment.