Commit 2f3fb07

Include prometheus and alertmanager status in monitor status
* Include prometheus and alertmanager status in monitor status

  The monitor should be unavailable if there is a Prometheus or Alertmanager instance that is unavailable. This handles the case where the Prometheus install failed and the statefulsets haven't been created yet.

* monitor: Only check prometheus / alertmanager Available status
1 parent c016393 commit 2f3fb07

File tree

3 files changed: +203 -2 lines changed

pkg/controller/monitor/monitor_controller.go
pkg/controller/monitor/monitor_controller_test.go
pkg/controller/utils/utils.go

pkg/controller/monitor/monitor_controller.go

Lines changed: 43 additions & 0 deletions
@@ -22,6 +22,7 @@ import (
 
 	crdv1 "github.com/tigera/operator/pkg/apis/crd.projectcalico.org/v1"
 
+	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -430,6 +431,48 @@ func (r *ReconcileMonitor) Reconcile(ctx context.Context, request reconcile.Requ
 		}
 	}
 
+	p, err := utils.GetPrometheus(ctx, r.client)
+	if err != nil {
+		r.status.SetDegraded(operatorv1.ResourceReadError, "An error occurred trying to retrieve the Prometheus status", err, reqLogger)
+		return reconcile.Result{}, err
+	}
+
+	if p != nil {
+		available := monitoringv1.ConditionFalse
+
+		for _, cond := range p.Status.Conditions {
+			if cond.Type == monitoringv1.Available {
+				available = cond.Status
+			}
+		}
+
+		if available != monitoringv1.ConditionTrue {
+			r.status.SetDegraded(operatorv1.ResourceNotReady, "Prometheus component is not available", err, reqLogger)
+			return reconcile.Result{}, err
+		}
+	}
+
+	am, err := utils.GetAlertmanager(ctx, r.client)
+	if err != nil {
+		r.status.SetDegraded(operatorv1.ResourceReadError, "An error occurred trying to retrieve the Alertmanager status", err, reqLogger)
+		return reconcile.Result{}, err
+	}
+
+	if am != nil {
+		available := monitoringv1.ConditionFalse
+
+		for _, cond := range am.Status.Conditions {
+			if cond.Type == monitoringv1.Available {
+				available = cond.Status
+			}
+		}
+
+		if available != monitoringv1.ConditionTrue {
+			r.status.SetDegraded(operatorv1.ResourceNotReady, "Alertmanager component is not available", err, reqLogger)
+			return reconcile.Result{}, err
+		}
+	}
+
 	// Tell the status manager that we're ready to monitor the resources we've told it about and receive statuses.
 	r.status.ReadyToMonitor()
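Both availability checks added above share the same shape: scan the resource's status conditions for Available and treat anything other than True as not ready. Distilled into a standalone helper for clarity (the isAvailable name is illustrative only; the commit inlines this loop twice in Reconcile):

package example

import (
	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
)

// isAvailable reports whether the Available condition in a Prometheus or
// Alertmanager status is True. A missing Available condition counts as not
// available, which covers the case where the install failed and no status
// was ever written.
func isAvailable(conditions []monitoringv1.Condition) bool {
	available := monitoringv1.ConditionFalse
	for _, cond := range conditions {
		if cond.Type == monitoringv1.Available {
			available = cond.Status
		}
	}
	return available == monitoringv1.ConditionTrue
}

Note the loop takes the last matching condition rather than breaking on the first; with a well-formed status there is at most one Available condition, so the result is the same.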

pkg/controller/monitor/monitor_controller_test.go

Lines changed: 134 additions & 2 deletions
@@ -1,4 +1,4 @@
-// Copyright (c) 2021-2024 Tigera, Inc. All rights reserved.
+// Copyright (c) 2021-2025 Tigera, Inc. All rights reserved.
 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -68,10 +68,14 @@ var _ = Describe("Monitor controller tests", func() {
 		Expect(apis.AddToScheme(scheme)).NotTo(HaveOccurred())
 		Expect(appsv1.SchemeBuilder.AddToScheme(scheme)).NotTo(HaveOccurred())
 		Expect(rbacv1.SchemeBuilder.AddToScheme(scheme)).NotTo(HaveOccurred())
+		Expect(monitoringv1.AddToScheme(scheme)).NotTo(HaveOccurred())
 
 		// Create a client that will have a crud interface of k8s objects.
 		ctx = context.Background()
-		cli = ctrlrfake.DefaultFakeClientBuilder(scheme).Build()
+		cli = ctrlrfake.DefaultFakeClientBuilder(scheme).
+			WithStatusSubresource(&monitoringv1.Prometheus{}).
+			WithStatusSubresource(&monitoringv1.Alertmanager{}).
+			Build()
 
 		// Create an object we can use throughout the test to do the monitor reconcile loops.
 		mockStatus = &status.MockStatus{}
@@ -85,6 +89,7 @@ var _ = Describe("Monitor controller tests", func() {
 		mockStatus.On("ReadyToMonitor")
 		mockStatus.On("RemoveDeployments", mock.Anything)
 		mockStatus.On("RemoveCertificateSigningRequests", common.TigeraPrometheusNamespace)
+		mockStatus.On("SetDegraded", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return()
 		mockStatus.On("SetMetaData", mock.Anything).Return()
 
 		// Create an object we can use throughout the test to do the monitor reconcile loops.
@@ -135,6 +140,133 @@ var _ = Describe("Monitor controller tests", func() {
 		r.tierWatchReady.MarkAsReady()
 	})
 
+	Context("prometheus resources", func() {
+		BeforeEach(func() {
+			// Add the Prometheus and Alertmanager instances
+			prom := &monitoringv1.Prometheus{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      monitor.CalicoNodePrometheus,
+					Namespace: common.TigeraPrometheusNamespace,
+				},
+			}
+			Expect(cli.Create(ctx, prom)).To(BeNil())
+
+			prom.Status = monitoringv1.PrometheusStatus{
+				Conditions: []monitoringv1.Condition{
+					{
+						Type:   monitoringv1.Available,
+						Status: monitoringv1.ConditionTrue,
+					},
+					{
+						Type:   monitoringv1.Reconciled,
+						Status: monitoringv1.ConditionTrue,
+					},
+				},
+			}
+			Expect(cli.Status().Update(ctx, prom)).To(Succeed())
+
+			alertManager := &monitoringv1.Alertmanager{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      monitor.CalicoNodeAlertmanager,
+					Namespace: common.TigeraPrometheusNamespace,
+				},
+			}
+			Expect(cli.Create(ctx, alertManager)).To(BeNil())
+
+			alertManager.Status = monitoringv1.AlertmanagerStatus{
+				Conditions: []monitoringv1.Condition{
+					{
+						Type:   monitoringv1.Available,
+						Status: monitoringv1.ConditionTrue,
+					},
+					{
+						Type:   monitoringv1.Reconciled,
+						Status: monitoringv1.ConditionTrue,
+					},
+				},
+			}
+			Expect(cli.Status().Update(ctx, alertManager)).To(Succeed())
+		})
+
+		It("should be ready if the prometheus statefulset is ready", func() {
+			_, err := r.Reconcile(ctx, reconcile.Request{})
+			Expect(err).ShouldNot(HaveOccurred())
+
+			mockStatus.AssertNotCalled(GinkgoT(), "SetDegraded",
+				operatorv1.ResourceNotReady,
+				mock.Anything,
+				mock.Anything,
+				mock.Anything,
+			)
+		})
+
+		It("should be ready if the alertmanager statefulset is ready", func() {
+			_, err := r.Reconcile(ctx, reconcile.Request{})
+			Expect(err).ShouldNot(HaveOccurred())
+
+			mockStatus.AssertNotCalled(GinkgoT(), "SetDegraded",
+				operatorv1.ResourceNotReady,
+				mock.Anything,
+				mock.Anything,
+				mock.Anything,
+			)
+		})
+
+		It("should degrade if the prometheus statefulset isn't ready", func() {
+			prom := &monitoringv1.Prometheus{}
+			Expect(cli.Get(ctx, client.ObjectKey{Name: monitor.CalicoNodePrometheus, Namespace: common.TigeraPrometheusNamespace}, prom)).NotTo(HaveOccurred())
+
+			prom.Status.Conditions = []monitoringv1.Condition{
+				{
+					Type:   monitoringv1.Available,
+					Status: monitoringv1.ConditionFalse,
+				},
+				{
+					Type:   monitoringv1.Reconciled,
+					Status: monitoringv1.ConditionTrue,
+				},
+			}
+			Expect(cli.Status().Update(ctx, prom)).To(Succeed())
+
+			_, err := r.Reconcile(ctx, reconcile.Request{})
+			Expect(err).ShouldNot(HaveOccurred())
+
+			mockStatus.AssertCalled(GinkgoT(), "SetDegraded",
+				operatorv1.ResourceNotReady,
+				"Prometheus component is not available",
+				mock.Anything,
+				mock.Anything,
+			)
+		})
+
+		It("should degrade if the alertmanager statefulset isn't ready", func() {
+			alertManager := &monitoringv1.Alertmanager{}
+			Expect(cli.Get(ctx, client.ObjectKey{Name: monitor.CalicoNodeAlertmanager, Namespace: common.TigeraPrometheusNamespace}, alertManager)).NotTo(HaveOccurred())
+
+			alertManager.Status.Conditions = []monitoringv1.Condition{
+				{
+					Type:   monitoringv1.Available,
+					Status: monitoringv1.ConditionFalse,
+				},
+				{
+					Type:   monitoringv1.Reconciled,
+					Status: monitoringv1.ConditionTrue,
+				},
+			}
+			Expect(cli.Status().Update(ctx, alertManager)).To(Succeed())
+
+			_, err := r.Reconcile(ctx, reconcile.Request{})
+			Expect(err).ShouldNot(HaveOccurred())
+
+			mockStatus.AssertCalled(GinkgoT(), "SetDegraded",
+				operatorv1.ResourceNotReady,
+				"Alertmanager component is not available",
+				mock.Anything,
+				mock.Anything,
+			)
+		})
+	})
+
 	Context("controller reconciliation", func() {
 		var (
 			am = &monitoringv1.Alertmanager{}
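A detail worth calling out in the test changes above: with controller-runtime's fake client (which ctrlrfake.DefaultFakeClientBuilder appears to wrap), a type's status subresource must be registered, or the cli.Status().Update(...) calls used to seed conditions will fail. A minimal sketch using the upstream builder directly, under that assumption; this is not the operator's own helper:

package example

import (
	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// newFakeClient builds a fake client whose Prometheus and Alertmanager
// objects expose a writable status subresource, so tests can seed status
// conditions with cli.Status().Update(...). Sketch only; the real tests go
// through ctrlrfake.DefaultFakeClientBuilder.
func newFakeClient(scheme *runtime.Scheme) client.Client {
	return fake.NewClientBuilder().
		WithScheme(scheme).
		WithStatusSubresource(&monitoringv1.Prometheus{}, &monitoringv1.Alertmanager{}).
		Build()
}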

pkg/controller/utils/utils.go

Lines changed: 26 additions & 0 deletions
@@ -24,6 +24,7 @@ import (
 
 	esv1 "github.com/elastic/cloud-on-k8s/v2/pkg/apis/elasticsearch/v1"
 	"github.com/elastic/cloud-on-k8s/v2/pkg/utils/stringsutil"
+	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	csiv1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1"
 
 	"github.com/go-logr/logr"
@@ -57,6 +58,7 @@ import (
 	"github.com/tigera/operator/pkg/ctrlruntime"
 	"github.com/tigera/operator/pkg/render"
 	"github.com/tigera/operator/pkg/render/logstorage/eck"
+	"github.com/tigera/operator/pkg/render/monitor"
 )
 
 const (
@@ -858,6 +860,30 @@ func GetElasticsearch(ctx context.Context, c client.Client) (*esv1.Elasticsearch
 	return &es, nil
 }
 
+func GetAlertmanager(ctx context.Context, c client.Client) (*monitoringv1.Alertmanager, error) {
+	a := monitoringv1.Alertmanager{}
+	err := c.Get(ctx, client.ObjectKey{Name: monitor.CalicoNodeAlertmanager, Namespace: common.TigeraPrometheusNamespace}, &a)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &a, nil
+}
+
+func GetPrometheus(ctx context.Context, c client.Client) (*monitoringv1.Prometheus, error) {
+	p := monitoringv1.Prometheus{}
+	err := c.Get(ctx, client.ObjectKey{Name: monitor.CalicoNodePrometheus, Namespace: common.TigeraPrometheusNamespace}, &p)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			return nil, nil
+		}
+		return nil, err
+	}
+	return &p, nil
+}
+
 // AddKubeProxyWatch creates a watch on the kube-proxy DaemonSet.
 func AddKubeProxyWatch(c ctrlruntime.Controller) error {
 	ds := &appsv1.DaemonSet{
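Note the contract of the two getters above: they return (nil, nil) when the resource simply doesn't exist, reserving a non-nil error for real API failures. A short caller sketch (checkPrometheus is a hypothetical name; the real caller is the monitor reconciler shown earlier):

package example

import (
	"context"
	"fmt"

	"github.com/tigera/operator/pkg/controller/utils"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// checkPrometheus illustrates the calling convention: a non-nil error means
// the API read failed, while (nil, nil) means the Prometheus CR has not been
// created yet; the two cases must be handled differently.
func checkPrometheus(ctx context.Context, c client.Client) error {
	p, err := utils.GetPrometheus(ctx, c)
	if err != nil {
		return fmt.Errorf("reading Prometheus: %w", err)
	}
	if p == nil {
		return nil // absent is not an error, e.g. before the install completes
	}
	// p.Status.Conditions can now be inspected, as Reconcile does above.
	return nil
}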
