From 4de0ff086ab0b09f9c11675043cfc12188bdd972 Mon Sep 17 00:00:00 2001
From: Pavel Tishkov
Date: Tue, 16 Sep 2025 08:49:08 +0300
Subject: [PATCH] feat(module): add DVCR capacity alert

Signed-off-by: Pavel Tishkov
---
Review notes (text in this section is not part of the commit message):

- Existing DVCR alerts are moved from the D8VirtualizationHealth group to a
  dedicated D8VirtualizationDVCRHealth group.
- New alert D8VirtualizationDVCRInsufficientCapacityRisk fires when the
  "dvcr" PVC in d8-virtualization has had less than 5 GiB, or less than 20%
  of its capacity, available for 10 minutes.
- Both branches of the `or` are filtering comparisons on
  kubelet_volume_stats_available_bytes, so `$value` is always a byte count
  and can be rendered with `humanize1024` in the description.
- NOTE(review): the YAML indentation below was reconstructed from a
  whitespace-collapsed copy of this patch -- confirm it against
  monitoring/prometheus-rules/dvcr.yaml before applying.
- NOTE(review): the post-image blob id on the `index` line predates the
  changes to the alert expression and description -- regenerate if needed.

 monitoring/prometheus-rules/dvcr.yaml | 33 +++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/monitoring/prometheus-rules/dvcr.yaml b/monitoring/prometheus-rules/dvcr.yaml
index b5285279f1..c1da19bb4b 100644
--- a/monitoring/prometheus-rules/dvcr.yaml
+++ b/monitoring/prometheus-rules/dvcr.yaml
@@ -10,8 +10,8 @@
         plk_protocol_version: "1"
         plk_markup_format: "markdown"
         plk_labels_as_annotations: "pod"
-        plk_create_group_if_not_exists__d8_virtualization_health: "D8VirtualizationHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
-        plk_grouped_by__d8_virtualization_health: "D8VirtualizationHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
+        plk_create_group_if_not_exists__d8_virtualization_dvcr_health: "D8VirtualizationDVCRHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
+        plk_grouped_by__d8_virtualization_dvcr_health: "D8VirtualizationDVCRHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
         summary: The dvcr Pod is NOT Ready.
         description: |
           The recommended course of action:
@@ -27,10 +27,35 @@
       annotations:
         plk_protocol_version: "1"
         plk_markup_format: "markdown"
-        plk_create_group_if_not_exists__d8_virtualization_health: "D8VirtualizationHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
-        plk_grouped_by__d8_virtualization_health: "D8VirtualizationHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
+        plk_create_group_if_not_exists__d8_virtualization_dvcr_health: "D8VirtualizationDVCRHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
+        plk_grouped_by__d8_virtualization_dvcr_health: "D8VirtualizationDVCRHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
         summary: The dvcr Pod is NOT Running.
         description: |
           The recommended course of action:
           1. Retrieve details of the Deployment: `kubectl -n d8-virtualization describe deploy dvcr`
           2. View the status of the Pod and try to figure out why it is not running: `kubectl -n d8-virtualization describe pod -l app=dvcr`
+
+    - alert: D8VirtualizationDVCRInsufficientCapacityRisk
+      expr: |
+        min by (persistentvolumeclaim, namespace) (
+          (kubelet_volume_stats_available_bytes{namespace="d8-virtualization", persistentvolumeclaim="dvcr"} < 5 * 1024 * 1024 * 1024)
+          or
+          (kubelet_volume_stats_available_bytes{namespace="d8-virtualization", persistentvolumeclaim="dvcr"} < kubelet_volume_stats_capacity_bytes{namespace="d8-virtualization", persistentvolumeclaim="dvcr"} * 0.2)
+        )
+      for: 10m
+      labels:
+        severity_level: "6"
+        tier: cluster
+      annotations:
+        plk_protocol_version: "1"
+        plk_markup_format: "markdown"
+        plk_create_group_if_not_exists__d8_virtualization_dvcr_health: "D8VirtualizationDVCRHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
+        plk_grouped_by__d8_virtualization_dvcr_health: "D8VirtualizationDVCRHealth,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
+        summary: Risk of insufficient DVCR storage capacity.
+        description: |
+          The DVCR storage (PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }}) has less than 5 GiB or 20% free space, risking failure to create new images or disks. Available: {{ $value | humanize1024 }}B.
+
+          The recommended course of action:
+
+          - Check PVC details: `d8 k -n d8-virtualization describe pvc dvcr`
+          - Extend DVCR storage size in the virtualization module settings or remove unnecessary images (ClusterVirtualImage/VirtualImage).