/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > alertmanager.rules
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > general.rules
alert: Watchdog
expr: vector(1)
labels:
  severity: none
annotations:
  message: |
    This is an alert meant to ensure that the entire alerting pipeline is functional.
    This alert is always firing, therefore it should always be firing in Alertmanager
    and always fire against a receiver. There are integrations with various notification
    mechanisms that send a notification when this alert is not firing. For example the
    "DeadMansSnitch" integration in PagerDuty.
Labels:        alertname="Watchdog" severity="none"
State:         firing
Active Since:  2024-12-18 12:33:45.022073385 +0000 UTC
Value:         1
Annotations:
  message: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty.
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kube-apiserver-slos
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kube-state-metrics
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-apps
Labels:        alertname="KubeDeploymentReplicasMismatch" deployment="drep-database-layer" instance="10.1.134.232:8443" job="kube-state-metrics" namespace="digilex-cicd" severity="warning"
State:         firing
Active Since:  2025-03-13 17:16:05.095800493 +0000 UTC
Value:         1
Annotations:
  message: Deployment digilex-cicd/drep-database-layer has not matched the expected number of replicas for longer than 15 minutes.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
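To see how far this deployment is from its desired state, comparing desired against available replicas is usually the quickest check. The expression below is a sketch that assumes the standard kube-state-metrics metric names, not necessarily the exact rule in this file.

kube_deployment_spec_replicas{job="kube-state-metrics", namespace="digilex-cicd", deployment="drep-database-layer"}
  !=
kube_deployment_status_replicas_available{job="kube-state-metrics", namespace="digilex-cicd", deployment="drep-database-layer"}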
Labels:        alertname="KubeContainerWaiting" container="database-layer" namespace="digilex-cicd" pod="drep-database-layer-6c78c9fb5-8rjw9" severity="warning"
State:         pending
Active Since:  2025-03-13 17:47:05.095800493 +0000 UTC
Value:         1
Annotations:
  message: Pod digilex-cicd/drep-database-layer-6c78c9fb5-8rjw9 container database-layer has been in waiting state for longer than 1 hour.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
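The annotation says the container is waiting but not why. A sketch that surfaces the waiting reason, assuming the usual kube-state-metrics metric, is:

sum by (namespace, pod, container, reason) (
  kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace="digilex-cicd", pod="drep-database-layer-6c78c9fb5-8rjw9"}
) > 0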
Labels:        alertname="KubePodCrashLooping" container="database-layer" instance="10.1.134.232:8443" job="kube-state-metrics" namespace="digilex-cicd" pod="drep-database-layer-6c78c9fb5-8rjw9" severity="warning"
State:         pending
Active Since:  2025-03-13 17:46:35.095800493 +0000 UTC
Value:         1.1111111111111112
Annotations:
  message: Pod digilex-cicd/drep-database-layer-6c78c9fb5-8rjw9 (database-layer) is restarting 1.11 times / 5 minutes.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
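The value 1.11 is a restart rate scaled to a five-minute window. In the upstream kubernetes-mixin this alert is typically built from an expression along these lines (shown as a sketch, not necessarily the exact rule used here):

rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0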
alert: KubeJobFailed
expr: kube_job_failed{job="kube-state-metrics"} > 0
for: 15m
labels:
  severity: warning
annotations:
  message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
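A quick way to enumerate which Jobs are currently tripping this rule is to aggregate the same metric by namespace and job name:

sum by (namespace, job_name) (kube_job_failed{job="kube-state-metrics"}) > 0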
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-resources
Labels:        alertname="CPUThrottlingHigh" container="node-exporter" namespace="monitoring" pod="node-exporter-5chcc" severity="info"
State:         firing
Active Since:  2024-12-18 12:35:47.55260138 +0000 UTC
Value:         0.4852941176470588
Annotations:
  message: 48.53% throttling of CPU in namespace monitoring for container node-exporter in pod node-exporter-5chcc.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
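The 48.53% value is the share of CPU scheduling periods in which the container was throttled. A sketch of the underlying ratio, using the cAdvisor counters this alert is normally derived from, is:

sum by (container, pod, namespace) (increase(container_cpu_cfs_throttled_periods_total{container="node-exporter"}[5m]))
  /
sum by (container, pod, namespace) (increase(container_cpu_cfs_periods_total{container="node-exporter"}[5m]))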
Labels:        alertname="KubeCPUOvercommit" severity="warning"
State:         firing
Active Since:  2025-03-13 17:16:17.55260138 +0000 UTC
Value:         0.6817500000000001
Annotations:
  message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
Labels:        alertname="KubeMemoryOvercommit" severity="warning"
State:         firing
Active Since:  2025-03-13 17:16:17.55260138 +0000 UTC
Value:         0.19758578791971212
Annotations:
  message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
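Both overcommit alerts compare the sum of Pod resource requests against what the cluster could still schedule if a node were lost. A rough sketch of the CPU ratio is shown below; the metric names are an assumption (they vary between kube-state-metrics versions), so this is not necessarily how the recording rules in this file compute the value.

sum(kube_pod_container_resource_requests{resource="cpu", job="kube-state-metrics"})
  /
sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"})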
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-storage
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system-apiserver
Labels:        alertname="AggregatedAPIDown" name="v1.packages.operators.coreos.com" namespace="default" severity="warning"
State:         firing
Active Since:  2025-03-13 16:36:04.774722027 +0000 UTC
Value:         40
Annotations:
  message: An aggregated API v1.packages.operators.coreos.com/default has been only 40% available over the last 5m.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
alert: AggregatedAPIErrors
expr: sum by(name, namespace) (increase(aggregator_unavailable_apiservice_count[5m])) > 2
labels:
  severity: warning
annotations:
  message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors has increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
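To see which aggregated API is flapping and how often, the same counter from the rule above can be inspected over a longer window, for example:

sum by (name, namespace) (increase(aggregator_unavailable_apiservice_count[30m]))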
alert: KubeAPIDown
expr: absent(up{job="apiserver"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: KubeAPI has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
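The absent(...) pattern fires when no apiserver target reports up == 1, which also covers the case where the target vanishes from service discovery entirely. To distinguish "scraped but down" from "not scraped at all", it helps to look at the raw series first; these are two separate queries:

up{job="apiserver"}          # empty result: no such target is being discovered/scraped
up{job="apiserver"} == 0     # series present with value 0: the target is scraped and failing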
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system-controller-manager
Labels:        alertname="KubeControllerManagerDown" severity="critical"
State:         firing
Active Since:  2024-03-19 09:22:14 +0000 UTC
Value:         1
Annotations:
  message: KubeControllerManager has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system-kubelet
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system-scheduler
alert: KubeSchedulerDown
expr: absent(up{job="kube-scheduler"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: KubeScheduler has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
Labels:        alertname="KubeSchedulerDown" severity="critical"
State:         firing
Active Since:  2024-03-19 09:22:02 +0000 UTC
Value:         1
Annotations:
  message: KubeScheduler has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
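Both KubeSchedulerDown and KubeControllerManagerDown have been firing since 2024-03-19, which often indicates that these control-plane components are simply not exposed to Prometheus on this cluster rather than actually down. A first check (a sketch, assuming the job labels used by these rules) is whether any such targets exist at all:

up{job=~"kube-scheduler|kube-controller-manager"}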
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > node-exporter
alert: NodeClockNotSynchronising
expr: min_over_time(node_timex_sync_status[5m]) == 0
for: 10m
labels:
  severity: warning
annotations:
  message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
  summary: Clock not synchronising.
Labels:        alertname="NodeClockNotSynchronising" endpoint="https" instance="siccapp" job="node-exporter" namespace="monitoring" pod="node-exporter-5chcc" service="node-exporter" severity="warning"
State:         firing
Active Since:  2024-09-06 18:12:46 +0000 UTC
Value:         0
Annotations:
  message: Clock on siccapp is not synchronising. Ensure NTP is configured on this host.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
  summary: Clock not synchronising.
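Beyond the sync flag itself, the node exporter's timex metrics can show how far the clock has drifted. A sketch for this instance, as two separate queries:

min_over_time(node_timex_sync_status{instance="siccapp"}[5m])   # 0 means the kernel reports the clock as unsynchronised
node_timex_offset_seconds{instance="siccapp"}                   # estimated offset from the reference clock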
alert: NodeNetworkReceiveErrs
expr: increase(node_network_receive_errs_total[2m]) > 10
for: 1h
labels:
  severity: warning
annotations:
  description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
  summary: Network interface is reporting many receive errors.
alert: NodeNetworkTransmitErrs
expr: increase(node_network_transmit_errs_total[2m]) > 10
for: 1h
labels:
  severity: warning
annotations:
  description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
  summary: Network interface is reporting many transmit errors.
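The receive and transmit rules only differ in the counter they watch; a combined per-device view, using the same counters as the two rules above, is:

sum by (instance, device) (increase(node_network_receive_errs_total[2m]))
  +
sum by (instance, device) (increase(node_network_transmit_errs_total[2m]))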
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > node-network
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > prometheus
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > prometheus-operator