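{{- /*
Creates a PrometheusRule for alerting on the collector's own telemetry. The
resource is rendered only when both prometheusRule.enabled and
serviceMonitor.enabled are set, since the default rules query metrics scraped
via the chart's ServiceMonitor.

A minimal values.yaml sketch for enabling it (illustrative only; the keys below
are the ones this template reads, the values shown are assumptions):

  serviceMonitor:
    enabled: true
  prometheusRule:
    enabled: true
    defaultRules:
      enabled: true   # render the built-in collectorRules group
    extraLabels: {}   # extra labels added to the PrometheusRule metadata
    groups: []        # additional user-defined rule groups, rendered verbatim
*/ -}}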
{{- if and .Values.prometheusRule.enabled .Values.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "opentelemetry-collector.fullname" . }}
  namespace: {{ template "opentelemetry-collector.namespace" . }}
  labels:
    {{- include "opentelemetry-collector.labels" . | nindent 4 }}
    {{- range $key, $value := .Values.prometheusRule.extraLabels }}
    {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }}
    {{- end }}
spec:
  groups:
    {{- if .Values.prometheusRule.groups }}
    {{- toYaml .Values.prometheusRule.groups | nindent 4 }}
    {{- end }}
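    {{- /*
    Built-in alerts (group "collectorRules") covering the collector's refused,
    dropped, and send-failed metrics for spans, metric points, and log records.
    */}}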
    {{- if .Values.prometheusRule.defaultRules.enabled }}
    - name: collectorRules
      rules:
        - alert: ReceiverDroppedSpans
          expr: rate(otelcol_receiver_refused_spans[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.receiver }} receiver is dropping spans at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
        - alert: ReceiverDroppedMetrics
          expr: rate(otelcol_receiver_refused_metric_points[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.receiver }} receiver is dropping metrics at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
        - alert: ReceiverDroppedLogs
          expr: rate(otelcol_receiver_refused_log_records[5m]) > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.receiver }} receiver is dropping logs at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
        - alert: ProcessorDroppedSpans
          expr: rate(otelcol_processor_dropped_spans[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.processor }} processor is dropping spans at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
        - alert: ProcessorDroppedMetrics
          expr: rate(otelcol_processor_dropped_metric_points[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.processor }} processor is dropping metrics at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
        - alert: ProcessorDroppedLogs
          expr: rate(otelcol_processor_dropped_log_records[5m]) > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.processor }} processor is dropping logs at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
        - alert: ExporterDroppedSpans
          expr: rate(otelcol_exporter_send_failed_spans[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.exporter }} exporter is dropping spans at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
        - alert: ExporterDroppedMetrics
          expr: rate(otelcol_exporter_send_failed_metric_points[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.exporter }} exporter is dropping metrics at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
        - alert: ExporterDroppedLogs
          expr: rate(otelcol_exporter_send_failed_log_records[5m]) > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.exporter }} exporter is dropping logs at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
        - alert: ExporterQueueSize
          expr: otelcol_exporter_queue_size > 5000
          for: 1m
          labels:
            severity: warning
          annotations:
            description: '{{`The {{ $labels.exporter }} queue has reached a size of {{ $value }}`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#queue-length'
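        {{- /*
        One SendQueueFailed alert per signal type; the loop expands into alerts
        for spans, metric_points, and log_records enqueue failures.
        */}}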
        {{- $signals := list "spans" "metric_points" "log_records" }}
        {{- range $signal := $signals }}
        - alert: SendQueueFailed{{ $signal }}
          expr: rate(otelcol_exporter_enqueue_failed_{{ $signal }}[5m]) > 0
          for: 1m
          labels:
            severity: warning
          annotations:
            description: '{{`The {{ $labels.exporter }} sending queue is failing to accept `}}{{ $signal }}{{` at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#queue-length'
        {{- end }}
    {{- end }}
{{- end }}