{{- if and .Values.prometheusRule.enabled .Values.serviceMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "opentelemetry-collector.fullname" . }}
  namespace: {{ template "opentelemetry-collector.namespace" . }}
  labels:
    {{- include "opentelemetry-collector.labels" . | nindent 4 }}
    {{- range $key, $value := .Values.prometheusRule.extraLabels }}
    {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }}
    {{- end }}
spec:
  groups:
    {{- if .Values.prometheusRule.groups }}
    {{- toYaml .Values.prometheusRule.groups | nindent 4 }}
    {{- end }}
    {{- if .Values.prometheusRule.defaultRules.enabled }}
    - name: collectorRules
      rules:
        - alert: ReceiverDroppedSpans
          expr: rate(otelcol_receiver_refused_spans[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.receiver }} receiver is dropping spans at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
        - alert: ReceiverDroppedMetrics
          expr: rate(otelcol_receiver_refused_metric_points[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.receiver }} receiver is dropping metrics at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
        - alert: ReceiverDroppedLogs
          expr: rate(otelcol_receiver_refused_log_records[5m]) > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.receiver }} receiver is dropping logs at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
        - alert: ProcessorDroppedSpans
          expr: rate(otelcol_processor_dropped_spans[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.processor }} processor is dropping spans at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
        - alert: ProcessorDroppedMetrics
          expr: rate(otelcol_processor_dropped_metric_points[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.processor }} processor is dropping metrics at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
        - alert: ProcessorDroppedLogs
          expr: rate(otelcol_processor_dropped_log_records[5m]) > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.processor }} processor is dropping logs at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
        - alert: ExporterDroppedSpans
          expr: rate(otelcol_exporter_send_failed_spans[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.exporter }} exporter is dropping spans at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
        - alert: ExporterDroppedMetrics
          expr: rate(otelcol_exporter_send_failed_metric_points[5m]) > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.exporter }} exporter is dropping metrics at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
        - alert: ExporterDroppedLogs
          expr: rate(otelcol_exporter_send_failed_log_records[5m]) > 0
          for: 5m
          labels:
            severity: critical
          annotations:
            description: '{{`The {{ $labels.exporter }} exporter is dropping logs at a rate of {{ humanize $value }} per second`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
        - alert: ExporterQueueSize
          expr: otelcol_exporter_queue_size > 5000
          for: 1m
          labels:
            severity: warning
          annotations:
            description: '{{`The {{ $labels.exporter }} queue has reached a size of {{ $value }}`}}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#queue-length'
        {{- $signals := list "spans" "metric_points" "log_records" }}
        {{- range $signal := $signals }}
        - alert: SendQueueFailed{{ $signal }}
          expr: rate(otelcol_exporter_enqueue_failed_{{ $signal }}[5m]) > 0
          for: 1m
          labels:
            severity: warning
          annotations:
            description: '{{`The {{ $labels.exporter }} sending queue failed to accept {{ $value }}`}} {{ $signal }}'
            runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#queue-length'
        {{- end }}
    {{- end }}
{{- end }}
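{{- /*
Example values.yaml for rendering this template. This is a minimal sketch:
the key paths come from the conditionals above; the extraLabels value and
the empty groups list are illustrative placeholders, not chart defaults.

  serviceMonitor:
    enabled: true          # required: rules are gated on the ServiceMonitor too
  prometheusRule:
    enabled: true          # emit this PrometheusRule object
    defaultRules:
      enabled: true        # include the collectorRules group above
    extraLabels:
      release: kube-prometheus-stack   # hypothetical label, tpl-expanded and quoted
    groups: []             # optional: extra rule groups, inserted verbatim via toYaml
*/}}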