|
@@ -2,203 +2,203 @@
|
|
|
rules:
|
|
|
# Broker metrics
|
|
|
|
|
|
- - bean: kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.message.count
|
|
|
- type: counter
|
|
|
- desc: The number of messages received by the broker
|
|
|
- unit: '{messages}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=BrokerTopicMetrics,name=TotalFetchRequestsPerSec
|
|
|
- metricAttribute:
|
|
|
- type: const(fetch)
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.request.count
|
|
|
- type: counter
|
|
|
- desc: The number of requests received by the broker
|
|
|
- unit: '{requests}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=BrokerTopicMetrics,name=TotalProduceRequestsPerSec
|
|
|
- metricAttribute:
|
|
|
- type: const(produce)
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.request.count
|
|
|
- type: counter
|
|
|
- desc: The number of requests received by the broker
|
|
|
- unit: '{requests}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=BrokerTopicMetrics,name=FailedFetchRequestsPerSec
|
|
|
- metricAttribute:
|
|
|
- type: const(fetch)
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.request.failed
|
|
|
- type: counter
|
|
|
- desc: The number of requests to the broker resulting in a failure
|
|
|
- unit: '{requests}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=BrokerTopicMetrics,name=FailedProduceRequestsPerSec
|
|
|
- metricAttribute:
|
|
|
- type: const(produce)
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.request.failed
|
|
|
- type: counter
|
|
|
- desc: The number of requests to the broker resulting in a failure
|
|
|
- unit: '{requests}'
|
|
|
-
|
|
|
- - beans:
|
|
|
- - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Produce
|
|
|
- - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchConsumer
|
|
|
- - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchFollower
|
|
|
- metricAttribute:
|
|
|
- type: param(request)
|
|
|
- unit: ms
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.request.time.total
|
|
|
- type: counter
|
|
|
- desc: The total time the broker has taken to service requests
|
|
|
- 50thPercentile:
|
|
|
- metric: kafka.request.time.50p
|
|
|
- type: gauge
|
|
|
- desc: The 50th percentile time the broker has taken to service requests
|
|
|
- 99thPercentile:
|
|
|
- metric: kafka.request.time.99p
|
|
|
- type: gauge
|
|
|
- desc: The 99th percentile time the broker has taken to service requests
|
|
|
-
|
|
|
- - bean: kafka.network:type=RequestChannel,name=RequestQueueSize
|
|
|
- mapping:
|
|
|
- Value:
|
|
|
- metric: kafka.request.queue
|
|
|
- type: updowncounter
|
|
|
- desc: Size of the request queue
|
|
|
- unit: '{requests}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec
|
|
|
- metricAttribute:
|
|
|
- direction: const(in)
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.network.io
|
|
|
- type: counter
|
|
|
- desc: The bytes received or sent by the broker
|
|
|
- unit: By
|
|
|
-
|
|
|
- - bean: kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec
|
|
|
- metricAttribute:
|
|
|
- direction: const(out)
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.network.io
|
|
|
- type: counter
|
|
|
- desc: The bytes received or sent by the broker
|
|
|
- unit: By
|
|
|
-
|
|
|
- - beans:
|
|
|
- - kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Produce
|
|
|
- - kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Fetch
|
|
|
- metricAttribute:
|
|
|
- type: param(delayedOperation)
|
|
|
- mapping:
|
|
|
- Value:
|
|
|
- metric: kafka.purgatory.size
|
|
|
- type: updowncounter
|
|
|
- desc: The number of requests waiting in purgatory
|
|
|
- unit: '{requests}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=ReplicaManager,name=PartitionCount
|
|
|
- mapping:
|
|
|
- Value:
|
|
|
- metric: kafka.partition.count
|
|
|
- type: updowncounter
|
|
|
- desc: The number of partitions on the broker
|
|
|
- unit: '{partitions}'
|
|
|
-
|
|
|
- - bean: kafka.controller:type=KafkaController,name=OfflinePartitionsCount
|
|
|
- mapping:
|
|
|
- Value:
|
|
|
- metric: kafka.partition.offline
|
|
|
- type: updowncounter
|
|
|
- desc: The number of partitions offline
|
|
|
- unit: '{partitions}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=ReplicaManager,name=UnderReplicatedPartitions
|
|
|
- mapping:
|
|
|
- Value:
|
|
|
- metric: kafka.partition.underReplicated
|
|
|
- type: updowncounter
|
|
|
- desc: The number of under replicated partitions
|
|
|
- unit: '{partitions}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=ReplicaManager,name=IsrShrinksPerSec
|
|
|
- metricAttribute:
|
|
|
- operation: const(shrink)
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.isr.operation.count
|
|
|
- type: updowncounter
|
|
|
- desc: The number of in-sync replica shrink and expand operations
|
|
|
- unit: '{operations}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=ReplicaManager,name=IsrExpandsPerSec
|
|
|
- metricAttribute:
|
|
|
- operation: const(expand)
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.isr.operation.count
|
|
|
- type: updowncounter
|
|
|
- desc: The number of in-sync replica shrink and expand operations
|
|
|
- unit: '{operations}'
|
|
|
-
|
|
|
- - bean: kafka.server:type=ReplicaFetcherManager,name=MaxLag,clientId=Replica
|
|
|
- mapping:
|
|
|
- Value:
|
|
|
- metric: kafka.lag.max
|
|
|
- desc: The max lag in messages between follower and leader replicas
|
|
|
- unit: '{messages}'
|
|
|
-
|
|
|
- - bean: kafka.controller:type=KafkaController,name=ActiveControllerCount
|
|
|
- mapping:
|
|
|
- Value:
|
|
|
- metric: kafka.controller.active.count
|
|
|
- type: updowncounter
|
|
|
- desc: The number of controllers active on the broker
|
|
|
- unit: '{controllers}'
|
|
|
-
|
|
|
- - bean: kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.leaderElection.count
|
|
|
- type: counter
|
|
|
- desc: The leader election count
|
|
|
- unit: '{elections}'
|
|
|
-
|
|
|
- - bean: kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- metric: kafka.leaderElection.unclean.count
|
|
|
- type: counter
|
|
|
- desc: Unclean leader election count - increasing indicates broker failures
|
|
|
- unit: '{elections}'
|
|
|
-
|
|
|
- # Log metrics
|
|
|
-
|
|
|
- - bean: kafka.log:type=LogFlushStats,name=LogFlushRateAndTimeMs
|
|
|
- unit: ms
|
|
|
- type: gauge
|
|
|
- prefix: kafka.logs.flush.
|
|
|
- mapping:
|
|
|
- Count:
|
|
|
- type: counter
|
|
|
- desc: Log flush count
|
|
|
- 50thPercentile:
|
|
|
- metric: time.50p
|
|
|
- desc: Log flush time - 50th percentile
|
|
|
- 99thPercentile:
|
|
|
- metric: time.99p
|
|
|
- desc: Log flush time - 99th percentile
|
|
|
+ - bean: kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.message.count
|
|
|
+ type: counter
|
|
|
+ desc: The number of messages received by the broker
|
|
|
+ unit: "{messages}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=BrokerTopicMetrics,name=TotalFetchRequestsPerSec
|
|
|
+ metricAttribute:
|
|
|
+ type: const(fetch)
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.request.count
|
|
|
+ type: counter
|
|
|
+ desc: The number of requests received by the broker
|
|
|
+ unit: "{requests}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=BrokerTopicMetrics,name=TotalProduceRequestsPerSec
|
|
|
+ metricAttribute:
|
|
|
+ type: const(produce)
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.request.count
|
|
|
+ type: counter
|
|
|
+ desc: The number of requests received by the broker
|
|
|
+ unit: "{requests}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=BrokerTopicMetrics,name=FailedFetchRequestsPerSec
|
|
|
+ metricAttribute:
|
|
|
+ type: const(fetch)
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.request.failed
|
|
|
+ type: counter
|
|
|
+ desc: The number of requests to the broker resulting in a failure
|
|
|
+ unit: "{requests}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=BrokerTopicMetrics,name=FailedProduceRequestsPerSec
|
|
|
+ metricAttribute:
|
|
|
+ type: const(produce)
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.request.failed
|
|
|
+ type: counter
|
|
|
+ desc: The number of requests to the broker resulting in a failure
|
|
|
+ unit: "{requests}"
|
|
|
+
|
|
|
+ - beans:
|
|
|
+ - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Produce
|
|
|
+ - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchConsumer
|
|
|
+ - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchFollower
|
|
|
+ metricAttribute:
|
|
|
+ type: param(request)
|
|
|
+ unit: ms
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.request.time.total
|
|
|
+ type: counter
|
|
|
+ desc: The total time the broker has taken to service requests
|
|
|
+ 50thPercentile:
|
|
|
+ metric: kafka.request.time.50p
|
|
|
+ type: gauge
|
|
|
+ desc: The 50th percentile time the broker has taken to service requests
|
|
|
+ 99thPercentile:
|
|
|
+ metric: kafka.request.time.99p
|
|
|
+ type: gauge
|
|
|
+ desc: The 99th percentile time the broker has taken to service requests
|
|
|
+
|
|
|
+ - bean: kafka.network:type=RequestChannel,name=RequestQueueSize
|
|
|
+ mapping:
|
|
|
+ Value:
|
|
|
+ metric: kafka.request.queue
|
|
|
+ type: updowncounter
|
|
|
+ desc: Size of the request queue
|
|
|
+ unit: "{requests}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec
|
|
|
+ metricAttribute:
|
|
|
+ direction: const(in)
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.network.io
|
|
|
+ type: counter
|
|
|
+ desc: The bytes received or sent by the broker
|
|
|
+ unit: By
|
|
|
+
|
|
|
+ - bean: kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec
|
|
|
+ metricAttribute:
|
|
|
+ direction: const(out)
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.network.io
|
|
|
+ type: counter
|
|
|
+ desc: The bytes received or sent by the broker
|
|
|
+ unit: By
|
|
|
+
|
|
|
+ - beans:
|
|
|
+ - kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Produce
|
|
|
+ - kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Fetch
|
|
|
+ metricAttribute:
|
|
|
+ type: param(delayedOperation)
|
|
|
+ mapping:
|
|
|
+ Value:
|
|
|
+ metric: kafka.purgatory.size
|
|
|
+ type: updowncounter
|
|
|
+ desc: The number of requests waiting in purgatory
|
|
|
+ unit: "{requests}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=ReplicaManager,name=PartitionCount
|
|
|
+ mapping:
|
|
|
+ Value:
|
|
|
+ metric: kafka.partition.count
|
|
|
+ type: updowncounter
|
|
|
+ desc: The number of partitions on the broker
|
|
|
+ unit: "{partitions}"
|
|
|
+
|
|
|
+ - bean: kafka.controller:type=KafkaController,name=OfflinePartitionsCount
|
|
|
+ mapping:
|
|
|
+ Value:
|
|
|
+ metric: kafka.partition.offline
|
|
|
+ type: updowncounter
|
|
|
+ desc: The number of partitions offline
|
|
|
+ unit: "{partitions}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=ReplicaManager,name=UnderReplicatedPartitions
|
|
|
+ mapping:
|
|
|
+ Value:
|
|
|
+ metric: kafka.partition.underReplicated
|
|
|
+ type: updowncounter
|
|
|
+ desc: The number of under replicated partitions
|
|
|
+ unit: "{partitions}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=ReplicaManager,name=IsrShrinksPerSec
|
|
|
+ metricAttribute:
|
|
|
+ operation: const(shrink)
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.isr.operation.count
|
|
|
+ type: updowncounter
|
|
|
+ desc: The number of in-sync replica shrink and expand operations
|
|
|
+ unit: "{operations}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=ReplicaManager,name=IsrExpandsPerSec
|
|
|
+ metricAttribute:
|
|
|
+ operation: const(expand)
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.isr.operation.count
|
|
|
+ type: updowncounter
|
|
|
+ desc: The number of in-sync replica shrink and expand operations
|
|
|
+ unit: "{operations}"
|
|
|
+
|
|
|
+ - bean: kafka.server:type=ReplicaFetcherManager,name=MaxLag,clientId=Replica
|
|
|
+ mapping:
|
|
|
+ Value:
|
|
|
+ metric: kafka.lag.max
|
|
|
+ desc: The max lag in messages between follower and leader replicas
|
|
|
+ unit: "{messages}"
|
|
|
+
|
|
|
+ - bean: kafka.controller:type=KafkaController,name=ActiveControllerCount
|
|
|
+ mapping:
|
|
|
+ Value:
|
|
|
+ metric: kafka.controller.active.count
|
|
|
+ type: updowncounter
|
|
|
+ desc: The number of controllers active on the broker
|
|
|
+ unit: "{controllers}"
|
|
|
+
|
|
|
+ - bean: kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.leaderElection.count
|
|
|
+ type: counter
|
|
|
+ desc: The leader election count
|
|
|
+ unit: "{elections}"
|
|
|
+
|
|
|
+ - bean: kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ metric: kafka.leaderElection.unclean.count
|
|
|
+ type: counter
|
|
|
+ desc: Unclean leader election count - increasing indicates broker failures
|
|
|
+ unit: "{elections}"
|
|
|
+
|
|
|
+ # Log metrics
|
|
|
+
|
|
|
+ - bean: kafka.log:type=LogFlushStats,name=LogFlushRateAndTimeMs
|
|
|
+ unit: ms
|
|
|
+ type: gauge
|
|
|
+ prefix: kafka.logs.flush.
|
|
|
+ mapping:
|
|
|
+ Count:
|
|
|
+ type: counter
|
|
|
+ desc: Log flush count
|
|
|
+ 50thPercentile:
|
|
|
+ metric: time.50p
|
|
|
+ desc: Log flush time - 50th percentile
|
|
|
+ 99thPercentile:
|
|
|
+ metric: time.99p
|
|
|
+ desc: Log flush time - 99th percentile
|