diff --git a/.gitignore b/.gitignore index 1d52ed62..756e7aac 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ metricbeat-elastic-kibana/assets/security/ both-enabled/ .vscode/ utils/testing/output/* -utils/testing/code/*.ipynb \ No newline at end of file +utils/testing/code/*.ipynb +__pycache__/ diff --git a/cfk-prometheus-grafana/README.md b/cfk-prometheus-grafana/README.md new file mode 100644 index 00000000..0edc525c --- /dev/null +++ b/cfk-prometheus-grafana/README.md @@ -0,0 +1,10 @@ +# Prometheus and Grafana stack for CFK (Confluent for Kubernetes) + +## Requirements + +- Prometheus and Grafana deployed on Kubernetes: https://artifacthub.io/packages/helm/prometheus-community/prometheus + +## How to run + +- Include the metrics configuration in the Confluent Platform custom resources, following the example in [confluent-platform.yaml](./cfk/confluent-platform.yaml). +- Deploy the Grafana dashboards. They are very similar to the ones [here](../jmxexporter-prometheus-grafana), but tweaked to use Namespace and Pod variables. diff --git a/cfk-prometheus-grafana/cfk/confluent-platform.yaml b/cfk-prometheus-grafana/cfk/confluent-platform.yaml new file mode 100644 index 00000000..07ab20c7 --- /dev/null +++ b/cfk-prometheus-grafana/cfk/confluent-platform.yaml @@ -0,0 +1,730 @@ +--- +apiVersion: platform.confluent.io/v1beta1 +kind: Zookeeper +metadata: + name: zookeeper + namespace: confluent +spec: + replicas: 3 + image: + application: confluentinc/cp-zookeeper:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dataVolumeCapacity: 10Gi + logVolumeCapacity: 10Gi + metrics: + prometheus: + rules: + - labels: + client_address: $4 + connection_id: $5 + member_type: $3 + server_id: $2 + server_name: $1 + name: zookeeper_connections_$6 + pattern: org.apache.ZooKeeperService<>([^:]+) + valueFactor: 1 + - labels: + member_type: $3 + server_id: $2 + server_name: $1 + name: zookeeper_$4 + pattern: 'org.apache.ZooKeeperService<>(\w+): + (\d+)' + valueFactor: 1 + - labels: + member_type: $3 + server_id: $2 + server_name: $1 
+ name: zookeeper_inmemorydatatree_$4 + pattern: 'org.apache.ZooKeeperService<>(WatchCount|NodeCount): (\d+)' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + server_id: $2 + server_name: $1 + name: zookeeper_status + pattern: 'org.apache.ZooKeeperService<>(.+): + (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + server_id: $1 + name: zookeeper_status_$2 + pattern: 'org.apache.ZooKeeperService<>(QuorumSize): + (\d+)' + type: GAUGE + valueFactor: 1 + - labels: + server_id: "1" + server_name: $1 + name: zookeeper_inmemorydatatree_$2 + pattern: 'org.apache.ZooKeeperService<>(WatchCount|NodeCount): + (\d+)' + type: GAUGE + valueFactor: 1 + - labels: + client_address: $2 + connection_id: $3 + server_name: $1 + name: zookeeper_connections_$4 + pattern: org.apache.ZooKeeperService<>([^:]+) + valueFactor: 1 + - labels: + $2: $3 + server_name: $1 + name: zookeeper_$2 + pattern: 'org.apache.ZooKeeperService<>(StartTime|ClientPort|SecureClientAddress|Version|SecureClientPort): + (.+)' + value: "1" + valueFactor: 1 + - name: zookeeper_$2 + pattern: 'org.apache.ZooKeeperService<>(.+): (.+)' + type: GAUGE + valueFactor: 1 + whitelist: + - org.apache.ZooKeeperService:name3=Connections,* + - org.apache.ZooKeeperService:name3=InMemoryDataTree,* + - org.apache.ZooKeeperService:name0=*,name1=replica*,name2=* + - org.apache.ZooKeeperService:name0=*,name1=replica* + - org.apache.ZooKeeperService:name0=* + - org.apache.ZooKeeperService:name1=InMemoryDataTree,name0=* + - org.apache.ZooKeeperService:name0=*,name1=Connections,name2=*,name3=* + +--- +apiVersion: platform.confluent.io/v1beta1 +kind: Kafka +metadata: + name: kafka + namespace: confluent +spec: + replicas: 3 + image: + application: confluentinc/cp-server:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dataVolumeCapacity: 10Gi + metricReporter: + enabled: true + metrics: + prometheus: + blacklist: + - kafka.consumer:type=*,id=* + - kafka.consumer:type=*,client-id=* + - 
kafka.consumer:type=*,client-id=*,node-id=* + - kafka.producer:type=*,id=* + - kafka.producer:type=*,client-id=* + - kafka.producer:type=*,client-id=*,node-id=* + - kafka.*:type=kafka-metrics-count,* + - kafka.admin.client:* + - kafka.server:type=*,cipher=*,protocol=*,listener=*,networkProcessor=* + - kafka.server:type=* + rules: + - labels: + partition: $5 + topic: $4 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>Value + type: GAUGE + valueFactor: "1" + - labels: + clientId: $3 + partition: $5 + topic: $4 + name: kafka_server_$1_$2 + pattern: kafka.server<>Value + type: GAUGE + valueFactor: "1" + - labels: + broker: $4:$5 + clientId: $3 + name: kafka_server_$1_$2 + pattern: kafka.server<>Value + type: GAUGE + valueFactor: "1" + - labels: + $4: $5 + $6: $7 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(Count|Value) + valueFactor: "1" + - labels: + $4: $5 + $6: $7 + quantile: 0.$8 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(\d+)thPercentile + type: GAUGE + valueFactor: "1" + - labels: + $3: $4 + $5: $6 + $7: $8 + name: kafka_$1_$2 + pattern: kafka.(\w+)<>Value + valueFactor: "1" + - labels: + $4: $5 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(Count|Value) + valueFactor: "1" + - labels: + $3: $4 + $5: $6 + name: kafka_$1_$2 + pattern: kafka.(\w+)<>(Count|Value) + valueFactor: "1" + - name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(Count|Value) + valueFactor: "1" + - labels: + $3: $4 + name: kafka_$1_$2 + pattern: kafka.(\w+)<>Value + valueFactor: "1" + - name: kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total + pattern: kafka.server<>OneMinuteRate + type: GAUGE + valueFactor: "1" + - labels: + client_software_name: $1 + client_software_version: $2 + listener: $3 + network_processor: $4 + name: kafka_server_socketservermetrics_connections + pattern: kafka.server<>connections + type: GAUGE + valueFactor: "1" + - labels: + listener: $1 + network_processor: $2 + name: kafka_server_socketservermetrics_$3 + pattern: 'kafka.server<>(.+):' + 
type: GAUGE + valueFactor: "1" + - name: kafka_coordinator_$1_$2_$3 + pattern: kafka.coordinator.(\w+)<>(Count|Value) + valueFactor: "1" + - labels: + $4: $5 + quantile: 0.$6 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(\d+)thPercentile + type: GAUGE + valueFactor: "1" + - labels: + quantile: 0.$4 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(\d+)thPercentile + type: GAUGE + valueFactor: "1" + - labels: + $3: $4 + $5: $6 + $7: $8 + name: confluent_$1_$2 + pattern: confluent.(\w+)<>Value + type: GAUGE + valueFactor: "1" + - labels: + $3: $4 + $5: $6 + name: confluent_$1_$2 + pattern: confluent.(.+)<>Value + type: GAUGE + valueFactor: "1" + - labels: + $3: $4 + name: confluent_$1_$2 + pattern: confluent.(.+)<>Value + type: GAUGE + valueFactor: "1" + - labels: + client-id: $3 + user: $2 + name: kafka_server_$1_$4 + pattern: 'kafka.server<>(.+):' + type: GAUGE + valueFactor: "1" + - labels: + user: $2 + name: kafka_server_$1_$3 + pattern: 'kafka.server<>(.+):' + type: GAUGE + valueFactor: "1" + - labels: + client-id: $2 + name: kafka_server_$1_$3 + pattern: 'kafka.server<>(.+):' + type: GAUGE + valueFactor: "1" +--- +apiVersion: platform.confluent.io/v1beta1 +kind: SchemaRegistry +metadata: + name: schemaregistry + namespace: confluent +spec: + replicas: 3 + image: + application: confluentinc/cp-schema-registry:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + metrics: + prometheus: + blacklist: + - kafka.producer:type=app-info,client-id=* + - kafka.consumer:type=app-info,client-id=* + - kafka.admin.client:* + - kafka.consumer:type=*,id=* + - kafka.producer:type=*,id=* + - kafka.*:type=kafka-metrics-count,* + rules: + - name: kafka_schema_registry_jetty_metrics_$1 + pattern: 'kafka.schema.registry([^:]+):' + valueFactor: 1 + - name: kafka_schema_registry_jersey_metrics_$1 + pattern: 'kafka.schema.registry([^:]+):' + valueFactor: 1 + - labels: + $2: $3 + client-id: $1 + name: kafka_schema_registry_app_info + pattern: 'kafka.schema.registry<>(.+): (.+)' 
+ type: UNTYPED + value: "1" + valueFactor: 1 + - name: kafka_schema_registry_registered_count + pattern: 'kafka.schema.registry([^:]+):' + valueFactor: 1 + - labels: + schema_type: $1 + name: kafka_schema_registry_schemas_$2 + pattern: 'kafka.schema.registry([^:]+):' + valueFactor: 1 + - labels: + client_id: $2 + name: kafka_schema_registry_$1_$3 + pattern: 'kafka.schema.registry<>([^:]+):' + valueFactor: 1 + - labels: + $3: $4 + client_id: $2 + client_type: $1 + name: kafka_$1_app_info + pattern: 'kafka.(.+)<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + $7: $8 + client_type: $1 + name: kafka_$1_$2_$9 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + client_type: $1 + name: kafka_$1_$2_$7 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_type: $1 + name: kafka_$1_$2_$5 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + client_type: $1 + name: kafka_$1_$2_$3 + pattern: 'kafka.(.+)<>(.+):' + valueFactor: 1 + whitelist: + - kafka.schema.registry:type=jetty-metrics + - kafka.schema.registry:type=jersey-metrics + - kafka.schema.registry:type=app-info,id=* + - kafka.schema.registry:type=registered-count + - kafka.schema.registry:type=json-schema* + - kafka.schema.registry:type=protobuf-schemas* + - kafka.schema.registry:type=avro-schemas* + - kafka.schema.registry:type=kafka.schema.registry-metrics,client-id=* + - kafka.schema.registry:type=kafka.schema.registry-coordinator-metrics,client-id=* + - kafka.consumer:* + - kafka.producer:* + +--- +apiVersion: platform.confluent.io/v1beta1 +kind: Connect +metadata: + name: connect + namespace: confluent +spec: + replicas: 1 + image: + application: confluentinc/cp-server-connect:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dependencies: + kafka: + bootstrapEndpoint: kafka:9071 + metrics: + prometheus: + blacklist: + - kafka.admin.client:* + - 
kafka.consumer:type=*,id=* + - kafka.producer:type=*,id=* + - kafka.producer:client-id=confluent.monitoring*,* + - kafka.*:type=kafka-metrics-count,* + rules: + - labels: + $2: $3 + client-id: $1 + name: kafka_connect_app_info + pattern: 'kafka.connect<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - name: kafka_connect_connect_worker_rebalance_metrics_$1 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + client_id: $2 + name: kafka_connect_$1_$3 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + connector: aggregate + name: kafka_connect_connect_worker_metrics_$1 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + connector: $1 + name: kafka_connect_connect_worker_metrics_$2 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + $2: $3 + connector: $1 + name: kafka_connect_connector_metrics + pattern: 'kafka.connect<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + connector: $2 + task: $3 + name: kafka_connect_$1_task_metrics_$4 + pattern: 'kafka.connect<>(.+): + (.+)' + valueFactor: 1 + - labels: + connector: $1 + task: $2 + name: kafka_connect_task_error_metrics_$3 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + $1: $2 + $3: $4 + $5: $6 + $7: $8 + name: confluent_replicator_task_metrics_$9 + pattern: 'confluent.replicator<>confluent-replicator-task-topic-partition-(.*): + (.*)' + valueFactor: 1 + - labels: + $1: $2 + $3: $4 + $5: $6 + $7: $8 + $9: $10 + name: confluent_replicator_task_metrics_info + pattern: 'confluent.replicator<>(confluent-replicator-destination-cluster|confluent-replicator-source-cluster|confluent-replicator-destination-topic-name): + (.*)' + value: "1" + valueFactor: 1 + - labels: + $3: $4 + client_id: $2 + client_type: $1 + name: kafka_$1_app_info + pattern: 'kafka.(.+)<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + $7: $8 + client_type: $1 + name: kafka_$1_$2_$9 + pattern: 'kafka.(.+)<>(.+):' 
+ type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + client_type: $1 + name: kafka_$1_$2_$7 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_type: $1 + name: kafka_$1_$2_$5 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + client_type: $1 + name: kafka_$1_$2_$3 + pattern: 'kafka.(.+)<>(.+):' + valueFactor: 1 + whitelist: + - kafka.connect:type=app-info,client-id=* + - kafka.connect:type=connect-worker-rebalance-metrics + - kafka.connect:type=connect-coordinator-metrics,* + - kafka.connect:type=connect-metrics,* + - kafka.connect:type=connect-worker-metrics + - kafka.connect:type=connect-worker-metrics,* + - kafka.connect:type=connector-metrics,* + - kafka.connect:type=*-task-metrics,* + - kafka.connect:type=task-error-metrics,* + - confluent.replicator:type=confluent-replicator-task-metrics,* + - kafka.consumer:* + - kafka.producer:* + +--- +apiVersion: platform.confluent.io/v1beta1 +kind: KsqlDB +metadata: + name: ksqldb + namespace: confluent +spec: + replicas: 1 + image: + application: confluentinc/cp-ksqldb-server:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dataVolumeCapacity: 10Gi + metrics: + prometheus: + blacklist: + - kafka.streams:type=kafka-metrics-count + - kafka.admin.client:* + - kafka.consumer:type=*,id=* + - kafka.consumer:type=*,client-id=* + - kafka.consumer:type=*,client-id=*,node-id=* + - kafka.producer:type=*,id=* + - kafka.producer:type=*,client-id=* + - kafka.producer:type=*,client-id=*,node-id=* + - kafka.streams:type=stream-processor-node-metrics,thread-id=*,task-id=*,processor-node-id=* + - kafka.*:type=kafka-metrics-count,* + rules: + - labels: + id: $3 + key: $2 + name: ksql_$1_$4 + pattern: io.confluent.ksql.metrics<>([^:]+) + valueFactor: 1 + - labels: + ksql_cluster: $1 + name: ksql_ksql_engine_query_stats_$2 + pattern: io.confluent.ksql.metrics<>([^:]+) + valueFactor: 1 + - labels: + $4: $5 + ksql_cluster: $2 + ksql_query: $3 + name: 
ksql_ksql_metrics_$1_$4 + pattern: 'io.confluent.ksql.metrics<>(.+): + (.+)' + value: "1" + valueFactor: 1 + - labels: + $4: $5 + task_id: $3 + thread_id: $2 + name: kafka_streams_$1_$6 + pattern: 'kafka.streams<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + task_id: $3 + thread_id: $2 + name: kafka_streams_$1_$4 + pattern: 'kafka.streams<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $1: $2 + $3: $4 + name: kafka_streams_stream_metrics + pattern: 'kafka.streams<>(state|alive-stream-threads|commit-id|version|application-id): + (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $2: $3 + name: kafka_streams_$1_$4 + pattern: kafka.streams<>([^:]+) + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_id: $2 + client_type: $1 + name: kafka_$1_app_info + pattern: 'kafka.(.+)<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + $7: $8 + client_type: $1 + name: kafka_$1_$2_$9 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + client_type: $1 + name: kafka_$1_$2_$7 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_type: $1 + name: kafka_$1_$2_$5 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + client_type: $1 + name: kafka_$1_$2_$3 + pattern: 'kafka.(.+)<>(.+):' + valueFactor: 1 + whitelist: + - io.confluent.ksql.metrics:* + - kafka.consumer:* + - kafka.producer:* + - kafka.streams:* + +--- +apiVersion: platform.confluent.io/v1beta1 +kind: ControlCenter +metadata: + name: controlcenter + namespace: confluent +spec: + replicas: 1 + image: + application: confluentinc/cp-enterprise-control-center:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dataVolumeCapacity: 10Gi + dependencies: + schemaRegistry: + url: http://schemaregistry.confluent.svc.cluster.local:8081 + ksqldb: + - name: ksqldb + url: http://ksqldb.confluent.svc.cluster.local:8088 + connect: + - name: connect + url: 
http://connect.confluent.svc.cluster.local:8083 + metrics: + prometheus: + blacklist: + - kafka.streams:type=kafka-metrics-count + - kafka.admin.client:* + - kafka.consumer:type=*,id=* + - kafka.producer:type=*,id=* + - kafka.*:type=kafka-metrics-count,* + rules: + - labels: + $4: $5 + task_id: $3 + thread_id: $2 + name: kafka_streams_$1_$6 + pattern: 'kafka.streams<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + task_id: $3 + thread_id: $2 + name: kafka_streams_$1_$4 + pattern: 'kafka.streams<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $1: $2 + $3: $4 + name: kafka_streams_stream_metrics + pattern: 'kafka.streams<>(state|alive-stream-threads|commit-id|version|application-id): + (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $2: $3 + name: kafka_streams_$1_$4 + pattern: kafka.streams<>([^:]+) + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_id: $2 + client_type: $1 + name: kafka_$1_app_info + pattern: 'kafka.(.+)<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + $7: $8 + client_type: $1 + name: kafka_$1_$2_$9 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + client_type: $1 + name: kafka_$1_$2_$7 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_type: $1 + name: kafka_$1_$2_$5 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + client_type: $1 + name: kafka_$1_$2_$3 + pattern: 'kafka.(.+)<>(.+):' + valueFactor: 1 + whitelist: + - kafka.streams:* + - kafka.consumer:* + - kafka.producer:* diff --git a/cfk-prometheus-grafana/grafana/confluent-platform.json b/cfk-prometheus-grafana/grafana/confluent-platform.json new file mode 100644 index 00000000..55305913 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/confluent-platform.json @@ -0,0 +1,2681 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + 
"pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the main health-check metrics from Confluent Platform components.", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + 
"graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + 
"metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: ZNodes (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_numaliveconnections{namespace=\"$env\"} / zookeeper_maxclientcnxnsperhost{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": 
"", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Connections used", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", + 
"editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "last" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_outstandingrequests{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Count of brokers available (online).\n This value is referential and should not be used for alerting.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 
10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$env\"} > 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of Topic partitions across the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + 
"mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 
1 minute), then it's recommended to alert.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n It's recommended to alert when this value is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", +
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-MinISR Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n It's recommended alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" 
+ }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Offline Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Schema Registry cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Schema Registry online instances returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": 
"none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of registered schemas across the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 2 + }, + "height": null, + 
"hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Registered Schemas (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of schemas created, by type.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"avg(kafka_schema_registry_schemas_created{namespace=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Schemas Created by Type (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of schemas deleted, by type.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Schemas Deleted by Type (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + 
"collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Kafka Connect online workers returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": 
"left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + 
"calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Informative value. Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$env\",app=~\"$connect_cluster\"} >= 0", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" + } + ], + "repeat": "connect_cluster", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka Connect cluster: $connect_cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "ksqlDB online instances returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + 
"textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of active queries deployed in the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum 
of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal to zero, or return to zero in a
short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stays higher than 0 for a longer period of time.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal to zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, +
"mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Queries Failed", + "transformations": [], + "transparent": false, + "type": "stat" + } + ], + "repeat": "ksqldb_cluster", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB cluster: $ksqldb_cluster", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": 
null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": true, + "includeAll": false, + "label": "Kafka Connect cluster", + "multi": false, + "name": "connect_cluster", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\"}, app)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": true, + "includeAll": false, + "label": "ksqlDB cluster", + "multi": false, + "name": "ksqldb_cluster", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$env\"}, app)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Confluent Platform overview", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/kafka-cluster.json 
b/cfk-prometheus-grafana/grafana/kafka-cluster.json new file mode 100644 index 00000000..74c899e8 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/kafka-cluster.json @@ -0,0 +1,5537 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Count of brokers available (online).\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + 
"graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$env\"} > 0", + "format": "time_series", + "hide": false, + "instant": false, 
+ "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "\n Number of partitions where the preferred replica is not the leader.\n Usually, this number is 0.\n Restarting nodes could cause this values to change, but when reassigning happens the value stabilize.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Preferred Replica Imbalance", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": "Number of topics in the cluster.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Topics", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of requests per second rated over a 5 min. 
period.\n Gives an idea of the processing load in the cluster.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Rate of Requests/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of log sizes per broker.\n This must be compared with the total storage space available in the brokers.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_log_log_size{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Log Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of Topic partitions across the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + 
"datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n We recommend alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, 
+ "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-MinISR Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n We recommend alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Offline Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + 
"cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of bytes in per second rated over a 5 min. period.\n Gives an idea of the incoming throughput handle by the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of bytes out per second rated over a 5 min. 
period.\n Gives an idea of the outgoing throughput handled by the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], +
"timeFrom": null, + "timeShift": null, + "title": "System resources", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + 
"type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, + "fieldConfig": 
{ + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], 
+ "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of messages into topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": 
"Number of bytes into topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of bytes out of topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { 
+ "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + 
"mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Percent of time the network thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$env\",pod=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + 
"refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network processor usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Percent of time the IO thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$env\",pod=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": 
null, + "timeShift": null, + "title": "Request processor (IO) usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread utilization", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Requests per second rated over a 5 minutes period.\n Includes API call and version.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + 
"legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{request}}(v{{version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Requests rates", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Request Errors per second rated over a 5 minutes period.\n Includes API call and version.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + 
"last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace=\"$env\",pod=~\"$broker\",error!=\"NONE\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{error}}@{{request}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Error rates", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request rates", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of connections count across cluster by brokers", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections alive per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of rate of connections created across cluster by brokers", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections creation rate per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of rate of connections closed across cluster by brokers", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + 
"x": 16, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections close rate per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of connections count across cluster by listeners", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections alive per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of rate of connections created across cluster by listener", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": 
{ + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections creation rate per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of rate of connections closed across cluster by listener", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections close rate per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Rate of ISR shrinks per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": 
[], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{namespace=\"$env\",pod=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of ISR Shrinks/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Rate of ISR expands per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrexpandspersec{namespace=\"$env\",pod=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of ISR Expands/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "In-Sync Replicas", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent in the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", +
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", +
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Local Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", +
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, +
"showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode":
"off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Producer", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend on the request queue.\n Moved from network socket to 
request queue by Network threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, + "fieldConfig": {
+ "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": 
{ + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 48, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 49, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Consumer Fetch", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 50, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 51, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"datasource": "${DS_PROMETHEUS}", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 52, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend waiting for coordination with other 
brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 53, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, + "fieldConfig":
{ + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 14 + }, + "height": null, + "hideTimeOverride": false, + "id": 54, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 14 + }, + "height": null, + "hideTimeOverride": false, + "id": 55, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Replica Fetch", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": 
{ + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 56, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of groups managed by Broker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 57, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{namespace=\"$env\",pod=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Groups per Broker", + 
"transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of stable groups managed by Broker", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 58, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "stable", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "preparing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "dead", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "completing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "empty", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Groups per Broker per Status", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Group Coordinator", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 59, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + 
"panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of produce message conversions per second.\n This value increases when the broker receives produce messages from clients using older versions.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "opsps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 60, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Produce conversion rate per sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": "Sum of fetch message conversions per second.\n This value increases when the broker receives fetch messages from clients using older versions.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "opsps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 61, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Fetch conversion rate per sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of connections aggregated by client 
version and name.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 62, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connections{namespace=\"$env\",pod=~\"$broker\"}) by (client_software_name,client_software_version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_software_name}} (v{{client_software_version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections per version", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Message Conversion", + "transformations": [], + "transparent": false, + 
"type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Broker", + "multi": true, + "name": "broker", + "options": [], + "query": "label_values(kafka_server_replicamanager_leadercount{namespace=\"$env\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\"}, quantile)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + 
"1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka cluster", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json new file mode 100644 index 00000000..261d085c --- /dev/null +++ b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json @@ -0,0 +1,4154 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka Connect cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Kafka Connect online workers returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + 
}, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",version!=\"\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, 
+ "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": 
"", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": 
null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Failed Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It is recommended to alert when this value is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Informative value. 
Time since last rebalance.\n When this value is continuously and repeatedly low, it means some connectors are failing and rebalancing is being triggered constantly.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$env\",app=\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "description": "Connect workers metadata and stats.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + 
"fontSize": "100%", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",start_time_ms!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",version!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + 
"step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect Workers", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "pod" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "pod", + "app 1", + "start_time_ms", + "version", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + "namespace 1" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "app 1": 1, + "namespace 1": 0, + "pod": 2, + "start_time_ms": 3, + "version": 4 + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. 
failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + "app 1": "cluster", + "namespace 1": "environment", + "pod": "worker", + "start_time_ms": "start time", + "version": "version" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "description": "Connectors deployed and task stats.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_info{namespace=\"$env\",app=\"$connect_cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connectors", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "connector" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + "Value #C", + "Value #D", + "Value #E" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "How much time the connector tasks are in running state.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_running_ratio{namespace=\"$env\",app=\"$connect_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Tasks Running Ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average time spent in rebalance state.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": 
[] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Latency (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + 
"cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Maximum and average size of the batches processed by the connector task.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_avg{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Percentage of offset commit successful and failed.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (success)", + "metric": 
"", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_failure_percentage{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (failure)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset commit success/failure", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average and Maximum time in milliseconds taken by the task to commit offsets", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + 
"datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset commit latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of failures seen by task.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_failures{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Record Failures", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of errors seen by task.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_errors{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Record Error", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of records skipped seen by task.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_records_skipped{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Records Skipped", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of records logged seen by task.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_errors_logged{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Errors Logged", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of retries seen by task.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + 
"y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_retries{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Retries", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of produce requests to dead letter topics.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": 
null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Dead Letter Topic Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Task Errors", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average and Maximum time in milliseconds taken by this task to poll for a batch of source records", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Batch Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Before transformations are 
applied, \n this is the average per-second number of records produced or \n polled by the task belonging to the named source connector in the worker\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Poll Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "After transformations 
are applied, \n this is the average per-second number of records output from the transformations and \n written to Kafka for the task belonging to the named source connector in the worker \n (excludes any records filtered out by the transformations)\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Write Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } 
+ ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Source Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Batch Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Before transformations are applied, \n this is the average per-second number of records read from Kafka \n for the task belonging to the named sink connector in the worker\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + 
"hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_sink_record_read_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sink Record Read Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "After transformations are applied, \n this is the average per-second number of records output from the transformations and \n sent to the task belonging to the named sink connector in the worker \n (excludes any records filtered out by the transformations)\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, 
+ "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_sink_record_send_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sink Record Send Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of topic partitions assigned to the task and \n which belong to the named sink connector in the worker\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_partition_count{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Partition Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Sink Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Incoming byte rate per second per worker.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_incoming_byte_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Outgoing byte rate per second per worker.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 
5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Fraction of time the I/O thread spent doing I/O", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_io_ratio{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO Ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of network operations (reads or writes) on all connections per second", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 
8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_network_io_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network IO Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of active connections", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + 
"legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_connection_count{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Active Connections", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Successful and failed authentications per second.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_successful_authentication_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_failed_authentication_total{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Authentication", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connect Workers", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-connect" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + 
"tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Connect cluster", + "multi": false, + "name": "connect_cluster", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\"}, app)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Connect worker", + "multi": true, + "name": "connect_worker", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Connector", + "multi": true, + "name": "connector", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"}, connector)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + 
"timezone": "browser", + "title": "Kafka Connect cluster", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/kafka-consumer.json b/cfk-prometheus-grafana/grafana/kafka-consumer.json new file mode 100644 index 00000000..7c7f44a2 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/kafka-consumer.json @@ -0,0 +1,4582 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka consumers", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Consumed Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, 
kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Lag", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", +
"intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Rate per hour", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Failed Rebalance Rate per hour", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_consumer_app_info{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\", version!=\"\"}) by (version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Versions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": 
null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cts" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Lag Max", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": 
"auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" 
+ }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, + "type": 
"timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + 
"hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Sync Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", 
client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + 
"max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + 
"span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Response Time (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Last Heartbeat Seconds Ago", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", 
client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Rate Per Hour", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + 
"mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": 
false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Assigned Partitions", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Consumer group", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 
32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", 
+ "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_metrics_select_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Select Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": 
null, + "timeShift": null, + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": 
"single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + 
}, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_node_metrics_response_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Response Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Consumed Rate per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 45, 
+ "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Consumed Rate per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + 
"calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Size per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": 
"" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per Request Avg. 
per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-consumer" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "server", + "options": [], + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"consumer\"},pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Client ID", + "multi": true, + "name": "client_id", + "options": [], + "query": 
"label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"consumer\"},client_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Consumer", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/kafka-producer.json b/cfk-prometheus-grafana/grafana/kafka-producer.json new file mode 100644 index 00000000..ff50cd48 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/kafka-producer.json @@ -0,0 +1,4087 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka producers", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": 
null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } 
+ ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Error Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, 
+ "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Retry Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_producer_app_info{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\",version!=\"\"}) by 
(version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Versions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + 
} + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Metadata Age", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_request_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request in-flight", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per Request (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" 
+ }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Error Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] 
+ }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + 
"cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} 
(max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Split Rate", + "transformations": [], + "transparent": 
false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Compression Rate (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + 
"title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_count{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", 
client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", 
+ "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Close Rate", + "transformations": [], 
+ "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_select_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Select Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Response Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + 
"id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Compression Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" 
+ }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Error Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-producer" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, 
+ "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "server", + "options": [], + "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\"},pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Client ID", + "multi": true, + "name": "client_id", + "options": [], + "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\"},client_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Producer", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/kafka-quotas.json b/cfk-prometheus-grafana/grafana/kafka-quotas.json new file mode 100644 index 00000000..17ba4685 --- /dev/null +++ 
b/cfk-prometheus-grafana/grafana/kafka-quotas.json @@ -0,0 +1,779 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka quotas", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_produce_byte_rate{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat":
"User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_fetch_byte_rate{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + 
"title": "Fetch Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_request_request_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_produce_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_fetch_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_request_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-quota" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + 
"refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Broker", + "multi": true, + "name": "broker", + "options": [], + "query": "label_values(kafka_server_produce_byte_rate{namespace=\"$env\"},pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "User", + "multi": true, + "name": "user", + "options": [], + "query": "label_values(user)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Client ID", + "multi": true, + "name": "client_id", + "options": [], + "query": "label_values(client_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka 
Quotas", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/kafka-topics.json b/cfk-prometheus-grafana/grafana/kafka-topics.json new file mode 100644 index 00000000..6951e4e7 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/kafka-topics.json @@ -0,0 +1,967 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka topics", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$env\",topic=~\"$topic\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum(kafka_log_log_size{namespace=\"$env\",topic=~\"$topic\"}) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Log size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + 
"options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",topic=~\"$topic\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": 
"single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",topic=~\"$topic\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, 
sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Requests/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": 
false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer Fetch Requests/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": true + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_log_log_logstartoffset{namespace=\"$env\",topic=~\"$topic\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_log_log_logendoffset{namespace=\"$env\",topic=~\"$topic\"}", + "format": "table", + "hide": false, + "instant": true, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offsets", + "transformations": [ + { + "id": "concatenate", + "options": { + "frameNameLabel": "id", + "frameNameMode": "label" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "pod 1", + "topic 1", + "Value #A", + "Value #B", + "partition 1" + ] + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition 1" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic 1" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value #A": 3, + "Value #B": 4, + "partition 1": 2, + "pod 1": 5, + "topic 1": 1 + }, + "renameByName": { + "Value #A": "start offset", + "Value #B": "end offset", + "partition 1": "", + "pod 1": "broker", + "topic": "", + "topic 1": "" + } + } + } + ], + "transparent": false, + "type": "table" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Offsets", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": 
null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Topic", + "multi": true, + "name": "topic", + "options": [], + "query": "label_values(kafka_log_log_size{namespace=\"$env\"}, topic)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka topics", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json new file mode 100644 index 00000000..946951e4 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json @@ -0,0 +1,3339 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of ksqlDB clusters.", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": 
[], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "ksqlDB online instances returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online Servers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of active queries deployed in the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": 
"none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 
1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + 
"noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Queries Failed", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "A metric with constant value 1 indicating the server is up and emitting metrics.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": 
"auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$env\",app=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Liveness", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "The number of messages consumed per second across all queries.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace=\"$env\",app=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages consumed/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "The number of messages produced per second across all queries.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace=\"$env\",app=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages produced/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + 
"cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} 
(max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Process Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + 
"span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Punctuate Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Queries Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } 
+ }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + 
}, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + 
"legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + 
"repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Fetch Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", 
+ "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": 
"ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "State Stores", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "ksqldb" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, 
+ "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "ksqlDB cluster", + "multi": false, + "name": "ksqldb_cluster", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\"},app)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 2, + "includeAll": false, + "label": "ksqlDB cluster ID", + "multi": false, + "name": "ksqldb_cluster_id", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\"},ksql_cluster)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "ksqlDB server", + "multi": true, + "name": "ksqldb_server", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + 
"30d" + ] + }, + "timezone": "browser", + "title": "ksqlDB cluster", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json new file mode 100644 index 00000000..bcf9cc96 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json @@ -0,0 +1,1278 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Schema Registry cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Schema Registry online instances returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": 
"left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of registered schemas across the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + 
"calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Registered Schemas (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of schemas created, by type.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": 
null, + "title": "SR: Created Schemas by Type (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Deleted Schemas by Type", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of active connections", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": 
"left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Active Connections", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=\"$sr_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=\"$sr_server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": 
"percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=\"$sr_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_kafka_schema_registry_metrics_connection_count{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": 
"auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_jersey_metrics_request_rate{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_jersey_metrics_request_latency_99{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency (p99)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "schema-registry" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "sr_server", + "options": [], + "query": "label_values(kafka_schema_registry_registered_count{namespace=\"$env\"}, pod)", + "refresh": 1, + "regex": 
null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Schema Registry cluster", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/zookeeper-cluster.json b/cfk-prometheus-grafana/grafana/zookeeper-cluster.json new file mode 100644 index 00000000..156bea72 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/zookeeper-cluster.json @@ -0,0 +1,1914 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Zookeeper cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size 
metric.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + 
"value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: ZNodes (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" + } + ] + }, + "unit": "percentunit" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_numaliveconnections{namespace=\"$env\"} / zookeeper_maxclientcnxnsperhost{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Connections used", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "last" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_outstandingrequests{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$zk_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=\"$zk_server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + 
"mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=\"$zk_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_minrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Minimum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": 
"bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_avgrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Average)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_maxrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + 
"hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Maximum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Server Latency", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$env\",quantile=~\"$quantile\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + 
"repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sync Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Expired Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Disconnected 
Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Auth Failures on Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + 
"title": "Client Latency (Kafka)", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka", + "zookeeper" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "zk_server", + "options": [], + "query": "label_values(zookeeper_outstandingrequests{namespace=\"$env\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$env\"}, quantile)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + 
"refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Zookeeper cluster", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile new file mode 100644 index 00000000..289c29b2 --- /dev/null +++ b/grafana-dashboards/Makefile @@ -0,0 +1,35 @@ +all: + $(MAKE) def + $(MAKE) cfk + +def: OUTPUT_DIR=../jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards +def: export DATASOURCE=Prometheus +def: export ENV_LABEL=env +def: export SERVER_LABEL=hostname +def: export KSQLDB_CLUSTER_LABEL=ksqldb_cluster_id +def: export CONNECT_CLUSTER_LABEL=kafka_connect_cluster_id +.PHONY: def +def: dashboards + +OUTPUT_DIR=default +dashboards: + @mkdir -p $(OUTPUT_DIR) + @generate-dashboard confluent-platform.py -o $(OUTPUT_DIR)/confluent-platform.json + @generate-dashboard zookeeper-cluster.py -o $(OUTPUT_DIR)/zookeeper-cluster.json + @generate-dashboard kafka-cluster.py -o $(OUTPUT_DIR)/kafka-cluster.json + @generate-dashboard kafka-topics.py -o $(OUTPUT_DIR)/kafka-topics.json + @generate-dashboard schema-registry-cluster.py -o $(OUTPUT_DIR)/schema-registry-cluster.json + @generate-dashboard kafka-connect-cluster.py -o $(OUTPUT_DIR)/kafka-connect-cluster.json + @generate-dashboard ksqldb-cluster.py -o $(OUTPUT_DIR)/ksqldb-cluster.json + @generate-dashboard kafka-producer.py -o $(OUTPUT_DIR)/kafka-producer.json + @generate-dashboard kafka-consumer.py -o $(OUTPUT_DIR)/kafka-consumer.json + @generate-dashboard kafka-quotas.py -o $(OUTPUT_DIR)/kafka-quotas.json + +cfk: OUTPUT_DIR=../cfk-prometheus-grafana/grafana +cfk: export DATASOURCE=$${DS_PROMETHEUS} +cfk: export ENV_LABEL=namespace +cfk: export SERVER_LABEL=pod +cfk: export KSQLDB_CLUSTER_LABEL=app +cfk: export CONNECT_CLUSTER_LABEL=app +.PHONY: cfk +cfk: dashboards diff --git 
a/grafana-dashboards/README.md b/grafana-dashboards/README.md new file mode 100644 index 00000000..2a4a6445 --- /dev/null +++ b/grafana-dashboards/README.md @@ -0,0 +1,48 @@ +# Grafana dashboards for Confluent Platform + +## Dashboards + +- Confluent Platform overview: main metrics from all Confluent components. +- Kafka Cluster: Kafka cluster health and performance metrics. +- Kafka Topics: Kafka topics throughput metrics. +- Schema Registry Cluster: Servers and subjects/schemas metrics. +- Kafka Connect Cluster: Connect workers and connectors metrics. +- ksqlDB Cluster: Servers and queries metrics. +- Kafka Producer: Kafka producer client metrics. +- Kafka Consumer: Kafka consumer client metrics. +- Kafka Quotas: Kafka quotas and throttling metrics. + +## How to build + +Install the `grafanalib` library: + +```shell +pip3 install grafanalib +``` + +Run the Makefile: + +```shell +make +``` + +Running `make` generates the Grafana dashboard JSON files into `../jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards` for Docker/VM-based deployments and into `../cfk-prometheus-grafana/grafana` for Confluent-for-Kubernetes-based deployments. 
+ +## How to use + +Grafana dashboards expect the following labels: + +- Environment: + - Default: `env` + - CFK: `namespace` +- Server label: + - Default: `hostname` + - CFK: `pod` +- Cluster labels: + - Connect: + - Default: `kafka_connect_cluster_id` + - CFK: `app` + - ksqlDB: + - Default: `ksqldb_cluster_id` + - CFK: `app` + diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py new file mode 100644 index 00000000..ab6ff90e --- /dev/null +++ b/grafana-dashboards/confluent-platform.py @@ -0,0 +1,706 @@ +import os +import grafanalib.core as G + + +def dashboard( + ds="Prometheus", + env_label="namespace", + server_label="pod", + connect_cluster_label="app", + ksqldb_cluster_label="app", +): + """ + Confluent Platform dashboard + It includes all Confluent components: + - Zookeeper + - Kafka + - Schema Registry + - Kafka Connect (repeated per cluster) + - ksqlDB (repeated per cluster) + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. 
+ + Invariants: + - Max width: 24 + """ + + # Default sizes + default_height = 5 + stat_width = 4 + + # Queries + by_env = env_label + '="$env"' + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="connect_cluster", + label="Kafka Connect cluster", + dataSource=ds, + query="label_values(kafka_connect_connect_worker_metrics_connector_count{" + + by_env + + "}, " + + connect_cluster_label + + ")", + hide=True, + ), + G.Template( + name="ksqldb_cluster", + label="ksqlDB cluster", + dataSource=ds, + query="label_values(ksql_ksql_engine_query_stats_liveness_indicator{" + + by_env + + "}, " + + ksqldb_cluster_label + + ")", + hide=True, + ), + ] + ) + + # Panel groups + ## Zookeeper panes: + ### When updating descriptions on these panels, also update descriptions in zookeeper-cluster.py + zk_panels = [ + G.RowPanel( + title="Zookeeper cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="ZK: Quorum Size", + description="""Quorum Size of Zookeeper ensemble. + Count Zookeeper servers with quorum size metric. + """, + dataSource=ds, + targets=[ + G.Target( + expr="count(zookeeper_status_quorumsize{" + by_env + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=2.0, color="yellow"), + G.Threshold(index=2, value=3.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), + ), + G.Stat( + title="ZK: ZNodes (avg.)", + description="""Average size of ZNodes in the cluster. + Getting the node count per server, and averaging the node count. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="avg(zookeeper_inmemorydatatree_nodecount{" + by_env + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="ZK: Connections used", + description="""Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host. + If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened. + """, + dataSource=ds, + targets=[ + G.Target( + expr="zookeeper_numaliveconnections{" + + by_env + + "} / zookeeper_maxclientcnxnsperhost{" + + by_env + + "}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=0.6, color="yellow"), + G.Threshold(index=2, value=0.8, color="red"), + ], + format="percentunit", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), + ), + G.Stat( + title="ZK: Sum of watchers", + description="""Sum of client watchers subscribed to changes on the ZNodes. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(zookeeper_inmemorydatatree_watchcount{" + by_env + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), + ), + G.TimeSeries( + title="ZK: Outstanding Requests", + description="""Number of requests waiting for processing (queued). + If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked. + It could mean that there is not enough resources to cope with the number of requests. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="zookeeper_outstandingrequests{" + by_env + "}", + legendFormat="{{pod}} ({{server_id}}:{{member_type}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "last"], + legendPlacement="right", + gridPos=G.GridPos(h=default_height, w=8, x=stat_width * 4, y=0), + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=10.0, color="red"), + ], + ), + ] + + ## Kafka panels + ### When updating descriptions on these panels, also update descriptions in kafka-cluster.py + kafka_panels = [ + G.RowPanel( + title="Kafka cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.Stat( + title="Kafka: Online Brokers", + description="""Count of brokers available (online). + This value is referential and should not be used for alerting. + """, + dataSource=ds, + targets=[ + G.Target( + expr="count(kafka_server_replicamanager_leadercount{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=1), + ), + G.Stat( + title="Kafka: Active Controller", + description="""Active Controller broker. + It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_controller_kafkacontroller_activecontrollercount{" + + by_env + + "} > 0", + legendFormat="{{" + server_label + "}}", + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions", + description="""Sum of Topic partitions across the cluster. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_replicamanager_partitioncount{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=1), + ), + G.Stat( + title="Kafka: Sum of Under-Replicated Partitions (URP)", + description="""Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions. + There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_replicamanager_underreplicatedpartitions{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=1), + ), + G.Stat( + title="Kafka: Sum of Under-MinISR Partitions", + description="""Number of partitions where the number of replicas offline is higher than the minimum ISR configuration. + This means partitions are not available for Producers with acks=all. + It's recommended alerting when this values is higher than 0. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_cluster_partition_underminisr{" + by_env + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=1), + ), + G.Stat( + title="Kafka: Sum of Offline Partitions", + description="""Number of partitions where all replicas are offline. + Producers and Consumers are affected by this condition. 
+ It's recommended alerting when this values is higher than 0. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_controller_kafkacontroller_offlinepartitionscount{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=1), + ), + ] + + ## Schema Registry panels: + ### When updating descriptions on these panels, also update descriptions in schema-registry-cluster.py + sr_panels = [ + G.RowPanel( + title="Schema Registry cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + ), + G.Stat( + title="SR: Online instances", + description="""Schema Registry online instances returning metrics. + """, + dataSource=ds, + targets=[ + G.Target( + expr="count(kafka_schema_registry_registered_count{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=2.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=2), + ), + G.Stat( + title="SR: Registered Schemas (avg.)", + description="""Average number of registered schemas across the cluster. + """, + dataSource=ds, + targets=[ + G.Target( + expr="avg(kafka_schema_registry_registered_count{" + by_env + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=2), + ), + G.Stat( + title="SR: Schemas Created by Type (avg.)", + description="""Average number of schemas created, by type. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="avg(kafka_schema_registry_schemas_created{" + + by_env + + "}) by (schema_type)", + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=2), + ), + G.Stat( + title="SR: Schemas Deleted by Type (avg.)", + description="""Average number of schemas deleted, by type. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_schema_registry_schemas_deleted{" + + by_env + + "}) by (schema_type)", + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=2), + ), + ] + + ## Kafka Connect cluster panels: + ### When updating descriptions on these panels, also update descriptions in kafka-connect-cluster.py + connect_inner = [ + G.Stat( + title="Connect: Online Workers", + description="""Kafka Connect online workers returning metrics. + """, + dataSource=ds, + targets=[ + G.Target( + expr="count(kafka_connect_connect_worker_metrics_connector_count{" + + by_env + + "," + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=3), + ), + G.Stat( + title="Connect: Sum of Total Tasks", + description="""Number of tasks deployed on Kafka Connect cluster. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + by_env + + "," + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=3), + ), + G.Stat( + title="Connect: Sum of Running Tasks", + description="""Number of Running Tasks on the Kafka Connect cluster. + Ideally, this number should be equal to the total number of tasks deployed. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_running_task_count{" + + by_env + + "," + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=3), + ), + G.Stat( + title="Connect: Sum of Paused Tasks", + description="""Number of Paused Tasks on the Kafka Connect cluster. + Ideally, this number should be zero, as tasks should be running. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{" + + by_env + + "," + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=3), + ), + G.Stat( + title="Connect: Sum of Failed Tasks", + description="""Number of Paused Tasks on the Kafka Connect cluster. + Ideally, this number should be zero, as tasks should be running. + It's recommended alerting when this value is higher than 0. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{" + + by_env + + "," + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=3), + ), + G.Stat( + title="Connect: Time since last rebalance", + description="""Informative value. Time since last rebalance. + When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{" + + by_env + + "," + + connect_cluster_label + + '=~"$connect_cluster"} >= 0', + legendFormat="{{pod}}", + ), + ], + reduceCalc="last", + format="clockms", + graphMode="none", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=3), + ), + ] + ### Repeat as there could be multiple connect clusters per environment. + connect_panels = [ + G.RowPanel( + title="Kafka Connect cluster: $connect_cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=3), + repeat=G.Repeat(variable="connect_cluster"), + collapsed=True, + panels=connect_inner, + ), + ] + + ## ksqlDB cluster panels: + ### When updating descriptions on these panels, also update descriptions in ksqldb-cluster.py + ksqldb_inner = [ + G.Stat( + title="ksqlDB: Online instances", + description="""ksqlDB online instances returning metrics. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="count(ksql_ksql_engine_query_stats_num_active_queries{" + + by_env + + "," + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Active Queries", + description="""Number of active queries deployed in the cluster. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_num_active_queries{" + + by_env + + "," + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Running Queries", + description="""Number of running queries deployed in the cluster. + Ideally, this number should be equal to the number of active queries as queries should be running. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_running_queries{" + + by_env + + "," + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Rebalancing Queries", + description="""Number of queries rebalancing in the cluster. + Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute). + It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_rebalancing_queries{" + + by_env + + "," + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=4), + ), + G.Stat( + title="Connect: Sum of Queries Failed", + description="""Number of queries failed in the cluster. + Ideally, this number should be equal zero. + It's recommended to alert if the number of queries failed is higher than 0. + """, + dataSource=ds, + targets=[ + G.Target( + expr="avg(ksql_ksql_engine_query_stats_error_queries{" + + by_env + + "," + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=4), + ), + ] + ### Repeat as there could be multiple ksqldb clusters per environment. 
+ ksqldb_panels = [ + G.RowPanel( + title="ksqlDB cluster: $ksqldb_cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=4), + repeat=G.Repeat(variable="ksqldb_cluster"), + collapsed=True, + panels=ksqldb_inner, + ), + ] + + # group all panels + panels = zk_panels + kafka_panels + sr_panels + connect_panels + ksqldb_panels + + # build dashboard + return G.Dashboard( + title="Confluent Platform overview", + description="Overview of the main health-check metrics from Confluent Platform components.", + tags=[ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb", + ], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +connect_cluster_label = os.environ.get( + "CONNECT_CLUSTER_LABEL", "kafka_connect_cluster_id" +) +ksqldb_cluster_label = os.environ.get("KSQLDB_CLUSTER_LABEL", "ksqldb_cluster_id") + +# dashboard required by grafanalib +dashboard = dashboard( + ds, env_label, server_label, connect_cluster_label, ksqldb_cluster_label +) diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py new file mode 100644 index 00000000..53ccd5d3 --- /dev/null +++ b/grafana-dashboards/kafka-cluster.py @@ -0,0 +1,1309 @@ +import os +import grafanalib.core as G + + +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka cluster dashboard + It includes: + - Cluster overview + - System resources + - Throughput + - Thread utilization + - Request rates + - Connections + - In-Sync Replicas + - Request latency: Producer + - Request latency: Consumer Fetch + - Request latency: Follower Fetch + - Group Coordinator 
+ - Message Conversion + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + + # Default sizes + default_height = 5 + stat_width = 4 + ts_width = 8 + + # Queries + by_env = env_label + '="$env"' + by_server = by_env + "," + server_label + '=~"$broker"' + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="broker", + label="Broker", + dataSource=ds, + query="label_values(kafka_server_replicamanager_leadercount{" + + by_env + + "}, " + + server_label + + ")", + multi=True, + includeAll=True, + ), + G.Template( + name="quantile", + label="Quantile", + dataSource=ds, + query="label_values(kafka_network_requestmetrics_requestqueuetimems{" + + by_env + + "}, quantile)", + ), + ] + ) + + # Panel groups + ## Cluster overview: + ### When updating descriptions on these panels, also update descriptions in confluent-platform.py + overview_panels = [ + G.RowPanel( + title="Cluster Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + # First group of stats + G.Stat( + title="Kafka: Online Brokers", + description="""Count of brokers available (online). + """, + dataSource=ds, + targets=[ + G.Target( + expr="count(kafka_server_replicamanager_leadercount{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), + ), + G.Stat( + title="Kafka: Active Controller", + description="""Active Controller broker. + It should always be 1. 
If the value is different than 1, then it must be alerted for troubleshooting. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_controller_kafkacontroller_activecontrollercount{" + + by_env + + "} > 0", + legendFormat="{{" + server_label + "}}", + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="Kafka: Sum of Preferred Replica Imbalance", + description=""" + Number of partitions where the preferred replica is not the leader. + Usually, this number is 0. + Restarting nodes could cause this values to change, but when reassigning happens the value stabilize. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), + ), + G.Stat( + title="Kafka: Sum of Topics", + description="Number of topics in the cluster.", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_controller_kafkacontroller_globaltopiccount{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=1), + ), + G.Stat( + title="Kafka: Rate of Requests/Sec", + description="""Sum of requests per second rated over a 5 min. period. 
+ Gives an idea of the processing load in the cluster.""", + dataSource=ds, + targets=[ + G.Target( + expr="sum(rate(kafka_network_requestmetrics_requestspersec{" + + by_server + + "}[5m]))", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="reqps", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=0), + ), + G.Stat( + title="Kafka: Log Size", + description="""Sum of log sizes per broker. + This must be compared with the total storage space available in the brokers.""", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_log_log_size{" + + by_server + + "}) by (" + + server_label + + ")", + legendFormat="{{" + server_label + "}}", + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="bytes", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=0), + ), + # Second group of stats + G.Stat( + title="Kafka: Sum of Partitions", + description="""Sum of Topic partitions across the cluster. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_replicamanager_partitioncount{" + + by_server + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=1), + ), + G.Stat( + title="Kafka: Sum of Under-Replicated Partitions (URP)", + description="""Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions. + There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_replicamanager_underreplicatedpartitions{" + + by_server + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=1), + ), + G.Stat( + title="Kafka: Sum of Under-MinISR Partitions", + description="""Number of partitions where the number of replicas offline is higher than the minimum ISR configuration. + This means partitions are not available for Producers with acks=all. + We recommend alerting when this values is higher than 0. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_cluster_partition_underminisr{" + by_server + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=1), + ), + G.Stat( + title="Kafka: Sum of Offline Partitions", + description="""Number of partitions where all replicas are offline. + Producers and Consumers are affected by this condition. + We recommend alerting when this values is higher than 0. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_controller_kafkacontroller_offlinepartitionscount{" + + by_server + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=1), + ), + G.Stat( + title="Kafka: Bytes In/Sec", + description="""Sum of bytes in per second rated over a 5 min. period. + Gives an idea of the incoming throughput handle by the cluster. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{" + + by_server + + "}[5m]))", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="binBps", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=1), + ), + G.Stat( + title="Kafka: Bytes Out/Sec", + description="""Sum of bytes out per second rated over a 5 min. period. + Gives an idea of the outgoing throughput handle by the cluster. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{" + + by_server + + "}[5m]))", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="binBps", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=1), + ), + ] + + ## System resources: + ### When updating descriptions on these panels, also update descriptions in other cluster dashboards + system_base = 2 + system_panels = [ + G.RowPanel( + title="System resources", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="CPU usage", + description="""Rate of CPU seconds used by the Java process. + 100% usage represents one core. + If there are multiple cores, the total capacity should be 100% * number_cores.""", + dataSource=ds, + targets=[ + G.Target( + expr="irate(process_cpu_seconds_total{" + by_server + "}[5m])", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=system_base + ), + ), + G.TimeSeries( + title="Memory usage", + description="""Sum of JVM memory used, without including areas (e.g. 
heap size).""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(area)(jvm_memory_bytes_used{" + by_server + "})", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=system_base + ), + ), + G.TimeSeries( + title="GC collection", + description="""Sum of seconds used by Garbage Collection.""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + by_server + + "}[5m]))", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=system_base + ), + ), + ] + + ## Throughput: + throughtput_base = system_base + 1 + throughput_inner = [ + G.TimeSeries( + title="Messages In/Sec", + description="""Number of messages into topics per second, aggregated by sum without topic.""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{" + + by_server + + "}[5m]))", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=throughtput_base + ), + ), + G.TimeSeries( + title="Bytes In/Sec", + description="""Number of bytes into topics per second, aggregated by sum without topic.""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{" + + by_server + + "}[5m]))", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=throughtput_base + ), + ), + G.TimeSeries( + 
title="Bytes Out/Sec", + description="""Number of bytes out of topics per second, aggregated by sum without topic.""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{" + + by_server + + "}[5m]))", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=throughtput_base + ), + ), + ] + throughput_panels = [ + G.RowPanel( + title="Throughput", + gridPos=G.GridPos(h=1, w=24, x=0, y=throughtput_base), + collapsed=True, + panels=throughput_inner, + ), + ] + + ## Thread utilization: + thread_base = throughtput_base + 1 + thread_inner = [ + G.TimeSeries( + title="Network processor usage", + description="""Percent of time the network thread pool is used. + It should be below 60% or the capacity of threads should be tuned or + the cluster scaled to cope with the load.""", + dataSource=ds, + targets=[ + G.Target( + expr="1-kafka_network_socketserver_networkprocessoravgidlepercent{" + + by_server + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=thread_base + ), + ), + G.TimeSeries( + title="Request processor (IO) usage", + description="""Percent of time the IO thread pool is used. 
+ It should be below 60% or the capacity of threads should be tuned or + the cluster scaled to cope with the load.""", + dataSource=ds, + targets=[ + G.Target( + expr="1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{" + + by_server + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=thread_base + ), + ), + ] + thread_panels = [ + G.RowPanel( + title="Thread utilization", + gridPos=G.GridPos(h=1, w=24, x=0, y=thread_base), + collapsed=True, + panels=thread_inner, + ), + ] + + ## Request rates: + request_base = thread_base + 1 + ### It has the special case of aggregating across the cluster. + ### As the number of labels is unknown and could be extended depending on the platform. + ### At the moment includes known labels: instance, pod, and stateful_kubernetes_io_pod_name + known_labels = "pod,instance,statefulset_kubernetes_io_pod_name" + request_inner = [ + G.TimeSeries( + title="Requests rates", + description="""Requests per second rated over a 5 minutes period. + Includes API call and version. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum without(" + + known_labels + + ")(rate(kafka_network_requestmetrics_requestspersec{" + + by_server + + "}[5m]))", + legendFormat="{{request}}(v{{version}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=request_base + ), + stacking={"mode": "normal", "group": "A"}, + ), + G.TimeSeries( + title="Error rates", + description="""Request Errors per second rated over a 5 minutes period. + Includes API call and version. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum without(" + + known_labels + + ")(rate(kafka_network_requestmetrics_errorspersec{" + + by_server + + ',error!="NONE"}[5m]))', + legendFormat="{{error}}@{{request}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=request_base + ), + stacking={"mode": "normal", "group": "A"}, + ), + ] + request_panels = [ + G.RowPanel( + title="Request rates", + gridPos=G.GridPos(h=1, w=24, x=0, y=request_base), + collapsed=True, + panels=request_inner, + ), + ] + + ## Connections: + connection_base = request_base + 1 + connection_inner = [ + G.TimeSeries( + title="Sum of Connections alive per Broker", + description="Sum of connections count across cluster by brokers", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_count{" + + by_server + + "}) by (" + + server_label + + ")", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + ), + ), + G.TimeSeries( + title="Sum of Connections creation rate per Broker", + description="Sum of rate of connections created across cluster by brokers", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_creation_rate{" + + by_server + + "}) by (" + + server_label + + ")", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + ), + ), + G.TimeSeries( + title="Sum of Connections close rate per Broker", + description="Sum of rate of connections closed across cluster by brokers", + dataSource=ds, + targets=[ + G.Target( + 
expr="sum(kafka_server_socketservermetrics_connection_close_rate{" + + by_server + + "}) by (" + + server_label + + ")", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + ), + ), + # By Listener + G.TimeSeries( + title="Sum of Connections alive per Listener", + description="Sum of connections count across cluster by listeners", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_count{" + + by_server + + "}) by (listener)", + legendFormat="{{listener}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="Sum of Connections creation rate per Listener", + description="Sum of rate of connections created across cluster by listener", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_creation_rate{" + + by_server + + "}) by (listener)", + legendFormat="{{listener}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="Sum of Connections close rate per Listener", + description="Sum of rate of connections closed across cluster by listener", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_close_rate{" + + by_server + + "}) by (listener)", + legendFormat="{{listener}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + 1 + ), + ), + ] + connection_panels = [ + G.RowPanel( + title="Connections", + gridPos=G.GridPos(h=1, w=24, x=0, y=connection_base), 
+ collapsed=True, + panels=connection_inner, + ), + ] + + ## In-Sync Replicas: + isr_base = connection_base + 2 + isr_inner = [ + G.TimeSeries( + title="Rate of ISR Shrinks/sec", + description="""Rate of ISR shrinks per second. + If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly. + """, + dataSource=ds, + targets=[ + G.Target( + expr="rate(kafka_server_replicamanager_isrshrinkspersec{" + + by_server + + "}[5m])", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=isr_base + ), + ), + G.TimeSeries( + title="Rate of ISR Expands/sec", + description="""Rate of ISR expands per second. + If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly. + """, + dataSource=ds, + targets=[ + G.Target( + expr="rate(kafka_server_replicamanager_isrexpandspersec{" + + by_server + + "}[5m])", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=isr_base + ), + ), + ] + isr_panels = [ + G.RowPanel( + title="In-Sync Replicas", + gridPos=G.GridPos(h=1, w=24, x=0, y=isr_base), + collapsed=True, + panels=isr_inner, + ), + ] + + ## Request latency for Produce: + ### When changing these panels, also modify Consumer Fetch and Follower Fetch. + producer_base = isr_base + 1 + producer_inner = [ + G.TimeSeries( + title="Produce: Request Queue Time", + description="""Time expend on the request queue. + Moved from network socket to request queue by Network threads. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_requestqueuetimems{" + + by_server + + ',quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=producer_base + ), + ), + G.TimeSeries( + title="Produce: Local Time", + description="""Time expend doing local IO. + Moved from request queue to storage device operations by IO threads. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_localtimems{" + + by_server + + ',quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=producer_base + ), + ), + G.TimeSeries( + title="Produce: Remote Time", + description="""Time expend waiting for coordination with other brokers/internal condition. + At purgatory. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_remotetimems{" + + by_server + + ',quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=producer_base + ), + ), + G.TimeSeries( + title="Produce: Response Queue Time", + description="""Time expend waiting in response queue. + Moved from purgatory to response queue by IO threads. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsequeuetimems{" + + by_server + + ',quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=producer_base + 1 + ), + ), + G.TimeSeries( + title="Produce: Response Send Time", + description="""Time expend delivering response. + Moved from response queue to client by Networkc threads. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsesendtimems{" + + by_server + + ',quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=producer_base + 1 + ), + ), + ] + producer_panels = [ + G.RowPanel( + title="Request latency: Producer", + gridPos=G.GridPos(h=1, w=24, x=0, y=producer_base), + collapsed=True, + panels=producer_inner, + ), + ] + + ## Request latency for Consumer Fetch: + ### When changing these panels, also modify Produce and Follower Fetch. + consumer_base = producer_base + 2 + consumer_inner = [ + G.TimeSeries( + title="Fetch: Request Queue Time", + description="""Time expend on the request queue. + Moved from network socket to request queue by Network threads. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_requestqueuetimems{" + + by_server + + ',quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=consumer_base + ), + ), + G.TimeSeries( + title="Fetch: Local Time", + description="""Time expend doing local IO. + Moved from request queue to storage device operations by IO threads. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_localtimems{" + + by_server + + ',quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=consumer_base + ), + ), + G.TimeSeries( + title="Fetch: Remote Time", + description="""Time expend waiting for coordination with other brokers/internal condition. + At purgatory. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_remotetimems{" + + by_server + + ',quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=consumer_base + ), + ), + G.TimeSeries( + title="Fetch: Response Queue Time", + description="""Time expend waiting in response queue. + Moved from purgatory to response queue by IO threads. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsequeuetimems{" + + by_server + + ',quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=consumer_base + 1 + ), + ), + G.TimeSeries( + title="Fetch: Response Send Time", + description="""Time expend delivering response. + Moved from response queue to client by Networkc threads. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsesendtimems{" + + by_server + + ',quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=consumer_base + 1 + ), + ), + ] + consumer_panels = [ + G.RowPanel( + title="Request latency: Consumer Fetch", + gridPos=G.GridPos(h=1, w=24, x=0, y=consumer_base), + collapsed=True, + panels=consumer_inner, + ), + ] + + ## Request latency for Follower Fetch: + ### When changing these panels, also modify Produce and Consumer Fetch. + replication_base = consumer_base + 2 + replication_inner = [ + G.TimeSeries( + title="Fetch: Request Queue Time", + description="""Time expend on the request queue. + Moved from network socket to request queue by Network threads. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_requestqueuetimems{" + + by_server + + ',quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=replication_base + ), + ), + G.TimeSeries( + title="Fetch: Local Time", + description="""Time expend doing local IO. + Moved from request queue to storage device operations by IO threads. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_localtimems{" + + by_server + + ',quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=replication_base + ), + ), + G.TimeSeries( + title="Fetch: Remote Time", + description="""Time expend waiting for coordination with other brokers/internal condition. + At purgatory. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_remotetimems{" + + by_server + + ',quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=replication_base + ), + ), + G.TimeSeries( + title="Fetch: Response Queue Time", + description="""Time expend waiting in response queue. + Moved from purgatory to response queue by IO threads. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsequeuetimems{" + + by_server + + ',quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=replication_base + 1 + ), + ), + G.TimeSeries( + title="Fetch: Response Send Time", + description="""Time expend delivering response. + Moved from response queue to client by Networkc threads. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsesendtimems{" + + by_server + + ',quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=replication_base + 1 + ), + ), + ] + replication_panels = [ + G.RowPanel( + title="Request latency: Replica Fetch", + gridPos=G.GridPos(h=1, w=24, x=0, y=replication_base), + collapsed=True, + panels=replication_inner, + ), + ] + + ## Group Coordination: + group_base = replication_base + 2 + group_inner = [ + G.TimeSeries( + title="Number of Groups per Broker", + description="Number of groups managed by Broker", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_coordinator_group_groupmetadatamanager_numgroups{" + + by_server + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=group_base + ), + ), + G.TimeSeries( + title="Number of Groups per Broker per Status", + description="Number of stable groups managed by Broker", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{" 
+ + by_server + + "})", + legendFormat="stable", + ), + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{" + + by_server + + "})", + legendFormat="preparing_rebalance", + ), + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{" + + by_server + + "})", + legendFormat="dead", + ), + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{" + + by_server + + "})", + legendFormat="completing_rebalance", + ), + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{" + + by_server + + "})", + legendFormat="empty", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=group_base + ), + ), + ] + group_panels = [ + G.RowPanel( + title="Group Coordinator", + gridPos=G.GridPos(h=1, w=24, x=0, y=group_base), + collapsed=True, + panels=group_inner, + ), + ] + + ## Conversion: + conversion_base = group_base + 1 + conversion_inner = [ + G.TimeSeries( + title="Sum of Produce conversion rate per sec", + description="""Sum of produce message conversions per second. + This value increases when the broker receives produce messages from clients using older versions. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{" + + by_server + + "})", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="opsps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=conversion_base + ), + ), + G.TimeSeries( + title="Sum of Fetch conversion rate per sec", + description="""Sum of fetch message conversions per second. + This value increases when the broker receives fetch messages from clients using older versions. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{" + + by_server + + "})", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="opsps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=conversion_base + ), + ), + G.TimeSeries( + title="Sum of Connections per version", + description="""Sum of connections aggregated by client version and name. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connections{" + + by_server + + "}) by (client_software_name,client_software_version)", + legendFormat="{{client_software_name}} (v{{client_software_version}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=conversion_base + ), + ), + ] + conversion_panels = [ + G.RowPanel( + title="Message Conversion", + gridPos=G.GridPos(h=1, w=24, x=0, y=conversion_base), + collapsed=True, + panels=conversion_inner, + ), + ] + + # group all panels + panels = ( + overview_panels + + system_panels + + throughput_panels + + thread_panels + + request_panels + + connection_panels + + isr_panels + + producer_panels + + consumer_panels + + replication_panels + + group_panels + + conversion_panels + ) + + # build dashboard + return G.Dashboard( + title="Kafka cluster", + description="Overview of the Kafka cluster", + tags=["confluent", "kafka"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") + +# 
dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/kafka-connect-cluster.py b/grafana-dashboards/kafka-connect-cluster.py new file mode 100644 index 00000000..1805dc75 --- /dev/null +++ b/grafana-dashboards/kafka-connect-cluster.py @@ -0,0 +1,1096 @@ +import os +import grafanalib.core as G + + +def dashboard( + ds="Prometheus", + env_label="namespace", + server_label="' + server_label + '", + connect_cluster_label="app", +): + """ + Kafka Connect cluster dashboard + It includes: + - Cluster overview + - System resources + - Connect workers + - Tasks + - Task Errors + - Source Tasks + - Sink Tasks + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + + # Default sizes + default_height = 5 + stat_width = 4 + ts_width = 8 + + # Queries + by_env = env_label + '="$env"' + by_cluster = by_env + "," + connect_cluster_label + '="$connect_cluster"' + by_server = by_cluster + "," + server_label + '=~"$connect_worker"' + by_connector = by_server + ',connector=~"$connector"' + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="connect_cluster", + label="Connect cluster", + dataSource=ds, + query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + by_env + + "}, " + + connect_cluster_label + + ")", + ), + G.Template( + name="connect_worker", + label="Connect worker", + dataSource=ds, + query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + by_cluster + + "}, " + + server_label + + ")", + multi=True, + 
includeAll=True, + ), + G.Template( + name="connector", + label="Connector", + dataSource=ds, + query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + by_cluster + + "}, connector)", + multi=True, + includeAll=True, + ), + ] + ) + + # Panel groups + ## Cluster overview: + ### When updating descriptions on these panels, also update descriptions in confluent-platform.py + overview_base = 0 + overview_panels = [ + G.RowPanel( + title="Cluster Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=overview_base), + ), + # First level + G.Stat( + title="Connect: Online Workers", + description="""Kafka Connect online workers returning metrics. + """, + dataSource=ds, + targets=[ + G.Target( + expr="count(kafka_connect_app_info{" + + by_cluster + + ',version!=""})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 0, y=overview_base + ), + ), + G.Stat( + title="Connect: Sum of Total Tasks", + description="""Number of tasks deployed on Kafka Connect cluster. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 1, y=overview_base + ), + ), + G.Stat( + title="Connect: Sum of Running Tasks", + description="""Number of Running Tasks on the Kafka Connect cluster. + Ideally, this number should be equal to the total number of tasks deployed. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_running_task_count{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 2, y=overview_base + ), + ), + G.Stat( + title="Connect: Sum of Paused Tasks", + description="""Number of Paused Tasks on the Kafka Connect cluster. + Ideally, this number should be zero, as tasks should be running. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 3, y=overview_base + ), + ), + G.Stat( + title="Connect: Sum of Failed Tasks", + description="""Number of Paused Tasks on the Kafka Connect cluster. + Ideally, this number should be zero, as tasks should be running. + It's recommended alerting when this value is higher than 0. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 4, y=overview_base + ), + ), + G.Stat( + title="Connect: Time since last rebalance", + description="""Informative value. Time since last rebalance. + When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{" + + by_cluster + + "} >= 0", + legendFormat="{{" + server_label + "}}", + ), + ], + reduceCalc="last", + format="clockms", + graphMode="none", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 5, y=overview_base + ), + ), + # Second level + G.Table( + title="Connect Workers", + description="""Connect workers metadata and stats. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_app_info{" + by_cluster + ',start_time_ms!=""}', + format="table", + instant=True, + ), + G.Target( + expr="kafka_connect_app_info{" + by_cluster + ',version!=""}', + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_connector_count{" + + by_cluster + + "})", + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_connector_startup_success_total{" + + by_cluster + + "})", + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_connector_startup_failure_total{" + + by_cluster + + "})", + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_task_count{" + + by_cluster + + "})", + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_task_startup_success_total{" + + by_cluster + + "})", + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_task_startup_failure_total{" + + by_cluster + + "})", + format="table", + instant=True, + ), + ], + transformations=[ + {"id": "seriesToColumns", "options": {"byField": 
server_label}}, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + server_label, + connect_cluster_label + " 1", + "start_time_ms", + "version", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + env_label + " 1", + ] + } + }, + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + connect_cluster_label + " 1": 1, + env_label + " 1": 0, + server_label: 2, + "start_time_ms": 3, + "version": 4, + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + connect_cluster_label + " 1": "cluster", + env_label + " 1": "environment", + server_label: "worker", + "start_time_ms": "start time", + "version": "version", + }, + }, + }, + ], + gridPos=G.GridPos(h=default_height, w=24, x=0, y=overview_base + 1), + ), + # Third level + G.Table( + title="Connectors", + description="""Connectors deployed and task stats. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connector_info{" + by_cluster + "}", + format="table", + instant=True, + ), + G.Target( + expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{" + + by_cluster + + "})", + format="table", + instant=True, + ), + G.Target( + expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{" + + by_cluster + + "})", + format="table", + instant=True, + ), + G.Target( + expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{" + + by_cluster + + "})", + format="table", + instant=True, + ), + G.Target( + expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{" + + by_cluster + + "})", + format="table", + instant=True, + ), + ], + transformations=[ + {"id": "seriesToColumns", "options": {"byField": "connector"}}, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + "Value #C", + "Value #D", + "Value #E", + ] + } + }, + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused", + }, + }, + }, + ], + gridPos=G.GridPos(h=default_height, w=24, x=0, y=overview_base + 2), + ), + # Forth level + G.TimeSeries( + title="Tasks Running Ratio", + description="""How much time the connector tasks are in running state. + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connector_task_metrics_running_ratio{" + + by_cluster + + "}", + legendFormat="{{connector}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=12, x=0, y=overview_base + 3), + ), + G.TimeSeries( + title="Rebalance Latency (avg.)", + description="""Average ime spent on rebalance state. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{" + + by_cluster + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=12, x=12, y=overview_base + 3), + ), + ] + + ## System resources: + ### When updating descriptions on these panels, also update descriptions in other cluster dashboards + system_base = overview_base + 4 + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="CPU usage", + description="""Rate of CPU seconds used by the Java process. + 100% usage represents one core. + If there are multiple cores, the total capacity should be 100% * number_cores.""", + dataSource=ds, + targets=[ + G.Target( + expr="irate(process_cpu_seconds_total{" + by_server + "}[5m])", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=system_base + ), + ), + G.TimeSeries( + title="Memory usage", + description="""Sum of JVM memory used, without including areas (e.g. 
heap size).""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(area)(jvm_memory_bytes_used{" + by_server + "})", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=system_base + ), + ), + G.TimeSeries( + title="GC collection", + description="""Sum of seconds used by Garbage Collection.""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + by_server + + "}[5m]))", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=system_base + ), + ), + ] + + ## Workers: + worker_base = system_base + 1 + worker_inner = [ + G.TimeSeries( + title="Incoming Byte Rate", + description="Incoming byte rate per second per worker.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_incoming_byte_rate{" + + by_server + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=worker_base + ), + ), + G.TimeSeries( + title="Outgoing Byte Rate", + description="Outgoing byte rate per second per worker.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_outgoing_byte_rate{" + + by_server + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=worker_base + ), + ), + G.TimeSeries( + title="IO Ratio", + description="Fraction of time the I/O thread spent doing I/O", + dataSource=ds, + targets=[ + G.Target( + 
expr="kafka_connect_connect_metrics_io_ratio{" + by_server + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=worker_base + 1 + ), + ), + G.TimeSeries( + title="Network IO Rate", + description="Average number of network operations (reads or writes) on all connections per second", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_network_io_rate{" + + by_server + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=worker_base + 1 + ), + ), + G.TimeSeries( + title="Active Connections", + description="Number of active connections", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_connection_count{" + + by_server + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=worker_base + 2 + ), + ), + G.TimeSeries( + title="Rate of Authentication", + description="Successful and failed authentications per second.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_successful_authentication_rate{" + + by_server + + "}", + legendFormat="{{" + server_label + "}} (success)", + ), + G.Target( + expr="kafka_connect_connect_metrics_failed_authentication_total{" + + by_server + + "}", + legendFormat="{{" + server_label + "}} (failed)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=worker_base + 2 + ), + ), + ] + worker_panels = [ + G.RowPanel( + title="Connect Workers", + gridPos=G.GridPos(h=1, w=24, x=0, 
y=worker_base), + collapsed=True, + panels=worker_inner, + ), + ] + + ## Tasks: + tasks_base = worker_base + 1 + tasks_inner = [ + G.TimeSeries( + title="Batch size", + description="Maximum and average size of the batches processed by the connector task.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connector_task_metrics_batch_size_avg{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (avg.)", + ), + G.Target( + expr="kafka_connect_connector_task_metrics_batch_size_max{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=tasks_base + ), + ), + G.TimeSeries( + title="Offset commit success/failure", + description="Percentage of offset commit successful and failed.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connector_task_metrics_offset_commit_success_percentage{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (success)", + ), + G.Target( + expr="kafka_connect_connector_task_metrics_offset_commit_failure_percentage{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (failure)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=tasks_base + ), + ), + G.TimeSeries( + title="Offset commit latency", + description="Average and Maximum time in milliseconds taken by the task to commit offsets", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (avg.)", + ), + G.Target( + expr="kafka_connect_connector_task_metrics_offset_commit_max_time_ms{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (max.)", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=tasks_base + ), + ), + ] + tasks_panels = [ + G.RowPanel( + title="Tasks", + gridPos=G.GridPos(h=1, w=24, x=0, y=tasks_base), + collapsed=True, + panels=tasks_inner, + ), + ] + + ## Task Errors: + task_errors_base = tasks_base + 1 + task_errors_inner = [ + # First layer + G.TimeSeries( + title="Total Record Failures", + description="Total number of failures seen by task.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_record_failures{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=task_errors_base + ), + ), + G.TimeSeries( + title="Total Record Error", + description="Total number of errors seen by task.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_record_errors{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=task_errors_base + ), + ), + G.TimeSeries( + title="Total Records Skipped", + description="Total number of records skipped seen by task.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_records_skipped{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=task_errors_base + ), + ), + # Second layer + G.TimeSeries( + title="Total Errors Logged", + description="Total number of records logged seen by task.", + dataSource=ds, + targets=[ + G.Target( + 
expr="kafka_connect_task_error_metrics_total_errors_logged{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=task_errors_base + 1 + ), + ), + G.TimeSeries( + title="Total Retries", + description="Total number of retries seen by task.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_retries{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=task_errors_base + 1 + ), + ), + G.TimeSeries( + title="Dead Letter Topic Requests", + description="Number of produce requests to dead letter topics.", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_deadletterqueue_produce_requests{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=task_errors_base + 1 + ), + ), + ] + task_errors_panels = [ + G.RowPanel( + title="Task Errors", + gridPos=G.GridPos(h=1, w=24, x=0, y=task_errors_base), + collapsed=True, + panels=task_errors_inner, + ), + ] + + ## Source tasks: + source_base = task_errors_base + 2 + source_inner = [ + G.TimeSeries( + title="Poll Batch Latency", + description="Average and Maximum time in milliseconds taken by this task to poll for a batch of source records", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_source_task_metrics_poll_batch_avg_time_ms{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (avg.)", + ), + G.Target( + expr="kafka_connect_source_task_metrics_poll_batch_max_time_ms{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] 
(max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=source_base + ), + ), + G.TimeSeries( + title="Source Record Poll Rate", + description="""Before transformations are applied, + this is the average per-second number of records produced or + polled by the task belonging to the named source connector in the worker + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_source_task_metrics_source_record_poll_rate{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=source_base + ), + ), + G.TimeSeries( + title="Source Record Write Rate", + description="""After transformations are applied, + this is the average per-second number of records output from the transformations and + written to Kafka for the task belonging to the named source connector in the worker + (excludes any records filtered out by the transformations) + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_source_task_metrics_source_record_write_rate{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=source_base + ), + ), + ] + source_panels = [ + G.RowPanel( + title="Source Tasks", + gridPos=G.GridPos(h=1, w=24, x=0, y=source_base), + collapsed=True, + panels=source_inner, + ), + ] + + ## Sink tasks: + sink_base = source_base + 1 + sink_inner = [ + G.TimeSeries( + title="Put Batch Latency", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_sink_task_metrics_put_batch_avg_time_ms{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (avg.)", + ), + G.Target( + 
expr="kafka_connect_sink_task_metrics_put_batch_max_time_ms{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=sink_base + ), + ), + G.TimeSeries( + title="Sink Record Read Rate", + description="""Before transformations are applied, + this is the average per-second number of records read from Kafka + for the task belonging to the named sink connector in the worker + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_sink_task_metrics_sink_record_read_rate{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=sink_base + ), + ), + G.TimeSeries( + title="Sink Record Send Rate", + description="""After transformations are applied, + this is the average per-second number of records output from the transformations and + sent to the task belonging to the named sink connector in the worker + (excludes any records filtered out by the transformations) + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_sink_task_metrics_sink_record_send_rate{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=sink_base + ), + ), + G.TimeSeries( + title="Partition Count", + description="""Number of topic partitions assigned to the task and + which belong to the named sink connector in the worker + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_sink_task_metrics_partition_count{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + 
legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=sink_base + 1 + ), + ), + ] + sink_panels = [ + G.RowPanel( + title="Sink Tasks", + gridPos=G.GridPos(h=1, w=24, x=0, y=sink_base), + collapsed=True, + panels=sink_inner, + ), + ] + + # group all panels + panels = ( + overview_panels + + system_panels + + tasks_panels + + task_errors_panels + + source_panels + + sink_panels + + worker_panels + ) + + # build dashboard + return G.Dashboard( + title="Kafka Connect cluster", + description="Overview of the Kafka Connect cluster", + tags=["confluent", "kafka-connect"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +connect_cluster_label = os.environ.get( + "CONNECT_CLUSTER_LABEL", "kafka_connect_cluster_id" +) + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label, connect_cluster_label) diff --git a/grafana-dashboards/kafka-consumer.py b/grafana-dashboards/kafka-consumer.py new file mode 100644 index 00000000..eb3cf89a --- /dev/null +++ b/grafana-dashboards/kafka-consumer.py @@ -0,0 +1,1083 @@ +import os +import grafanalib.core as G + + +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka Consumer dashboard + It includes: + - Clients overview + - Performance + - Consumer Group + - Connections + - Per Broker + - Per Topic + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. 
+ Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + + # Default sizes + default_height = 5 + stat_width = 4 + ts_width = 8 + topk = "10" + + # Queries + by_env = env_label + '="$env"' + by_consumer = by_env + ', client_type="consumer"' + by_server = by_consumer + "," + server_label + '=~"$server"' + by_client = by_server + ', client_id=~"$client_id"' + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="server", + label="Server", + dataSource=ds, + query="label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{" + + by_consumer + + "}," + + server_label + + ")", + multi=True, + includeAll=True, + ), + G.Template( + name="client_id", + label="Client ID", + dataSource=ds, + query="label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{" + + by_consumer + + "},client_id)", + multi=True, + includeAll=True, + ), + ] + ) + + # Panel groups + ## Clients overview: + overview_base = 0 + overview_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="Record Consumed Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{" + + by_client + + "} > 0)", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), + ), + G.Stat( + title="Records Lag", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{" + + by_client + + "} > 0)", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + 
G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="Rebalance Rate per hour", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{" + + by_client + + "} > 0)", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=10.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), + ), + G.Stat( + title="Failed Rebalance Rate per hour", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{" + + by_client + + "} > 0)", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), + ), + G.Stat( + title="Versions", + dataSource=ds, + targets=[ + G.Target( + expr="count(kafka_consumer_app_info{" + + by_client + + ', version!=""}) by (version)', + legendFormat="{{version}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=0), + ), + ] + + ## Performance: + performance_base = overview_base + 1 + performance_inner = [ + G.TimeSeries( + title="Bytes Consumed Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", 
"last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + ), + ), + G.TimeSeries( + title="Records Consumed Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cts", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + ), + ), + G.TimeSeries( + title="Records Lag Max", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + ), + ), + G.TimeSeries( + title="Fetch Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Fetch Latency", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", 
"mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Fetch Size", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Fetch Throttle Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 2 + ), + ), + ] + performance_panels = [ + G.RowPanel( + title="Performance", + gridPos=G.GridPos(h=1, w=24, x=0, y=performance_base), + collapsed=True, + panels=performance_inner, + ), + ] + + ## Consumer Group: + group_base = performance_base + 3 + group_inner = [ + G.TimeSeries( + title="Commit Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_commit_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", 
"mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Join Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_join_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Sync Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_sync_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Commit Latency", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_commit_latency_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Join Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_join_time_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + 
",kafka_consumer_consumer_coordinator_metrics_join_time_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Sync Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_sync_time_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_sync_time_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Heartbeat Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 2 + ), + ), + G.TimeSeries( + title="Heartbeat Response Time (Max.)", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 2 + ), + ), + G.TimeSeries( + title="Last Heartbeat Seconds Ago", + dataSource=ds, + targets=[ + 
G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="s", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 2 + ), + ), + G.TimeSeries( + title="Rebalance Rate Per Hour", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (failed)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Rebalance Latency", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Assigned Partitions", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_assigned_partitions{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 3 + ), + ), + ] + group_panels = [ + G.RowPanel( + title="Consumer group", + gridPos=G.GridPos(h=1, w=24, x=0, y=group_base), + collapsed=True, + panels=group_inner, + ), + ] + + ## Connections: + connection_base = group_base + 4 + connection_inner = [ + G.TimeSeries( + title="Connection Count", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_metrics_connection_count{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + ), + ), + G.TimeSeries( + title="Connection Creation Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_metrics_connection_creation_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + ), + ), + G.TimeSeries( + title="Connection Close Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_metrics_connection_close_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + ), + ), + G.TimeSeries( + title="IO ratio", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_metrics_io_ratio{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="IO wait ratio", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_metrics_io_wait_ratio{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="Select Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_metrics_select_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="IO time avg.", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_metrics_io_time_ns_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ns", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 2 + ), + ), + G.TimeSeries( + title="IO wait time avg.", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_metrics_io_wait_time_ns_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ns", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 2 + ), + ), + ] + connection_panels = [ + G.RowPanel( + title="Connections", + 
gridPos=G.GridPos(h=1, w=24, x=0, y=connection_base), + collapsed=True, + panels=connection_inner, + ), + ] + + ## Per Broker: + per_broker_base = connection_base + 3 + per_broker_inner = [ + G.TimeSeries( + title="Incoming Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_node_metrics_incoming_byte_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} <- {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_broker_base + ), + ), + G.TimeSeries( + title="Outgoing Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_node_metrics_outgoing_byte_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + ), + ), + G.TimeSeries( + title="Request Latency", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_node_metrics_request_latency_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_node_metrics_request_latency_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=per_broker_base + ), + ), + G.TimeSeries( + title="Request Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_node_metrics_request_rate{" + + by_client + + 
"})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_broker_base + 1 + ), + ), + G.TimeSeries( + title="Response Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_node_metrics_response_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} <- {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + 1 + ), + ), + ] + per_broker_panels = [ + G.RowPanel( + title="Per Broker", + gridPos=G.GridPos(h=1, w=24, x=0, y=per_broker_base), + collapsed=True, + panels=per_broker_inner, + ), + ] + + ## Per Topic: + per_topic_base = per_broker_base + 2 + per_topic_inner = [ + G.TimeSeries( + title="Bytes Consumed Rate per Topic", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_broker_base + 0 + ), + ), + G.TimeSeries( + title="Records Consumed Rate per Topic", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + 0 + ), + ), + G.TimeSeries( + 
title="Fetch Size per Topic", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} <- {{topic}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} <- {{topic}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_broker_base + 1 + ), + ), + G.TimeSeries( + title="Records per Request Avg. per Topic", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + 0 + ), + ), + ] + per_topic_panels = [ + G.RowPanel( + title="Per Topic", + gridPos=G.GridPos(h=1, w=24, x=0, y=per_topic_base), + collapsed=True, + panels=per_topic_inner, + ), + ] + + # group all panels + panels = ( + overview_panels + + performance_panels + + group_panels + + connection_panels + + per_broker_panels + + per_topic_panels + ) + + # build dashboard + return G.Dashboard( + title="Kafka Consumer", + description="Overview of the Kafka consumers", + tags=["confluent", "kafka-client", "kafka-consumer"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", 
"Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/kafka-producer.py b/grafana-dashboards/kafka-producer.py new file mode 100644 index 00000000..f8864041 --- /dev/null +++ b/grafana-dashboards/kafka-producer.py @@ -0,0 +1,957 @@ +import os +import grafanalib.core as G + + +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka Producer dashboard + It includes: + - Clients overview + - Performance + - Connections + - Per Broker + - Per Topic + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + # Default sizes + default_height = 5 + stat_width = 4 + ts_width = 8 + topk = "10" + + # Queries + by_env = env_label + '="$env"' + by_producer = by_env + ', client_type="producer"' + by_server = by_producer + "," + server_label + '=~"$server"' + by_client = by_server + ', client_id=~"$client_id"' + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="server", + label="Server", + dataSource=ds, + query="label_values(kafka_producer_producer_metrics_record_retry_rate{" + + by_producer + + "}," + + server_label + + ")", + multi=True, + includeAll=True, + ), + G.Template( + name="client_id", + label="Client ID", + dataSource=ds, + query="label_values(kafka_producer_producer_metrics_record_retry_rate{" + + by_producer + + "},client_id)", + multi=True, + includeAll=True, + ), + ] + ) + + # Panel groups + ## Clients overview: + 
overview_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="Record Send Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_producer_producer_metrics_record_send_rate{" + + by_client + + "} > 0)", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), + ), + G.Stat( + title="Error Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_producer_producer_metrics_record_error_rate{" + + by_client + + "} > 0)", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="Retry Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_producer_producer_metrics_record_retry_rate{" + + by_client + + "} > 0)", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=10.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), + ), + G.Stat( + title="Versions", + dataSource=ds, + targets=[ + G.Target( + expr="count(kafka_producer_app_info{" + + by_client + + ',version!=""}) by (version)', + legendFormat="{{version}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), + ), + ] + + ## Performance: + performance_base = 1 + performance_inner = [ + G.TimeSeries( + title="Incoming 
Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_incoming_byte_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Outgoing Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_outgoing_byte_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Metadata Age", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_metadata_age{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="s", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Request Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_request_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Request in-flight", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_requests_in_flight{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), 
+ ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="short", # in-flight requests is a gauge (count), not a per-second rate + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Records per Request (avg.)", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_records_per_request_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Record Send Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_send_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 2 + ), + ), + G.TimeSeries( + title="Record Retry Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_retry_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 2 + ), + ), + G.TimeSeries( + title="Record Error Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_error_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 2 + ), + ), + G.TimeSeries( +
title="Record Size", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_size_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_size_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Record Queue Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_queue_time_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_queue_time_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Produce Throttle Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_produce_throttle_time_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_produce_throttle_time_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Batch Size", + dataSource=ds, + targets=[ + 
G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_batch_size_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_batch_size_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 4 + ), + ), + G.TimeSeries( + title="Batch Split Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_batch_split_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 4 + ), + ), + G.TimeSeries( + title="Compression Rate (avg.)", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_compression_rate_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 4 + ), + ), + ] + performance_panels = [ + G.RowPanel( + title="Performance", + gridPos=G.GridPos(h=1, w=24, x=0, y=performance_base), + collapsed=True, + panels=performance_inner, + ), + ] + + ## Connections: + connection_base = performance_base + 5 + connection_inner = [ + G.TimeSeries( + title="Connection Count", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_connection_count{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + ), + ), + G.TimeSeries( + title="Connection Creation Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_connection_creation_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + ), + ), + G.TimeSeries( + title="Connection Close Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_connection_close_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + ), + ), + G.TimeSeries( + title="IO ratio", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_io_ratio{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + # unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="IO wait ratio", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_io_wait_ratio{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + # unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="Select Rate", + 
dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_select_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="IO time avg.", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_io_time_ns_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ns", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 2 + ), + ), + G.TimeSeries( + title="IO wait time avg.", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_io_wait_time_ns_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ns", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 2 + ), + ), + ] + connection_panels = [ + G.RowPanel( + title="Connections", + gridPos=G.GridPos(h=1, w=24, x=0, y=connection_base), + collapsed=True, + panels=connection_inner, + ), + ] + + ## Per Broker: + per_broker_base = connection_base + 3 # Connections uses three layers (offsets 0, 1 and 2) + per_broker_inner = [ + G.TimeSeries( + title="Incoming Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_incoming_byte_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} <- {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_broker_base + ),
+ ), + G.TimeSeries( + title="Outgoing Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_outgoing_byte_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + ), + ), + G.TimeSeries( + title="Request Latency", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_request_latency_avg{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_request_latency_max{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=per_broker_base + ), + ), + G.TimeSeries( + title="Request Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_request_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_broker_base + 1 + ), + ), + G.TimeSeries( + title="Response Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_response_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + + server_label + + "}} <- {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + 
gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + 1 + ), + ), + ] + per_broker_panels = [ + G.RowPanel( + title="Per Broker", + gridPos=G.GridPos(h=1, w=24, x=0, y=per_broker_base), + collapsed=True, + panels=per_broker_inner, + ), + ] + + ## Per Topic: + per_topic_base = per_broker_base + 2 + per_topic_inner = [ + G.TimeSeries( + title="Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_byte_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_topic_base + ), + ), + G.TimeSeries( + title="Compression Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_compression_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_topic_base + ), + ), + G.TimeSeries( + title="Record Send Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_record_send_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=per_topic_base + ), + ), + G.TimeSeries( + title="Record Retry Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_record_retry_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), 
+ ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_topic_base + 1 + ), + ), + G.TimeSeries( + title="Record Error Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_record_error_rate{" + + by_client + + "})", + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_topic_base + 1 + ), + ), + ] + per_topic_panels = [ + G.RowPanel( + title="Per Topic", + gridPos=G.GridPos(h=1, w=24, x=0, y=per_topic_base), + collapsed=True, + panels=per_topic_inner, + ), + ] + + # group all panels + panels = ( + overview_panels + + performance_panels + + connection_panels + + per_broker_panels + + per_topic_panels + ) + + # build dashboard + return G.Dashboard( + title="Kafka Producer", + description="Overview of the Kafka producers", + tags=["confluent", "kafka-client", "kafka-producer"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/kafka-quotas.py b/grafana-dashboards/kafka-quotas.py new file mode 100644 index 00000000..72b870e6 --- /dev/null +++ b/grafana-dashboards/kafka-quotas.py @@ -0,0 +1,229 @@ +import os +import grafanalib.core as G + + +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): 
+ """ + Kafka Quotas dashboard + It includes: + - Quotas overview + - Throttling + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + + # Default sizes + default_height = 6 + ts_width = 8 + topk = "10" + + # Queries + by_env = env_label + '="$env"' + by_client = ( + by_env + + ',user=~"$user",client_id=~"$client_id",' + + server_label + + '=~"$broker"' + ) + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="broker", + label="Broker", + dataSource=ds, + query="label_values(kafka_server_produce_byte_rate{" + + by_env + + "}," + + server_label + + ")", + multi=True, + includeAll=True, + ), + G.Template( + name="user", + label="User", + dataSource=ds, + query="label_values(user)", + multi=True, + includeAll=True, + ), + G.Template( + name="client_id", + label="Client ID", + dataSource=ds, + query="label_values(client_id)", + multi=True, + includeAll=True, + ), + ] + ) + + # Panels: + panels = [ + G.TimeSeries( + title="Produce Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_produce_byte_rate{" + + by_client + + "})", + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=0), + ), + G.TimeSeries( + title="Fetch Byte Rate", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_fetch_byte_rate{" + + by_client + + "})", + legendFormat="User:{{user}} | 
Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=0), + ), + G.TimeSeries( + title="Request Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_request_request_time{" + + by_client + + "})", + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percent", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=0), + ), + G.TimeSeries( + title="Produce Throttle Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_produce_throttle_time{" + + by_client + + "} > 0)", + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=1), + ), + G.TimeSeries( + title="Fetch Throttle Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_fetch_throttle_time{" + + by_client + + "} > 0)", + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=1), + ), + G.TimeSeries( + title="Request Throttle Time", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_request_throttle_time{" + + by_client + + "} > 0)", + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, 
w=ts_width, x=ts_width * 2, y=1), + ), + ] + + return G.Dashboard( + title="Kafka Quotas", + description="Overview of the Kafka quotas", + tags=["confluent", "kafka-client", "kafka-quota"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/kafka-topics.py b/grafana-dashboards/kafka-topics.py new file mode 100644 index 00000000..0a64cf5d --- /dev/null +++ b/grafana-dashboards/kafka-topics.py @@ -0,0 +1,304 @@ +import os +import grafanalib.core as G + + +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka Topics dashboard + It includes: + - Throughput + - Offsets + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand.
+ + Invariants: + - Max width: 24 + """ + + # Default sizes + default_height = 10 + ts_width = 12 + table_width = 12 + topk = "10" + + # Queries + by_env = env_label + '="$env"' + by_topic = by_env + ',topic=~"$topic"' + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="topic", + label="Topic", + dataSource=ds, + query="label_values(kafka_log_log_size{" + by_env + "}, topic)", + multi=True, + includeAll=True, + ), + ] + ) + + # Panel groups: + ## Throughput: + throughput_base = 0 + throughput_layers = 3 + throughput_panels = [ + G.RowPanel( + title="Throughput", + gridPos=G.GridPos(h=1, w=24, x=0, y=throughput_base), + ), + G.TimeSeries( + title="Messages In/Sec", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum without(instance," + server_label + ",statefulset_kubernetes_io_pod_name) " # aggregate away the configured per-broker label + + "(rate(kafka_server_brokertopicmetrics_messagesinpersec{" + + by_topic + + "}[5m])))", + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 0, y=throughput_base + ), + ), + G.TimeSeries( + title="Log size", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum(kafka_log_log_size{" + + by_topic + + "}) by (topic))", + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 1, y=throughput_base + ), + ), + G.TimeSeries( + title="Bytes In/Sec", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum without(instance," + server_label + ",statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{" + + by_topic + + "}[5m])))", +
legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 0, y=throughput_base + 1 + ), + ), + G.TimeSeries( + title="Bytes Out/Sec", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum without(instance," + server_label + ",statefulset_kubernetes_io_pod_name) " # aggregate away the configured per-broker label + + "(rate(kafka_server_brokertopicmetrics_bytesoutpersec{" + + by_topic + + "}[5m])))", + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 1, y=throughput_base + 1 + ), + ), + G.TimeSeries( + title="Produce Requests/Sec", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ " + + by_topic + + "}[5m])) by (topic))", + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 0, y=throughput_base + 2 + ), + ), + G.TimeSeries( + title="Consumer Fetch Requests/Sec", + dataSource=ds, + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ " + + by_topic + + "}[5m])) by (topic))", + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 1, y=throughput_base + 2 + ), + ), + ] + + offsets_txs = [ + { + "id": "concatenate", + "options": {"frameNameLabel": "id", "frameNameMode": "label"}, + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + server_label + " 1", + "topic 1", + "Value #A", +
"Value #B", + "partition 1", + ] + } + }, + }, + {"id": "sortBy", "options": {"fields": {}, "sort": [{"field": "partition 1"}]}}, + {"id": "sortBy", "options": {"fields": {}, "sort": [{"field": "topic 1"}]}}, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value #A": 3, + "Value #B": 4, + server_label + " 1": 5, + "partition 1": 2, + "topic 1": 1, + }, + "renameByName": { + "Value #A": "start offset", + "Value #B": "end offset", + server_label + " 1": "broker", + "partition 1": "", + "topic": "", + "topic 1": "", + }, + }, + }, + ] + + ## Offsets + offsets_base = throughput_base + throughput_layers + offsets_inner = [ + G.Table( + title="Offsets", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_log_log_logstartoffset{" + by_topic + "}", + legendFormat="{{topic}}", + format="table", + instant=True, + ), + G.Target( + expr="kafka_log_log_logendoffset{" + by_topic + "}", + legendFormat="{{topic}}", + format="table", + instant=True, + ), + ], + filterable=True, + transformations=offsets_txs, + gridPos=G.GridPos( + h=default_height, w=table_width * 2, x=table_width * 0, y=offsets_base + ), + ), + ] + offsets_panels = [ + G.RowPanel( + title="Offsets", + gridPos=G.GridPos(h=1, w=24, x=0, y=offsets_base), + collapsed=True, + panels=offsets_inner, + ), + ] + + # group all panels + panels = throughput_panels + offsets_panels + + # build dashboard + return G.Dashboard( + title="Kafka topics", + description="Overview of the Kafka topics", + tags=["confluent", "kafka"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") + +# dashboard required by 
grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/ksqldb-cluster.py b/grafana-dashboards/ksqldb-cluster.py new file mode 100644 index 00000000..22755a90 --- /dev/null +++ b/grafana-dashboards/ksqldb-cluster.py @@ -0,0 +1,754 @@ +import os +import grafanalib.core as G + + +def dashboard( + ds="Prometheus", + env_label="namespace", + server_label="pod", + ksqldb_cluster_label="app", +): + """ + ksqlDB cluster dashboard + It includes: + - Cluster overview + - System resources + - Query Performance + - State Stores + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + + # Default sizes + default_height = 5 + stat_width = 4 + ts_width = 8 + + # Queries + by_env = env_label + '="$env"' + by_cluster = by_env + "," + ksqldb_cluster_label + '="$ksqldb_cluster"' + by_server = by_cluster + "," + server_label + '=~"$ksqldb_server"' + by_thread = by_server + 'thread_id=~".+$ksqldb_cluster_id.+"' + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="ksqldb_cluster", + label="ksqlDB cluster", + dataSource=ds, + query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" + + by_env + + "}," + + ksqldb_cluster_label + + ")", + ), + G.Template( + name="ksqldb_cluster_id", + label="ksqlDB cluster ID", + dataSource=ds, + query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" + + by_env + + "},ksql_cluster)", + hide=2, # true + ), + G.Template( + name="ksqldb_server", + label="ksqlDB server", + dataSource=ds, + query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" + 
+ by_cluster + + "}, " + + server_label + + ")", + multi=True, + includeAll=True, + ), + ] + ) + + # Panel groups + ## Cluster overview: + ### When updating descriptions on these panels, also update descriptions in confluent-platform.py + overview_base = 0 + overview_panels = [ + G.RowPanel( + title="Cluster Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=overview_base), + ), + # First layer + G.Stat( + title="ksqlDB: Online Servers", + description="""ksqlDB online instances returning metrics. + """, + dataSource=ds, + targets=[ + G.Target( + expr="count(ksql_ksql_engine_query_stats_num_active_queries{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 0, y=overview_base + ), + ), + G.Stat( + title="ksqlDB: Sum of Active Queries", + description="""Number of active queries deployed in the cluster. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_num_active_queries{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 1, y=overview_base + ), + ), + G.Stat( + title="ksqlDB: Sum of Running Queries", + description="""Number of running queries deployed in the cluster. + Ideally, this number should be equal to the number of active queries as queries should be running. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_running_queries{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 2, y=overview_base + ), + ), + G.Stat( + title="ksqlDB: Sum of Rebalancing Queries", + description="""Number of queries rebalancing in the cluster. + Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute). + It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time. + """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_rebalancing_queries{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 3, y=overview_base + ), + ), + G.Stat( + title="ksqlDB: Sum of Queries Failed", + description="""Number of queries failed in the cluster. + Ideally, this number should be equal zero. + It's recommended to alert if the number of queries failed is higher than 0. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_error_queries{" + + by_cluster + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 4, y=overview_base + ), + ), + # Second layer + G.TimeSeries( + title="Cluster Liveness", + description="A metric with constant value 1 indicating the server is up and emitting metrics.", + dataSource=ds, + targets=[ + G.Target( + expr="ksql_ksql_engine_query_stats_liveness_indicator{" + + by_cluster + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=0, y=overview_base + 1 + ), + ), + G.TimeSeries( + title="Messages consumed/sec", + description="The number of messages consumed per second across all queries.", + dataSource=ds, + targets=[ + G.Target( + expr="ksql_ksql_engine_query_stats_messages_consumed_per_sec{" + + by_cluster + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=overview_base + 1 + ), + ), + G.TimeSeries( + title="Messages produced/sec", + description="The number of messages produced per second across all queries.", + dataSource=ds, + targets=[ + G.Target( + expr="ksql_ksql_engine_query_stats_messages_produced_per_sec{" + + by_cluster + + "}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=overview_base + 1 + ), + ), + ] + + ## System resources: + ### When updating descriptions on these panels, also update descriptions in other cluster dashboards + 
system_base = overview_base + 2 + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="CPU usage", + description="""Rate of CPU seconds used by the Java process. + 100% usage represents one core. + If there are multiple cores, the total capacity should be 100% * number_cores.""", + dataSource=ds, + targets=[ + G.Target( + expr="irate(process_cpu_seconds_total{" + by_server + "}[5m])", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=system_base + ), + ), + G.TimeSeries( + title="Memory usage", + description="""Sum of JVM memory used, without including areas (e.g. heap size).""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(area)(jvm_memory_bytes_used{" + by_server + "})", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=system_base + ), + ), + G.TimeSeries( + title="GC collection", + description="""Sum of seconds used by Garbage Collection.""", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + by_server + + "}[5m]))", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=system_base + ), + ), + ] + + ## Query performance + queries_base = system_base + 1 + queries_inner = [ + G.TimeSeries( + title="Poll Latency", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_thread_metrics_poll_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}} (avg.)", + ), + G.Target( + 
expr="kafka_streams_stream_thread_metrics_poll_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + ), + ), + G.TimeSeries( + title="Process Latency", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_thread_metrics_process_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}} (avg.)", + ), + G.Target( + expr="kafka_streams_stream_thread_metrics_process_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + ), + ), + G.TimeSeries( + title="Commit Latency", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_thread_metrics_commit_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}} (avg.)", + ), + G.Target( + expr="kafka_streams_stream_thread_metrics_commit_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + 1 + ), + ), + G.TimeSeries( + title="Punctuate Latency", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_thread_metrics_punctuate_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}} (avg.)", + ), + G.Target( + expr="kafka_streams_stream_thread_metrics_punctuate_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + 1 + ), + ), + ] + queries_panels = [ + G.RowPanel( 
+ title="Queries Performance", + gridPos=G.GridPos(h=1, w=24, x=0, y=queries_base), + collapsed=True, + panels=queries_inner, + ), + ] + + ## State stores: + stores_base = queries_base + 2 + stores_inner = [ + G.TimeSeries( + title="Put Rate", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_put_rate{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 0 + ), + ), + G.TimeSeries( + title="Put Latency (Avg.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_put_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 0 + ), + ), + G.TimeSeries( + title="Put Latency (Max.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_put_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 0 + ), + ), + G.TimeSeries( + title="Put if absent Rate", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_put_if_absent_rate{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 1 + ), + ), + G.TimeSeries( + title="Put if absent Latency (Avg.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_put_if_absent_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}}", 
+ ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 1 + ), + ), + G.TimeSeries( + title="Put if absent Latency (Max.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_put_if_absent_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 1 + ), + ), + G.TimeSeries( + title="Fetch Rate", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_fetch_rate{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 2 + ), + ), + G.TimeSeries( + title="Fetch Latency (Avg.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_fetch_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 2 + ), + ), + G.TimeSeries( + title="Fetch Latency (Max.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_fetch_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 2 + ), + ), + G.TimeSeries( + title="Delete Rate", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_delete_rate{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 3 + ), + ), + G.TimeSeries( + title="Delete Latency (Avg.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_delete_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 3 + ), + ), + G.TimeSeries( + title="Delete Latency (Max.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_delete_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 3 + ), + ), + G.TimeSeries( + title="Restore Rate", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_restore_rate{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 4 + ), + ), + G.TimeSeries( + title="Restore Latency (Avg.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_restore_latency_avg{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 4 + ), + ), + G.TimeSeries( + title="Restore Latency (Max.)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_streams_stream_state_metrics_restore_latency_max{" + + by_thread + + "}", + legendFormat="{{thread_id}}", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 4 + ), + ), + ] + stores_panels = [ + G.RowPanel( + title="State Stores", + gridPos=G.GridPos(h=1, w=24, x=0, y=stores_base), + collapsed=True, + panels=stores_inner, + ), + ] + + # group all panels + panels = overview_panels + system_panels + queries_panels + stores_panels + + # build dashboard + return G.Dashboard( + title="ksqlDB cluster", + description="Overview of ksqlDB clusters.", + tags=[ + "confluent", + "ksqldb", + ], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +ksqldb_cluster_label = os.environ.get("KSQLDB_CLUSTER_LABEL", "ksqldb_cluster_id") + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label, ksqldb_cluster_label) diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py new file mode 100644 index 00000000..1b61e38b --- /dev/null +++ b/grafana-dashboards/schema-registry-cluster.py @@ -0,0 +1,305 @@ +import os +import grafanalib.core as G + + +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Schema Registry cluster dashboard + It includes: + - Cluster overview + - System resources + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. 
+ + Invariants: + - Max width: 24 + """ + + # Default sizes + default_height = 5 + stat_width = 4 + ts_width = 8 + + # Queries + by_env = env_label + '="$env"' + by_server = by_env + "," + server_label + '=~"$sr_server"' + + # Templating (variables) + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource=ds, + query="label_values(" + env_label + ")", + ), + G.Template( + name="sr_server", + label="Server", + dataSource=ds, + query="label_values(kafka_schema_registry_registered_count{" + + by_env + + "}, " + + server_label + + ")", + multi=True, + includeAll=True, + ), + ] + ) + + # Panel groups + ## Cluster overview: + ### When updating descriptions on these panels, also update descriptions in confluent-platform.py + healthcheck_base = 0 + healthcheck_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="SR: Online instances", + description="""Schema Registry online instances returning metrics. + """, + dataSource=ds, + targets=[ + G.Target( + expr="count(kafka_schema_registry_registered_count{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=2.0, color="green"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 0, y=healthcheck_base + ), + ), + G.Stat( + title="SR: Registered Schemas (avg.)", + description="""Average number of registered schemas across the cluster. + """, + dataSource=ds, + targets=[ + G.Target( + expr="avg(kafka_schema_registry_registered_count{" + by_env + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 1, y=healthcheck_base + ), + ), + G.Stat( + title="SR: Created Schemas by Type (avg.)", + description="""Average number of schemas created, by type. 
+ """, + dataSource=ds, + targets=[ + G.Target( + expr="avg(kafka_schema_registry_schemas_created{" + + by_env + + "}) by (schema_type)", + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 2, y=healthcheck_base + ), + ), + G.Stat( + title="SR: Sum of Deleted Schemas by Type", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_schema_registry_schemas_deleted{" + + by_env + + "}) by (schema_type)", + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 3, y=healthcheck_base + ), + ), + G.Stat( + title="SR: Sum of Active Connections", + description="Number of active connections", + dataSource=ds, + targets=[ + G.Target( + expr="sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count{" + + by_env + + "})", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 4, y=healthcheck_base + ), + ), + ] + + ## System resources: + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.TimeSeries( + title="CPU usage", + dataSource=ds, + targets=[ + G.Target( + expr="irate(process_cpu_seconds_total{" + by_server + "}[5m])", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=1), + ), + G.TimeSeries( + title="Memory usage", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(area)(jvm_memory_bytes_used{" + by_server + "})", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", 
"last"], + unit="bytes", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=1), + ), + G.TimeSeries( + title="GC collection", + dataSource=ds, + targets=[ + G.Target( + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + by_server + + "}[5m]))", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=1), + ), + ] + + + request_panels = [ + G.RowPanel( + title="Requests", + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + ), + G.TimeSeries( + title="Connections", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_schema_registry_kafka_schema_registry_metrics_connection_count{" + + by_env + +"}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=2), + ), + G.TimeSeries( + title="Request Rate", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_schema_registry_jersey_metrics_request_rate{" + + by_env + +"}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=2), + ), + G.TimeSeries( + title="Request Latency (p99)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_schema_registry_jersey_metrics_request_latency_99{" + + by_env + +"}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=2), + ), + ] + + # group all panels + panels = healthcheck_panels + system_panels + request_panels + + # build dashboard + return G.Dashboard( + title="Schema Registry cluster", + description="Overview of the Schema Registry cluster", + 
tags=["confluent", "schema-registry"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py new file mode 100644 index 00000000..cea3d67b --- /dev/null +++ b/grafana-dashboards/zookeeper-cluster.py @@ -0,0 +1,404 @@ +import os +import grafanalib.core as G + + +def dashboard(env_label="namespace", server_label="pod"): + default_height = 5 + stat_width = 4 + ts_width = 8 + + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values(" + env_label + ")", + ), + G.Template( + name="zk_server", + label="Server", + dataSource="Prometheus", + query="label_values(zookeeper_outstandingrequests{" + + env_label + + '="$env"}, ' + + server_label + + ")", + multi=True, + includeAll=True, + ), + G.Template( + name="quantile", + label="Quantile", + dataSource="Prometheus", + query="label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{" + + env_label + + '="$env"}, quantile)', + ), + ] + ) + + healthcheck_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="ZK: Quorum Size", + description="""Quorum Size of Zookeeper ensemble. + Count Zookeeper servers with quorum size metric. 
+ """, + dataSource="Prometheus", + targets=[ + G.Target( + expr="count(zookeeper_status_quorumsize{" + env_label + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=2.0, color="yellow"), + G.Threshold(index=2, value=3.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), + ), + G.Stat( + title="ZK: ZNodes (avg.)", + description="""Average size of ZNodes in the cluster. + Getting the node count per server, and averaging the node count. + """, + dataSource="Prometheus", + targets=[ + G.Target( + expr="avg(zookeeper_inmemorydatatree_nodecount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="ZK: Connections used", + description="""Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host. + If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened. + """, + dataSource="Prometheus", + targets=[ + G.Target( + expr="zookeeper_numaliveconnections{" + + env_label + + '="$env"} / zookeeper_maxclientcnxnsperhost{' + + env_label + + '="$env"}', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=0.6, color="yellow"), + G.Threshold(index=2, value=0.8, color="red"), + ], + format="percentunit", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), + ), + G.Stat( + title="ZK: Sum of watchers", + description="""Sum of client watchers subscribed to changes on the ZNodes. 
+ """, + dataSource="Prometheus", + targets=[ + G.Target( + expr="sum(zookeeper_inmemorydatatree_watchcount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), + ), + G.TimeSeries( + title="ZK: Outstanding Requests", + description="""Number of requests waiting for processing (queued). + If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked. + It could mean that there is not enough resources to cope with the number of requests. + """, + dataSource="Prometheus", + targets=[ + G.Target( + expr="zookeeper_outstandingrequests{" + env_label + '="$env"}', + legendFormat="{{" + + server_label + + "}} ({{server_id}}:{{member_type}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "last"], + legendPlacement="right", + gridPos=G.GridPos(h=default_height, w=ts_width, x=stat_width * 4, y=0), + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=10.0, color="red"), + ], + ), + ] + + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.TimeSeries( + title="CPU usage", + dataSource="Prometheus", + targets=[ + G.Target( + expr="irate(process_cpu_seconds_total{" + + env_label + + '="$env",' + + server_label + + '=~"$zk_server"}[5m])', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=1), + ), + G.TimeSeries( + title="Memory usage", + dataSource="Prometheus", + targets=[ + G.Target( + expr="sum without(area)(jvm_memory_bytes_used{" + + env_label + + '="$env",' + + server_label + + '=~"$zk_server"})', + legendFormat="{{" + server_label + "}}", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=1), + ), + G.TimeSeries( + title="GC collection", + dataSource="Prometheus", + targets=[ + G.Target( + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + env_label + + '="$env",' + + server_label + + '=~"$zk_server"}[5m]))', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=1), + ), + ] + + # TODO: validate if latency metrics make sense. + # Values are high-watermark of the metric and multiplied by tick-time to represent milliseconds. + latency_inner = [ + G.TimeSeries( + title="ZK: Request Latency (Minimum)", + dataSource="Prometheus", + targets=[ + G.Target( + expr="zookeeper_minrequestlatency{" + + env_label + + '="$env"} * zookeeper_ticktime', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=2), + ), + G.TimeSeries( + title="ZK: Request Latency (Average)", + dataSource="Prometheus", + targets=[ + G.Target( + expr="zookeeper_avgrequestlatency{" + + env_label + + '="$env"} * zookeeper_ticktime', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=2), + ), + G.TimeSeries( + title="ZK: Request Latency (Maximum)", + dataSource="Prometheus", + targets=[ + G.Target( + expr="zookeeper_maxrequestlatency{" + + env_label + + '="$env"} * zookeeper_ticktime', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, 
x=ts_width * 2, y=2), + ), + ] + latency_panels = [ + G.RowPanel( + title="Server Latency", + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + collapsed=True, + panels=latency_inner, + ), + ] + + kafka_base = 2 + 1 + kafka_inner = [ + G.TimeSeries( + title="Kafka: Request Latency", + dataSource="Prometheus", + targets=[ + G.Target( + expr="kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{" + + env_label + + '="$env",quantile=~"$quantile"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=kafka_base + ), + ), + G.TimeSeries( + title="Kafka: Sync Connections/sec", + dataSource="Prometheus", + targets=[ + G.Target( + expr="kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{" + + env_label + + '="$env"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=kafka_base + ), + ), + G.TimeSeries( + title="Kafka: Expired Connections/sec", + dataSource="Prometheus", + targets=[ + G.Target( + expr="kafka_server_sessionexpirelistener_zookeeperexpirespersec{" + + env_label + + '="$env"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=kafka_base + ), + ), + G.TimeSeries( + title="Kafka: Disconnected Connections/sec", + dataSource="Prometheus", + targets=[ + G.Target( + expr="kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{" + + env_label + + '="$env"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, 
+ gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=kafka_base + 1 + ), + ), + G.TimeSeries( + title="Kafka: Auth Failures on Connections/sec", + dataSource="Prometheus", + targets=[ + G.Target( + expr="kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{" + + env_label + + '="$env"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=kafka_base + 1 + ), + ), + ] + kafka_panels = [ + G.RowPanel( + title="Client Latency (Kafka)", + gridPos=G.GridPos(h=1, w=24, x=0, y=kafka_base), + collapsed=True, + panels=kafka_inner, + ), + ] + + panels = healthcheck_panels + system_panels + latency_panels + kafka_panels + + return G.Dashboard( + title="Zookeeper cluster", + description="Overview of the Zookeeper cluster", + tags=["confluent", "kafka", "zookeeper"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +dashboard = dashboard(env_label, server_label) diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json index d0cb048c..866e727f 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json @@ -1,78 +1,103 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + 
"type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the main health-check metrics from Confluent Platform components.", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 3, - "iteration": 1634040033398, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Zookeeper", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper cluster", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Quorum Size of Zookeeper ensemble", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": 
"null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 3 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" } ] }, @@ -81,70 +106,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "count(zookeeper_status_quorumsize{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper nodes online", + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, 
+ "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -153,158 +189,180 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(zookeeper_inmemorydatatree_nodecount{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of ZNodes", + "title": "ZK: ZNodes (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Alive Connections", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - 
{ - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 200 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 4, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_numaliveconnections{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "zookeeper_numaliveconnections{env=\"$env\"} / zookeeper_maxclientcnxnsperhost{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Alive Connections", + "title": "ZK: Connections used", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - 
"description": "Number of Watchers", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 1000 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -313,170 +371,246 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 1 + "y": 0 }, - "id": 22, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_inmemorydatatree_watchcount{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of Watchers", + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": 
"Prometheus", - "description": "Number of Alive Connections", + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 200 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 8, "x": 16, - "y": 1 + "y": 0 }, - "id": 24, + "height": null, + "hideTimeOverride": false, + "id": 6, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { 
"calcs": [ - "lastNotNull" + "max", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "right" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "zookeeper_outstandingrequests{job=\"zookeeper\",env=\"$env\"}", - "instant": true, + "datasource": null, + "expr": "zookeeper_outstandingrequests{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} ({{member_type}})", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outstanding Requests", - "type": "stat" + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Kafka Cluster", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka cluster", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of active controllers in the cluster.", + "description": "Count of brokers available (online).\n This value is referential and should not be used for alerting.\n ", + "editable": true, + "error": false, 
"fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "#e5ac0e", - "value": 2 - }, - { - "color": "#bf1b00" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -485,80 +619,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 6 + "y": 1 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 8, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "vertical", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "value_and_name" + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_controller_kafkacontroller_activecontrollercount{job=\"kafka-broker\",env=\"$env\"} > 0", + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Active Controllers", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Brokers Online", + "description": "Active 
Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 2 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -567,82 +702,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 6 + "y": 1 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.1.3", "repeat": null, - "repeatDirection": "h", + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(kafka_server_replicamanager_leadercount{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{env=\"$env\"} > 0", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Brokers Online", + "timeFrom": 
null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Partitions that are online", + "description": "Sum of Topic partitions across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 0 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -651,80 +785,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 6 + "y": 1 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 10, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_server_replicamanager_partitioncount{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + 
"step": 10, + "target": "" } ], - "title": "Online Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 5 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -733,81 +876,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 6 + "y": 1 }, - "id": 10, + "height": null, + "hideTimeOverride": false, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": 
"auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{env=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Replicated Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of partitions under min insync replicas.", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n It's recommended alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 5 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -816,81 +967,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 16, - "y": 6 + "y": 1 }, + "height": null, + 
"hideTimeOverride": false, "id": 12, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_cluster_partition_underminisr{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{env=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Min ISR Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-MinISR Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of partitions that dont have an active leader and are hence not writable or readable.", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n It's recommended alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "#ef843c", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", 
+ "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 1 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -899,661 +1058,138 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 20, - "y": 6 + "y": 1 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Offline Partitions Count", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Offline Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 26, - "panels": [], - "title": "Shema Registry", - "type": "row" - }, - { - "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - 
"value": 1 - } - ] + "steps": [] } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 11 - }, - "id": 30, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "count(kafka_schema_registry_registered_count{job=\"schema-registry\",env=\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schema Registry Instances", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 11 - }, - "id": 28, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_schema_registry_registered_count{job=\"schema-registry\",env=\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schemas registered", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 11 - }, - "id": 33, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - 
"reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_schema_registry_schemas_deleted{job=\"schema-registry\",env=\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schemas deleted", - "type": "stat" - }, - { - "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 15 - }, - "id": 37, - "panels": [], - "repeat": "cluster", - "title": "Kafka Connect ($kafka_connect_cluster_id) ", - "type": "row" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 16 + "y": 2 }, - "id": 39, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "count(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect worker instances", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 16 - }, - "id": 48, - "options": 
{ - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Total", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 16 - }, - "id": 41, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Running", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 16 - }, - "id": 43, - "options": { - "colorMode": "value", - 
"graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Paused", - "transformations": [], - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 16 - }, - "id": 45, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Failed", - "transformations": [], - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time since last rebalance", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 16 - }, - "id": 47, + "height": null, + "hideTimeOverride": false, + "id": 14, "interval": null, "links": [], "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "repeat": "instance", - "targets": [ - { - "exemplar": true, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",job=\"connect\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"} >= 0", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Time since last rebalance ", - "type": "stat" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 52, + "maxPerRow": null, + "minSpan": null, "panels": [], - "repeat": "clusterid", - "title": "ksqlDB Cluster ($ksqldb_cluster_id) ", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Schema Registry cluster", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average number of active queries per server.", + "description": "Schema Registry online instances returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": 
{}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 1 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" } ] }, @@ -1562,77 +1198,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 21 + "y": 2 }, - "id": 50, + "height": null, + "hideTimeOverride": false, + "id": 15, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "ksqlDB instances", + "title": "SR: Online instances", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average number of active queries per server.", + "description": 
"Average number of registered schemas across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1641,77 +1281,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 21 + "y": 2 }, - "id": 53, + "height": null, + "hideTimeOverride": false, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_num_active_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "instant": true, + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Active Queries", + "title": "SR: Registered Schemas (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - 
"description": "Num of created queries", + "description": "Average number of schemas created, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 800 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1720,76 +1364,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 21 + "y": 2 }, - "id": 55, + "height": null, + "hideTimeOverride": false, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_running_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Running Queries", + "title": "SR: Schemas Created by Type (avg.)", + "transformations": [], + 
"transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of rebalancing queries", + "description": "Average number of schemas deleted, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1798,300 +1447,1235 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 21 + "y": 2 }, - "id": 57, + "height": null, + "hideTimeOverride": false, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": 
"Rebalancing Queries", + "title": "SR: Schemas Deleted by Type (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of error query", + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 2 - } - ] - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 21 + "h": 1, + "w": 24, + "x": 0, + "y": 3 }, - "id": 59, + "height": null, + "hideTimeOverride": false, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Kafka Connect online workers returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": 
"auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "interval": "", - "legendFormat": "", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + 
"type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Informative value. 
Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" } ], + "repeat": "connect_cluster", + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Queries in Error State", - "type": "stat" + "title": "Kafka Connect cluster: $connect_cluster", + "transformations": [], + "transparent": false, + "type": "row" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Num of not running queries", + 
"collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 21 + "h": 1, + "w": 24, + "x": 0, + "y": 4 }, - "id": 61, + "height": null, + "hideTimeOverride": false, + "id": 26, "interval": null, "links": [], "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "ksqlDB online instances returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": 
null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ { - "exemplar": true, - "expr": "sum(ksql_ksql_engine_query_stats_not_running_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "interval": "", - "legendFormat": "", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of active queries deployed in the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, 
+ "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of queries failed in the cluster.\n Ideally, this number should 
be equal to zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Queries Failed", + "transformations": [], + "transparent": false, + "type": "stat" } ], + "repeat": "ksqldb_cluster", + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Stopped Queries", - "type": "stat" + "title": "ksqlDB cluster: $ksqldb_cluster", + "transformations": [], + "transparent": false, + "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false,
"style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, - "datasource": null, - "definition": "label_values(env)", - "description": null, - "error": null, + "datasource": "Prometheus", "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "isNone": true, "selected": false, - "text": "None", - "value": "" + "tags": [], + "text": null, + "value": null }, - "datasource": null, - "definition": "label_values(kafka_connect_cluster_id)", - "description": null, - "error": null, - "hide": 0, + "datasource": "Prometheus", + "hide": true, "includeAll": false, - "label": "Kafka Connect Cluster ID", + "label": "Kafka Connect cluster", "multi": false, - "name": "kafka_connect_cluster_id", + "name": "connect_cluster", "options": [], - "query": { - "query": "label_values(kafka_connect_cluster_id)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\"}, kafka_connect_cluster_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", 
"current": { - "isNone": true, "selected": false, - "text": "None", - "value": "" + "tags": [], + "text": null, + "value": null }, - "datasource": null, - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "description": null, - "error": null, - "hide": 0, + "datasource": "Prometheus", + "hide": true, "includeAll": false, - "label": "ksqlDB Cluster ID", + "label": "ksqlDB cluster", "multi": false, - "name": "ksqldb_cluster_id", + "name": "ksqldb_cluster", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{env=\"$env\"}, ksqldb_cluster_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", "title": "Confluent Platform overview", - "uid": "JiqnBMNnz", - "version": 1 -} \ No newline at end of file + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json index 3be6d2a8..330da237 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json @@ -1,75 +1,87 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", 
+ "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Kafka resource usage and throughput", + "description": "Overview of the Kafka cluster", "editable": true, - "gnetId": 721, - "graphTooltip": 0, - "id": 6, - "iteration": 1647427255896, + "gnetId": null, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Healthcheck", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Overview", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of active controllers in the cluster.", + "description": "Count of brokers available (online).\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "#e5ac0e", - 
"value": 2 - }, - { - "color": "#bf1b00" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -78,79 +90,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "vertical", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "value_and_name" + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_controller_kafkacontroller_activecontrollercount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"} > 0", + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Active Controllers", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Brokers Online", + "description": "Active Controller broker.\n It should always be 1. 
If the value is different than 1, then it must be alerted for troubleshooting.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 2 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -159,81 +173,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.1.3", "repeat": null, - "repeatDirection": "h", + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "count(kafka_server_replicamanager_leadercount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{env=\"$env\"} > 0", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Brokers Online", + "timeFrom": null, + "timeShift": null, + "title": 
"Kafka: Active Controller", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Partitions that are online", + "description": "\n Number of partitions where the preferred replica is not the leader.\n Usually, this number is 0.\n Restarting nodes could cause this value to change, but when reassigning happens the value stabilizes.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 0 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -242,78 +256,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 4, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_replicamanager_partitioncount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, +
"instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Online Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Preferred Replica Imbalance", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Number of topics in the cluster.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 - }, - { - "color": "#d44a3a" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -322,266 +339,247 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, "y": 1 }, - "id": 33, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{env=\"$env\"})", 
"format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Preferred Replica Imbalance", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Topics", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Sum of requests per second rated over a 5 min. period.\n Gives an idea of the processing load in the cluster.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "Bps" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, + "h": 5, + "w": 4, "x": 16, - "y": 1 + "y": 0 }, - "id": 84, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { 
"calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Bytes in", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "Bytes out", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "B", - "step": 4 + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Broker network throughput", - "type": "timeseries" + "title": "Kafka: Rate of Requests/Sec", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", + "description": "Sum of log sizes per broker.\n This must be compared with the total storage space available in the brokers.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + 
"noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#bf1b00", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 20, + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 7, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_log_log_size{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Replicated Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Log Size", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of partitions under min insync replicas.", + "description": "Sum of Topic partitions across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": 
"thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#bf1b00", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -590,80 +588,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 4, - "y": 5 + "x": 0, + "y": 1 }, - "id": 32, + "height": null, + "hideTimeOverride": false, + "id": 8, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_cluster_partition_underminisr{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{env=\"$env\",hostname=~\"$broker\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Min ISR Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of partitions that dont have an active leader and are hence not writable or readable.", + 
"description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "#ef843c", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 1 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -672,78 +679,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 8, - "y": 5 + "x": 4, + "y": 1 }, - "id": 22, + "height": null, + "hideTimeOverride": false, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": 
"sum(kafka_server_replicamanager_underreplicatedpartitions{env=\"$env\",hostname=~\"$broker\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Offline Partitions Count", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Unclean leader election rate", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n We recommend alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#d44a3a" + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -752,82 +770,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 12, - "y": 5 + "x": 8, + "y": 1 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 10, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - 
"orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{env=\"$env\",hostname=~\"$broker\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Unclean Leader Election Rate", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-MinISR Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 31, - "panels": [], - "title": "Request rate", - "type": "row" - }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Produce request rate.", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n We recommend alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ 
-836,184 +861,272 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 10 + "x": 12, + "y": 1 }, - "id": 93, + "height": null, + "hideTimeOverride": false, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m]))", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "All Request Per Sec", + "title": "Kafka: Sum of Offline Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Produce request rate.", + "description": "Sum of bytes in per second rated over a 5 min. 
period.\n Gives an idea of the incoming throughput handle by the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 4, - "y": 10 + "x": 16, + "y": 1 }, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 12, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Produce\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce Request Per Sec", + "title": "Kafka: Bytes In/Sec", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Fetch request rate.", + "description": "Sum of bytes out 
per second rated over a 5 min. period.\n Gives an idea of the outgoing throughput handle by the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 8, - "y": 10 + "x": 20, + "y": 1 }, - "id": 37, + "height": null, + "hideTimeOverride": false, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"FetchConsumer\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer Fetch Request Per Sec", + "title": "Kafka: Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System resources", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1035,14 +1148,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, "showPoints": "auto", "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -1050,33 +1161,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "steps": [] + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 10 + "h": 10, + "w": 8, + "x": 0, + "y": 2 }, - "id": 122, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1085,1544 +1195,268 @@ "mode": "single" } }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - 
"expr": "rate(kafka_network_requestmetrics_errorspersec{error!=\"NONE\"}[5m])", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{error}} @ {{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Errors", + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Fetch request rate.", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 14 + "h": 10, + "w": 8, + "x": 8, + "y": 2 }, - "id": 94, + "height": null, + "hideTimeOverride": false, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "area", - 
"justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Fetch\"}[5m]))", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Broker Fetch Request Per Sec", - "type": "stat" + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Offset Commit request rate.", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], 
"thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 14 + "h": 10, + "w": 8, + "x": 16, + "y": 2 }, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"OffsetCommit\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Offset Commit Request Per Sec", - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Metadata request rate.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 14 - }, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - 
"colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Metadata\"}[5m]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Metadata Request Per Sec", - "type": "stat" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 40, - "panels": [], - "title": "System", - "type": "row" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Cores", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 19 - }, - "id": 27, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": 
"table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "irate(process_cpu_seconds_total{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])*100", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "process_cpu_secondspersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Memory", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 19 - }, - "id": 2, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "jvm_memory_bytes_used", - "refId": "A", - "step": 4 - }, - { - "expr": "jvm_memory_bytes_max{job=\"kafka-broker\",area=\"heap\",env=\"$env\",instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM Memory Used", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "% time in GC", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 19 - }, - "id": 3, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": 
"jvm_gc_collection_seconds_sum", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Time spent in GC", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 29, - "panels": [ - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Messages/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 24 - }, - "id": 4, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_messagesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - 
"axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 24 - }, - "id": 5, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 24 - }, - "id": 6, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Messages/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 32 - }, - "id": 10, - 
"links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "kafka_server_brokertopicmetrics_messagesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In Per Broker", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 32 - }, - "id": 7, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum 
without(topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In Per Broker", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 32 - }, - "id": 9, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out Per Broker", - "type": "timeseries" - } - 
], - "title": "Throughput In/Out", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 44, - "panels": [ - { - "datasource": "Prometheus", - "description": "Average fraction of time the network processor threads are idle. Values are between 0 (all resources are used) and 100 (all resources are available)\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Processor Avg Usage Percent", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "Average fraction of time the request handler threads are idle. 
Values are between 0 (all resources are used) and 100 (all resources are available).\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 25 - }, - "id": 25, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "1 - kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Handler Avg Percent", - "type": "timeseries" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Thread utilization", - "type": "row" + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 29 + "y": 3 }, - "id": 86, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Latency in millseconds for ZooKeeper requests from broker.\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 88, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Zookeeper Request Latency", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 26 - }, - "id": 92, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Zookeeper connections per sec", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", + "description": "Number of messages into topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2633,7 +1467,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2644,51 +1478,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": 
"never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 0, - "y": 35 + "y": 3 }, - "id": 89, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2697,24 +1525,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper expired connections per sec", + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Number of bytes into topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2725,7 +1567,7 @@ "axisPlacement": "auto", 
"barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2736,51 +1578,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 8, - "y": 35 + "y": 3 }, - "id": 90, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2789,24 +1625,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper disconnect per sec", + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": 
"Prometheus", - "description": "", + "description": "Number of bytes out of topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2817,7 +1667,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2828,51 +1678,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 16, - "y": 35 + "y": 3 }, - "id": 91, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2881,39 +1725,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + 
"step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper auth failures per sec", + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Zookeeper", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 4 }, - "id": 82, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). 
No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention.The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", + "description": "Percent of time the network thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2924,7 +1809,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2935,51 +1820,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 27 + "y": 4 }, - "id": 80, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2988,23 +1867,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])", + "datasource": null, 
+ "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IsrShrinks per Sec", + "title": "Network processor usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). 
No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention.The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", + "description": "Percent of time the IO thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3015,7 +1909,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3026,51 +1920,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 27 + "h": 10, + "w": 8, + "x": 8, + "y": 4 }, - "id": 83, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3079,38 +1967,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "rate(kafka_server_replicamanager_isrexpandspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])", + 
"datasource": null, + "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IsrExpands per Sec", + "title": "Request processor (IO) usage", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Isr Shrinks / Expands", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread utilization", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 5 }, - "id": 53, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Requests per second rated over a 5 minutes period.\n Includes API call and version.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3121,7 +2051,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3132,50 +2062,48 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, 
"thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 28 + "y": 5 }, - "id": 55, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3184,21 +2112,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_log_log_size{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}) by (topic)", - "legendFormat": "{{topic}}", - "refId": "A" + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{request}}(v{{version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Log size per Topic", + "title": "Requests rates", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Request Errors per second rated over a 5 minutes period.\n Includes API call and version.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3209,7 +2154,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3220,50 
+2165,48 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 28 + "h": 10, + "w": 8, + "x": 8, + "y": 5 }, - "id": 56, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3272,37 +2215,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_log_log_size{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}) by (instance)", - "legendFormat": "{{instance}}", - "refId": "A" + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{env=\"$env\",hostname=~\"$broker\",error!=\"NONE\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{error}}@{{request}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Log size per Broker", + "title": "Error rates", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Logs size", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": 
"Request rates", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 6 }, - "id": 58, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", + "description": "Sum of connections count across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3313,7 +2299,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3324,75 +2310,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 29 + "y": 6 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", 
"mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - RequestQueueTimeMs", + "title": "Sum of Connections alive per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. 
In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "description": "Sum of rate of connections created across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3403,7 +2399,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3414,75 +2410,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 29 + "h": 10, + "w": 8, + "x": 8, + "y": 6 }, - "id": 61, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - 
"refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - LocalTimeMs", + "title": "Sum of Connections creation rate per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "description": "Sum of rate of connections closed across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3493,7 +2499,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3504,50 +2510,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 38 + "x": 16, + "y": 6 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, 
+ "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3556,23 +2557,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - RemoteTimeMs", + "title": "Sum of Connections close rate per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "description": "Sum of connections count across cluster by listeners", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3583,7 +2599,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3594,50 +2610,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], 
"thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 38 + "x": 0, + "y": 7 }, - "id": 63, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3646,23 +2657,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - ResponseQueueTimeMs", + "title": "Sum of Connections alive per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. 
If the network buffer gets full, Kafka will block.\n", + "description": "Sum of rate of connections created across cluster by listener", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3673,7 +2699,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3684,51 +2710,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 16, - "y": 38 + "x": 8, + "y": 7 }, - "id": 64, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3737,38 +2757,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - ResponseSendTimeMs", + "title": "Sum of Connections creation rate per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Producer Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 68, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", + "description": "Sum of rate of connections closed across cluster by listener", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3779,7 +2799,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3790,76 +2810,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 30 + "h": 10, + "w": 8, + "x": 16, + "y": 7 }, - "id": 69, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - 
"lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - RequestQueueTimeMs", + "title": "Sum of Connections close rate per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. 
One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "description": "Rate of ISR shrinks per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3870,7 +2941,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3881,75 +2952,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 30 + "h": 10, + "w": 8, + "x": 0, + "y": 8 }, - "id": 70, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": 
"rate(kafka_server_replicamanager_isrshrinkspersec{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - LocalTimeMs", + "title": "Rate of ISR Shrinks/sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "description": "Rate of ISR expands per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3960,7 +3041,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3971,50 +3052,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, 
"overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 0, - "y": 39 + "x": 8, + "y": 8 }, - "id": 71, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4023,23 +3099,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrexpandspersec{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - RemoteTimeMs", + "title": "Rate of ISR Expands/sec", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "In-Sync Replicas", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + 
"cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4050,7 +3183,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4061,51 +3194,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 8, - "y": 39 + "x": 0, + "y": 9 }, - "id": 72, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4114,23 +3241,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - ResponseQueueTimeMs", + "title": "Produce: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. If the network buffer gets full, Kafka will block.\n", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4141,7 +3283,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4152,51 +3294,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 16, - "y": 39 + "x": 8, + "y": 9 }, - "id": 73, + "height": null, + 
"hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4205,38 +3341,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - ResponseSendTimeMs", + "title": "Produce: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Consumer Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 34 - }, - "id": 66, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. 
The request queue size should match the number of connections.", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4247,7 +3383,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4258,76 +3394,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 31 + "h": 10, + "w": 8, + "x": 16, + "y": 9 }, - "id": 74, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" 
+ "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - RequestQueueTimeMs", + "title": "Produce: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4338,7 +3483,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4349,76 +3494,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 31 + "h": 10, + "w": 8, + "x": 0, + "y": 10 }, - "id": 75, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { 
"legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - LocalTimeMs", + "title": "Produce: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. 
Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4429,7 +3583,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4440,50 +3594,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 0, - "y": 40 + "x": 8, + "y": 10 }, - "id": 76, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4492,23 +3641,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - RemoteTimeMs", + "title": "Produce: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Producer", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4519,7 +3725,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4530,50 +3736,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - 
"links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 8, - "y": 40 + "x": 0, + "y": 11 }, - "id": 77, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4582,23 +3783,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - ResponseQueueTimeMs", + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. 
If the network buffer gets full, Kafka will block.\n", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4609,7 +3825,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4620,51 +3836,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 16, - "y": 40 + "x": 8, + "y": 11 }, - "id": 78, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4673,37 +3883,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", 
+ "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - ResponseSendTimeMs", + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Fetch Follower Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 35 - }, - "id": 102, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4714,7 +3925,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4725,50 +3936,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 + "h": 10, + "w": 8, + "x": 16, + "y": 11 }, - "id": 98, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4777,22 +3983,38 @@ "mode": "single" } }, - "pluginVersion": 
"8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_count{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections count per listener", + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4803,7 +4025,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4814,50 +4036,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 + "h": 10, + "w": 8, + "x": 0, + "y": 12 }, - "id": 100, + "height": null, + "hideTimeOverride": false, + "id": 
48, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4866,22 +4083,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_count{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections count per broker", + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4892,7 +4125,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4903,50 +4136,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 + "h": 10, + "w": 8, + "x": 8, + "y": 12 }, - "id": 104, + "height": null, + "hideTimeOverride": false, + "id": 49, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4955,22 +4183,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections creation rate per listener", + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Consumer Fetch", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, 
+ "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 50, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4981,7 +4267,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4992,50 +4278,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 + "h": 10, + "w": 8, + "x": 0, + "y": 13 }, - "id": 106, + "height": null, + "hideTimeOverride": false, + "id": 51, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5044,22 +4325,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)", + "datasource": null, + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections creation rate per instance", + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5070,7 +4367,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5081,50 +4378,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 48 + "h": 10, + "w": 8, + "x": 8, + "y": 13 }, - "id": 108, + "height": null, + "hideTimeOverride": false, + "id": 52, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": 
"bottom" @@ -5133,22 +4425,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections close rate per listener", + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5159,7 +4467,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5170,50 +4478,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 + "h": 10, + "w": 8, + "x": 
16, + "y": 13 }, - "id": 110, + "height": null, + "hideTimeOverride": false, + "id": 53, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5222,23 +4525,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections close rate per instance", + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Tracks the amount of time Acceptor is blocked from accepting connections. 
See KIP-402 for more details.", + "description": "Time spent waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5249,7 +4567,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5260,51 +4578,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percent" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 56 + "y": 14 }, - "id": 112, + "height": null, + "hideTimeOverride": false, + "id": 54, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5313,22 +4625,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_acceptor_acceptorblockedpercent{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} - {{listener}}", - "refId": "A" + "intervalFactor": 2, + 
"legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Acceptor Blocked Percentage", + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5339,7 +4667,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5350,50 +4678,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 + "h": 10, + "w": 8, + "x": 8, + "y": 14 }, - "id": 114, + "height": null, + "hideTimeOverride": false, + "id": 55, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5402,38 +4725,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connections{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (client_software_name, 
client_software_version)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_software_name}} {{client_software_version}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections per client version", + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Connections", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Replica Fetch", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 15 }, - "id": 120, + "height": null, + "hideTimeOverride": false, + "id": 56, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of consumer groups per group coordinator", + "description": "Number of groups managed by Broker", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5444,7 +4809,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5455,50 +4820,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": 
"never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 33 + "y": 15 }, - "id": 116, + "height": null, + "hideTimeOverride": false, + "id": 57, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5507,24 +4867,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer groups number per coordinator", + "title": "Number of Groups per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of consumer group per state", + "description": "Number of stable groups managed by Broker", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5535,7 +4909,7 @@ "axisPlacement": "auto", "barAlignment": 0, 
"drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5546,50 +4920,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 33 + "h": 10, + "w": 8, + "x": 8, + "y": 15 }, - "id": 118, + "height": null, + "hideTimeOverride": false, + "id": 58, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5598,63 +4969,136 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "stable", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": 
"sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "preparing-rebalance", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "preparing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "dead", - "refId": "C" + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "completing-rebalance", - "refId": "D" + "intervalFactor": 2, + "legendFormat": "completing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "empty", - "refId": "E" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Nb consumer groups per state", + 
"title": "Number of Groups per Broker per Status", + "transformations": [], + "transparent": false, "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Group Coordinator", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 37 + "y": 16 }, - "id": 46, + "height": null, + "hideTimeOverride": false, + "id": 59, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of messages produced converted to match the log.message.format.version.", + "description": "Sum of produce message conversions per second.\n This value increases when the broker receives produce messages from clients using older versions.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5665,7 +5109,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5676,77 +5120,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "opsps" }, "overrides": [] }, "gridPos": { - "h": 8, + 
"h": 10, "w": 8, "x": 0, - "y": 34 + "y": 16 }, - "id": 48, + "height": null, + "hideTimeOverride": false, + "id": 60, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of produced message conversion", + "title": "Sum of Produce conversion rate per sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of messages consumed converted at consumer to match the log.message.format.version.", + "description": "Sum of fetch message conversions per second.\n This value increases when the broker receives fetch messages from clients using older versions.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5757,7 +5209,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5768,255 +5220,292 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": 
"never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "opsps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 34 + "y": 16 }, - "id": 51, + "height": null, + "hideTimeOverride": false, + "id": 61, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of consumed message conversion", + "title": "Sum of Fetch conversion rate per sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": null, - "description": "Number of connection per client version", + "datasource": "Prometheus", + "description": "Sum of connections aggregated by client version and name.\n ", + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } }, - "decimals": 0, "mappings": [], - "unit": "short" + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 34 + "y": 16 }, - "id": 96, + "height": null, + "hideTimeOverride": false, + "id": 62, "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", - "reduceOptions": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, "tooltip": { "mode": "single" } }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connections{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (client_software_name, client_software_version) ", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connections{env=\"$env\",hostname=~\"$broker\"}) by (client_software_name,client_software_version)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_software_name}} - {{client_software_version}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_software_name}} (v{{client_software_version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], 
"timeFrom": null, "timeShift": null, - "title": "Client version repartition", - "type": "piechart" + "title": "Sum of Connections per version", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Message Conversion", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_server_kafkaserver_brokerstate{env=\"${env}\"}, instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", + "label": "Broker", "multi": true, - "name": "instance", + "name": "broker", "options": [], - "query": { - "query": 
"label_values(kafka_server_kafkaserver_brokerstate{env=\"${env}\"}, instance)", - "refId": "Prometheus-instance-Variable-Query" - }, + "query": "label_values(kafka_server_replicamanager_leadercount{env=\"$env\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": "", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "0.95" - ], - "value": [ - "0.95" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(quantile)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, - "label": "Percentile", - "multi": true, - "name": "percentile", + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", "options": [], - "query": { - "query": "label_values(quantile)", - "refId": "Prometheus-percentile-Variable-Query" - }, + "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{env=\"$env\"}, quantile)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -6043,6 +5532,6 @@ }, "timezone": "browser", "title": "Kafka cluster", - "uid": "qu-QZdfZz", - "version": 2 -} \ No newline at end of file + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json index a2983dde..1987f361 100644 --- 
a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json @@ -1,370 +1,547 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Monitor Apache Kafka Connect", + "description": "Overview of the Kafka Connect cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 5, - "iteration": 1632255569594, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 199, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "General", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Overview", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Kafka Connect online workers returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": 
"none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 212, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",version!=\"\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Total", + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", 
+ "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 213, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Running", + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": 
"blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "orange", - "value": 1 + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 215, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Paused", + "title": "Connect: Sum of Running Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - 
"noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "red", - "value": 1 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 12, - "y": 1 + "y": 0 }, - "id": 214, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Failed", + "title": "Connect: Sum of Paused Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's 
recommended alerting when this value is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "yellow", - "value": 1 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 16, - "y": 1 + "y": 0 }, - "id": 216, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Unassigned", + "title": "Connect: Sum of Failed Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, 
"datasource": "Prometheus", + "description": "Informative value. Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "purple", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "clockms" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 20, - "y": 1 + "y": 0 }, - "id": 217, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -372,316 +549,434 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Destroyed", + "title": 
"Connect: Time since last rebalance", "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], "datasource": "Prometheus", - "description": "", + "description": "Connect workers metadata and stats.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] + "align": "auto", + "displayMode": "auto", + "filterable": false }, - { - "matcher": { - "id": "byName", - "options": "running" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] + "thresholds": { + "mode": "absolute", + "steps": [] } - ] + }, + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 7, - "w": 12, + "h": 5, + "w": 24, "x": 0, - "y": 4 + "y": 1 }, - "id": 227, + "height": null, + "hideTimeOverride": false, + "id": 8, "interval": null, "links": [], - "maxDataPoints": 1, + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single" - } + "showHeader": true }, - "pluginVersion": "7.0.5", + "repeat": null, + "repeatDirection": null, + "span": 6, "targets": [ { - "expr": "sum (kafka_connect_connector_metrics{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status=\"running\"})", + "datasource": null, + "expr": "kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",start_time_ms!=\"\"}", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "running", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum (kafka_connect_connector_metrics{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status=\"stopped\"})", + "datasource": null, + "expr": "kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",version!=\"\"}", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "stopped", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum (kafka_connect_connector_metrics{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status=\"paused\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "paused", - "refId": "C" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connector repartition per status", - "type": "piechart" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" - 
}, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "destroyed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B877D9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "unassigned" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FADE2A", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 4 - }, - "id": 219, - "interval": null, - "links": [], - "maxDataPoints": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "7.0.5", - "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_startup_success_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "running", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "failed", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "paused", - "refId": "C" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_startup_success_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "unassigned", - "refId": "D" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_startup_failure_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "destroyed", - "refId": "E" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Task repartition per status", - "type": "piechart" - }, - { - "datasource": "Prometheus", - "description": "Status of connectors over time", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, + "title": "Connect Workers", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "hostname" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "hostname", + "kafka_connect_cluster_id 1", + "start_time_ms", + "version", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + "env 1" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "env 1": 0, + "hostname": 2, + "kafka_connect_cluster_id 1": 1, + "start_time_ms": 3, + "version": 4 + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. 
failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + "env 1": "environment", + "hostname": "worker", + "kafka_connect_cluster_id 1": "cluster", + "start_time_ms": "start time", + "version": "version" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "Prometheus", + "description": "Connectors deployed and task stats.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": 
false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connectors", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "connector" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + "Value #C", + "Value #D", + "Value #E" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "How much time the connector tasks are in running state.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -689,144 +984,88 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + 
"lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "percentunit" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/stopped.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/paused.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/running.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 6, + "h": 10, "w": 12, "x": 0, - "y": 11 + "y": 3 }, - "id": 228, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum (kafka_connect_connector_metrics{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status!=\"\"}) by (status) ", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_running_ratio{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{status}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Status of connectors", + "title": "Tasks Running Ratio", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Status of tasks over time", + "description": "Average ime spent on rebalance state.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -845,223 +1084,140 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "destroyed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "purple", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "running" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "unassigned" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 6, + "h": 10, "w": 12, "x": 12, - "y": 11 + "y": 3 }, - "id": 226, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "running", - "refId": "A" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "failed", - "refId": "B" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "paused", - "refId": "C" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "destroyed", - "refId": "D" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "unassigned", - "refId": "E" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Status of tasks", + "title": "Rebalance Latency (avg.)", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 4 }, - "id": 221, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], + "repeat": null, 
+ "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Cores", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1072,50 +1228,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percent" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 18 + "y": 4 }, - "id": 223, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1124,34 +1275,49 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"rate(process_cpu_seconds_total{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\"}[5m])", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Memory", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1159,53 +1325,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 18 + "y": 4 }, - "id": 224, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1214,39 +1375,49 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\",area=\"heap\"}", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "% time in GC", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1254,55 +1425,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, 
"type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 2, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 18 + "y": 4 }, - "id": 225, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1311,1933 +1475,68 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM GC time", + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 26 - }, - "id": 97, - "panels": [ - { - "columns": [], - "datasource": "Prometheus", - "fontSize": "90%", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 146, - "pageSize": 100, - "showHeader": true, - "sort": { - "col": 7, - "desc": true - }, - "styles": [ - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "__name__", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "env", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "job", - "thresholds": [], - "type": "hidden", - 
"unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "client_id", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Startup time", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "MMMM D, YYYY LT", - "decimals": 2, - "mappingType": 1, - "pattern": "start_time_ms", - "thresholds": [], - "type": "date", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #B", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #A", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Connector Count", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #C", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Connector Startup Success Total", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #D", - "thresholds": 
[], - "type": "number", - "unit": "short" - }, - { - "alias": "Connector Startup Failure Total", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #E", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Number of rebalances", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #F", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Average time of Rebalances", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #G", - "thresholds": [], - "type": "number", - "unit": "ms", - "valueMaps": [ - { - "text": "0", - "value": "NaN" - }, - { - "text": "N/A", - "value": "null" - } - ] - }, - { - "alias": "Time since last rebalance", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #H", - "thresholds": [], - "type": "number", - "unit": "ms" - }, - { - "alias": "Worker instance", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Number of tasks", - "align": "auto", - "colorMode": null, - "colors": [ - 
"rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #I", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Task Startup Success ", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #J", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Task Startup Failure", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #K", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "right", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "kafka_connect_app_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\",start_time_ms!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" - }, - { - "expr": "kafka_connect_app_info{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\",version!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_startup_success_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "E" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "I" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_startup_success_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "J" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_startup_failure_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "K" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect Worker", - "transform": "table", - "transparent": true, - "type": "table-old" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - 
"dashes": false, - "datasource": "Prometheus", - "description": "Average number of network operations (reads or writes) on all connections per second", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 35 - }, - "hiddenSeries": false, - "id": 95, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_network_io_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network IO Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Bytes per second read off all sockets", - "fieldConfig": { - "defaults": 
{ - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 35 - }, - "hiddenSeries": false, - "id": 91, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_incoming_byte_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Incoming Byte Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Average number of outgoing bytes sent per second to all servers", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 35 - }, - "hiddenSeries": false, - "id": 
171, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Outgoing Byte Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Current number of active connections", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 169, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - 
"nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_connection_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Current number of active connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Connections that failed authentication", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 42 - }, - "hiddenSeries": false, - "id": 170, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - 
"pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_failed_authentication_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Failed authentication connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Connections that were successfully authenticated using SASL or SSL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 42 - }, - "hiddenSeries": false, - "id": 174, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_successful_authentication_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Success authentication connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Average number of requests sent per second", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 49 - }, - "hiddenSeries": false, - "id": 172, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"kafka_connect_connect_metrics_request_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Average number of requests", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Responses received and sent per second", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 49 - }, - "hiddenSeries": false, - "id": 173, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_response_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": 
"time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Responses received and sent", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Fraction of time the I/O thread spent doing I/O", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 49 - }, - "hiddenSeries": false, - "id": 93, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_io_ratio{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": 
[], - "timeShift": null, - "title": "IO Ratio", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "Connect Worker", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 132, - "panels": [ - { - "columns": [], - "datasource": "Prometheus", - "fontSize": "110%", - "gridPos": { - "h": 11, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 129, - "pageSize": 100, - "showHeader": true, - "sort": { - "col": 8, - "desc": true - }, - "styles": [ - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "__name__", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "class", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_class", - "preserveFormat": false, - "thresholds": [], - "type": "string", - "unit": "short", 
- "valueMaps": [] - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "env", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "job", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Nb of Tasks destroyed", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#B877D9", - "#B877D9" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "mappingType": 1, - "pattern": "Value #B", - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #A", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #C", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - 
{ - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #D", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "#F2495C" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "status", - "thresholds": [ - "2" - ], - "type": "string", - "unit": "short", - "valueMaps": [ - { - "text": "running", - "value": "1" - }, - { - "text": "paused", - "value": "2" - }, - { - "text": "stopped", - "value": "3" - } - ] - }, - { - "alias": "name", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "type", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_type", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "version", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_version", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of tasks", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - 
"dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #E", - "thresholds": [ - "0", - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of Tasks running", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #F", - "thresholds": [ - "0", - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of Tasks failed", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#F2495C", - "#F2495C" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "mappingType": 1, - "pattern": "Value #G", - "preserveFormat": false, - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short", - "valueMaps": [ - { - "text": "0", - "value": "null" - } - ] - }, - { - "alias": "Nb of Tasks paused", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#FF9830", - "#FF9830" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #H", - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short", - "valueMaps": [ - { - "text": "0", - "value": "null" - } - ] - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #I", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Nb of Tasks unassigned", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#FADE2A", - "#FADE2A" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #J", - "thresholds": [ - "1" - ], - "type": "number", - 
"unit": "short" - }, - { - "alias": "", - "align": "right", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(label_replace(label_replace(kafka_connect_connector_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status!=\"\"}, \"status\", \"1\", \"status\", \"running\"), \"status\", \"2\", \"status\", \"paused\"), \"status\", \"3\", \"status\", \"stopped\")", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "I" - }, - { - "expr": "kafka_connect_connector_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",connector_type!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "expr": "kafka_connect_connector_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",connector_version!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "expr": "kafka_connect_connector_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",connector_class!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - 
"format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "E" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "F" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "G" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "H" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_destroyed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_unassigned_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "J" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connectors", - "transform": "table", - "transformations": [], - "type": "table-old" - } - 
], - "title": "Connector details", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 + "y": 6 }, - "id": 234, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Rebalances average time", + "description": "Maximum and average size of the batches processed by the connector task.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3248,8 +1547,8 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "opacity", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, @@ -3259,52 +1558,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 24, + "h": 10, + "w": 8, "x": 0, - "y": 29 + "y": 6 }, - "id": 209, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3313,119 +1605,52 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": 
"kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"}", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_avg{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rebalances average time", - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time since last rebalance", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 230, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "repeat": "instance", - "targets": [ { - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",job=\"connect\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"} >= 0", + 
"datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_max{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "($instance) Time since last rebalance ", - "type": "stat" - } - ], - "title": "Rebalances", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 112, - "panels": [ + "timeFrom": null, + "timeShift": null, + "title": "Batch size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average size of the batches processed by the connector", + "description": "Percentage of offset commit successful and failed.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3436,7 +1661,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3447,78 +1672,99 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "decbytes" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 30 + "h": 10, + "w": 8, + "x": 8, + 
"y": 6 }, - "id": 113, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_batch_size_avg{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_failure_percentage{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (failure)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Batch Size Average", + "title": "Offset commit success/failure", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Maximum size of the batches processed by the connector", + "description": 
"Average and Maximum time in milliseconds taken by the task to commit offsets", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3529,7 +1775,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3540,78 +1786,141 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "decbytes" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 30 + "h": 10, + "w": 8, + "x": 16, + "y": 6 }, - "id": 114, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_batch_size_max{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 
2, + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Batch Size Max", + "title": "Offset commit latency", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average percentage of the task’s offset commit attempts that succeeded", + "description": "Total number of failures seen by task.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3622,7 +1931,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3633,80 +1942,85 @@ 
"lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percentunit" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 37 + "y": 7 }, - "id": 115, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_failures{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Offset commit success percentage", + "title": "Total Record Failures", + "transformations": [], + 
"transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to commit offsets", + "description": "Total number of errors seen by task.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3717,7 +2031,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3728,78 +2042,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 37 + "h": 10, + "w": 8, + "x": 8, + "y": 7 }, - "id": 116, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": 
"kafka_connect_task_error_metrics_total_record_errors{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Offset commit Average Time", + "title": "Total Record Error", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The fraction of time this task has spent in the running state.", + "description": "Total number of records skipped seen by task.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3810,7 +2131,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3821,96 +2142,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percentunit" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 44 + "h": 10, + "w": 8, + "x": 16, + "y": 7 }, - "id": 117, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": 
[ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_running_ratio{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_records_skipped{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Running ratio", + "title": "Total Records Skipped", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Task metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 201, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of failures seen by task", + "description": "Total number of records logged seen by task.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3921,7 +2231,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3932,80 +2242,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 31 + "y": 8 }, - "id": 203, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_record_failures{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_errors_logged{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record failures", + "title": "Total Errors Logged", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of errors seen by task", + "description": "Total number of retries seen by task.", + "editable": true, + 
"error": false, "fieldConfig": { "defaults": { "color": { @@ -4016,7 +2331,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4027,80 +2342,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 31 + "y": 8 }, - "id": 205, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_record_errors{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_retries{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": 
"{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record errors", + "title": "Total Retries", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of records skipped by task", + "description": "Number of produce requests to dead letter topics.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4111,7 +2431,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4122,80 +2442,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 16, - "y": 31 + "y": 8 }, - "id": 206, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_records_skipped{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record skipped", + "title": "Dead Letter Topic Requests", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Task Errors", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of messages that was logged into either the dead letter queue or with Log4j", + "description": "Average and Maximum time in milliseconds taken by this task to poll for a batch of source records", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4206,7 +2573,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, 
"gradientMode": "none", "hideFrom": { "legend": false, @@ -4217,80 +2584,99 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 38 + "y": 9 }, - "id": 208, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_errors_logged{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": 
"kafka_connect_source_task_metrics_poll_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total errors logged", + "title": "Poll Batch Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of retries made by task", + "description": "Before transformations are applied, \n this is the average per-second number of records produced or \n polled by the task belonging to the named source connector in the worker\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4301,7 +2687,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4312,80 +2698,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 38 + "y": 9 }, - "id": 207, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_retries{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total retries", + "title": "Source Record Poll Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of produce requests to the dead letter queue", + "description": "After transformations are applied, \n this is the average per-second number of records output from the transformations and \n written to Kafka for the task belonging to the named source connector in the worker \n (excludes any records filtered out by the transformations)\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4396,7 +2787,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4407,80 +2798,127 @@ "lineWidth": 1, "pointSize": 5, 
"scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 45 + "x": 16, + "y": 9 }, - "id": 202, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Dead letter queue Produce requests", + "title": "Source Record Write Rate", + "transformations": [], + "transparent": false, "type": 
"timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Source Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of produce requests to the dead letter queue", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4491,7 +2929,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4502,95 +2940,99 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 45 + "x": 0, + "y": 10 }, - "id": 204, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + 
"max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Dead letter queue Produce requests", + "title": "Put Batch Latency", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Task Errors metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 139, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to poll for a batch of source records", + "description": 
"Before transformations are applied, \n this is the average per-second number of records read from Kafka \n for the task belonging to the named sink connector in the worker\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4601,7 +3043,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4612,79 +3054,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 32 + "x": 8, + "y": 10 }, - "id": 140, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_sink_record_read_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": 
"time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Average time", + "title": "Sink Record Read Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum time in milliseconds taken by this task to poll for a batch of source records", + "description": "After transformations are applied, \n this is the average per-second number of records output from the transformations and \n sent to the task belonging to the named sink connector in the worker \n (excludes any records filtered out by the transformations)\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4695,7 +3143,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4706,80 +3154,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 32 + "x": 16, + "y": 10 }, - "id": 141, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { 
"legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_sink_record_send_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Max time", + "title": "Sink Record Send Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of records produced/polled (before transformation) by this task belonging to the named source connector in this worker.", + "description": "Number of topic partitions assigned to the task and \n which belong to the named sink connector in the worker\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4790,7 +3243,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4801,79 +3254,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", 
- "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 39 + "y": 11 }, - "id": 144, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_partition_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Poll rate", + "title": "Partition Count", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Sink Tasks", + "transformations": [], + "transparent": 
false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of records output from the transformations and written to Kafka for this task belonging to the named source connector in this worker. This is after transformations are applied and excludes any records filtered out by the transformations.", + "description": "Incoming byte rate per second per worker.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4884,7 +3385,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4895,79 +3396,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 39 + "x": 0, + "y": 5 }, - "id": 143, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { 
"calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_write_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_incoming_byte_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Write rate", + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records that have been produced by this task but not yet completely written to Kafka.", + "description": "Outgoing byte rate per second per worker.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4978,7 +3485,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4989,79 +3496,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], 
"thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 46 + "x": 8, + "y": 5 }, - "id": 142, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_active_count_avg{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Active Count average", + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum number of records that have been produced by this task but not yet completely written to Kafka.", + "description": "Fraction of time the I/O thread spent doing I/O", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5072,7 +3585,7 
@@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5083,94 +3596,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 46 + "x": 0, + "y": 6 }, - "id": 145, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_active_count_max{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_io_ratio{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, 
- "title": "Source Record Active Count max", + "title": "IO Ratio", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Source metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 134, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of topic partitions assigned to this task belonging to the named sink connector in this worker.", + "description": "Average number of network operations (reads or writes) on all connections per second", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5181,7 +3685,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5192,76 +3696,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 33 + "x": 8, + "y": 6 }, - "id": 135, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 
null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_partition_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_network_io_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Partition Count", + "title": "Network IO Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to put a batch of sinks records", + "description": "Number of active connections", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5272,7 +3785,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5283,76 +3796,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 33 + "x": 0, + "y": 7 }, - "id": 136, + "height": null, + 
"hideTimeOverride": false, + "id": 41, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_connection_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Average time", + "title": "Active Connections", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum time in milliseconds taken by this task to put a batch of sinks records", + "description": "Successful and failed authentications per second.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5363,7 +3885,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5374,221 +3896,235 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 16, - "y": 33 + "x": 8, + "y": 7 }, - "id": 137, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_successful_authentication_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_failed_authentication_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": 
null, "timeShift": null, - "title": "Put Batch Max time", + "title": "Rate of Authentication", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Sink metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connect Workers", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka-connect" + ], "templating": { "list": [ { - "allValue": ".+", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": ".+", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_connect_cluster_id)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "kafka_connect_cluster_id", + "includeAll": false, + "label": "Connect cluster", + "multi": false, + "name": 
"connect_cluster", "options": [], - "query": { - "query": "label_values(kafka_connect_cluster_id)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\"}, kafka_connect_cluster_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "All", - "value": "$__all" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_connect_app_info{job=\"kafka-connect\", env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"},instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", - "multi": false, - "name": "instance", + "label": "Connect worker", + "multi": true, + "name": "connect_worker", "options": [], - "query": { - "query": "label_values(kafka_connect_app_info{job=\"kafka-connect\", env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"},instance)", - "refId": "Prometheus-instance-Variable-Query" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": 
"label_values(kafka_connect_connector_task_metrics_pause_ratio{job=\"kafka-connect\", env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"},connector)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Connector name", + "label": "Connector", "multi": true, "name": "connector", "options": [], - "query": { - "query": "label_values(kafka_connect_connector_task_metrics_pause_ratio{job=\"kafka-connect\", env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"},connector)", - "refId": "Prometheus-connector-Variable-Query" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}, connector)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -5611,8 +4147,8 @@ "30d" ] }, - "timezone": "", + "timezone": "browser", "title": "Kafka Connect cluster", - "uid": "AEaSQ97mz", - "version": 1 -} \ No newline at end of file + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json index 3763e310..7d19cec2 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json @@ -1,611 +1,3236 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { 
- "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Kafka Consumer Java client metrics", + "description": "Overview of the Kafka consumers", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 10, - "iteration": 1635962856628, + "hideControls": false, + "id": null, "links": [], "panels": [ { - "datasource": null, - "description": "", - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 58, - "options": { - "content": "# Disclaimer\n\n⚠️ This dashboard has some sample thresholds, this example is not meant to fit all use cases nor is it meant for production. Think of it as a learning tool to help you become comfortable with the metrics and thresholding.\n", - "mode": "markdown" - }, - "pluginVersion": "8.1.3", - "timeFrom": null, - "timeShift": null, - "type": "text" - }, - { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 3 + "y": 0 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Key metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { - "datasource": null, - "description": "The number of commit calls per second .", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 0.01 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 6, + "h": 5, + "w": 4, "x": 0, - "y": 4 + "y": 0 }, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_coordinator_metrics_commit_rate{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit rate", + "title": "Record Consumed Rate", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in ms a request was throttled by a broker.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "links": [], + "custom": {}, + "decimals": 
null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "ms" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 4 + "h": 5, + "w": 4, + "x": 4, + "y": 0 }, - "id": 10, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Average fetch throttle time", + "title": "Records Lag", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Rate of failed authentication attempts\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + 
"noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "green", - "value": 0 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 4 + "h": 5, + "w": 4, + "x": 8, + "y": 0 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_metrics_failed_authentication_rate{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Failed auth rate", + "title": "Rebalance Rate per hour", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": null, - "description": "The number of total rebalance events per hour, both successful and 
unsuccessful rebalance attempts.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "green", - "value": 0 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 4 + "h": 5, + "w": 4, + "x": 12, + "y": 0 }, - "id": 40, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"} + kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], 
"timeFrom": null, "timeShift": null, - "title": "Rebalance rate per hour", + "title": "Failed Rebalance Rate per hour", + "transformations": [], + "transparent": false, "type": "stat" }, { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 16, - "panels": [], - "title": "System", - "type": "row" - }, - { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Cores", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "percent" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 9 + "h": 5, + "w": 4, + "x": 16, + "y": 0 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - 
"tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$hostname\"}[5m])", + "datasource": null, + "expr": "count(kafka_consumer_app_info{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\", version!=\"\"}) by (version)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", - "type": "timeseries" + "title": "Versions", + "transformations": [], + "transparent": false, + "type": "stat" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Memory", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 9 - }, - "id": 64, - "options": { - "legend": { - "calcs": [ - "mean", - 
"lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + "h": 1, + "w": 24, + "x": 0, + "y": 1 }, - "pluginVersion": "8.1.3", - "targets": [ + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "exemplar": true, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$hostname\"})", - "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", 
client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { - "expr": "jvm_memory_bytes_max{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\",area=\"heap\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM Memory Used", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "% time in GC", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cts" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { 
+ "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Lag Max", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 
8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + 
"options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + 
"unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { 
+ "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Join Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, 
+ "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + 
} + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", 
client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Response Time (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + 
"metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Last Heartbeat Seconds Ago", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Rate Per Hour", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", 
client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": 
"single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Assigned Partitions", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Consumer group", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { 
+ "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 
8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": 
null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + 
"repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{env=\"$env\", 
client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] }, - { - "color": "red", - "value": 80 - } - ] + "unit": "cps" + }, + "overrides": [] }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 9 - }, - "id": 66, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_metrics_select_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Select Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$hostname\"}[5m]))", - "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM GC time", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 24, - "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes consumed per second\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -616,7 +3241,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -627,51 +3252,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 1, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - 
}, - "unit": "short" + "steps": [] + }, + "unit": "ns" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 6 + "y": 10 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -680,25 +3299,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_total{topic=~\"$topic\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes consumed rate", + "title": "IO time avg.", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records consumed per second.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -709,7 +3341,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -720,50 +3352,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": 
"A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ns" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 + "h": 10, + "w": 8, + "x": 8, + "y": 10 }, - "id": 9, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -772,25 +3399,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_consumer_consumer_fetch_manager_metrics_records_consumed_total{topic=~\"$topic\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rate of records consumed", + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + 
"collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes consumed per topic per second.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -801,7 +3483,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -812,51 +3494,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 1, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 14 + "y": 11 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -865,25 +3541,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": 
true, - "expr": "sum(rate(kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_total{topic!=\"\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval])) by (topic)", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes consumed rate per topic", + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records consumed per second per topic.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -894,7 +3583,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -905,50 +3594,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 14 + "h": 10, + "w": 8, + "x": 8, + "y": 11 }, - "id": 8, + "height": null, + "hideTimeOverride": false, 
+ "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -957,40 +3641,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum by (topic) (rate(kafka_consumer_consumer_fetch_manager_metrics_records_consumed_total{topic!=\"\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval]))", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rate of records consumed per topic", + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Throughput", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 30, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of fetch requests per second.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1001,7 +3683,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1012,50 +3694,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": 
"never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 7 + "x": 16, + "y": 11 }, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1064,25 +3741,52 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_consumer_consumer_fetch_manager_metrics_fetch_total{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> 
{{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch request rate", + "title": "Request Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The average number of bytes fetched per request for a topic", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1093,7 +3797,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1104,50 +3808,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" + "steps": [] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 7 + "x": 0, + "y": 12 }, - "id": 34, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1156,24 +3855,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{topic=~\"$topic\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + 
"datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}} - {{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch size avg", + "title": "Request Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The average time taken for a fetch request.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1184,7 +3897,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1195,50 +3908,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 16, - "y": 7 + "x": 8, + "y": 12 }, - "id": 32, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": 
"table", "placement": "bottom" @@ -1247,39 +3955,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch latency average", + "title": "Response Rate", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Consumer Fetch Metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 19 + "y": 13 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": null, - "description": "The average time taken for a commit request", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1290,7 +4039,7 @@ "axisPlacement": 
"auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1301,14 +4050,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -1316,34 +4063,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 6 + "y": 11 }, - "id": 36, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1352,24 +4097,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit latency average", + "title": 
"Bytes Consumed Rate per Topic", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The number of commit calls per second .", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1380,7 +4139,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1391,50 +4150,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 + "h": 10, + "w": 8, + "x": 8, + "y": 11 }, - "id": 43, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1443,39 +4197,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_coordinator_metrics_commit_rate{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", 
client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit rate", + "title": "Records Consumed Rate per Topic", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Consumer Commit Metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 26, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of simultaneous connections\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1486,7 +4239,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1497,50 +4250,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 48 + "y": 12 }, - "id": 11, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + 
"mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1549,25 +4297,52 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_metrics_connection_count{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Current connection count", + "title": "Fetch Size per Topic", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "response rate per node\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1578,7 +4353,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1589,50 +4364,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - 
"spanNulls": true, - "stacking": { - "group": "A", - "mode": "normal" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 + "h": 10, + "w": 8, + "x": 8, + "y": 11 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1641,207 +4411,146 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_node_metrics_response_rate{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Node reponse rate", + "title": "Records per Request Avg. 
per Topic", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Connections", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "10s", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka-client", + "kafka-consumer" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": "Prometheus", - "definition": "label_values(kafka_consumer_app_info, client_id)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Client ID", - "multi": true, - "name": "client_id", - "options": [], - "query": { - "query": "label_values(kafka_consumer_app_info, client_id)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, + "includeAll": false, "label": "Environment", - "multi": true, + "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + 
"tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_consumergroup_group_lag, group)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Group ID", + "label": "Server", "multi": true, - "name": "consumer_group", + "name": "server", "options": [], - "query": { - "query": "label_values(kafka_consumergroup_group_lag, group)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\"},hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(topic)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "topic", + "label": "Client ID", "multi": true, - "name": "topic", + "name": "client_id", "options": [], - "query": { - "query": "label_values(topic)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\"},client_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false - }, - { - "allValue": 
null, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": null, - "definition": "label_values(kafka_consumer_app_info, hostname)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Hostname", - "multi": true, - "name": "hostname", - "options": [], - "query": { - "query": "label_values(kafka_consumer_app_info, hostname)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" } ] }, "time": { - "from": "now-15m", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -1853,10 +4562,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", + "timezone": "browser", "title": "Kafka Consumer", - "uid": "-C-IEldWk2", - "version": 1 -} \ No newline at end of file + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json index 57a3610f..7b9b5c02 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json @@ -1,268 +1,360 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Java client Kafka 
Producer metrics", + "description": "Overview of the Kafka producers", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 5, - "iteration": 1635958303882, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Key metrics", - "type": "row" - }, - { - "datasource": null, - "description": "", - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 58, - "options": { - "content": "# Disclaimer\n\n⚠️ This dashboard has some sample thresholds, this example is not meant to fit all use cases nor is it meant for production. Think of it as a learning tool to help you become comfortable with the metrics and thresholding.\n", - "mode": "markdown" - }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "type": "text" + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of retried record sends for a topic. An increase could signal connectivity problems from the application to the broker. 
", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 5, - "w": 5, + "w": 4, "x": 0, - "y": 4 + "y": 0 }, - "id": 9, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Retry rate", + "title": "Record Send Rate", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of record sends that resulted in errors.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { "color": "red", - "value": 1 + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 5, - "w": 5, - "x": 5, - "y": 4 + "w": 4, + "x": 4, + "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_record_error_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Record error rate", + "title": "Error Rate", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": null, - "description": "he total amount of buffer memory that is not being used (either unallocated or in the free list).", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": 
null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#EAB839", - "value": 5 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "green", - "value": 10 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] }, - "unit": "decbytes" + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 5, "w": 4, - "x": 10, - "y": 4 + "x": 8, + "y": 0 }, - "id": 56, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_buffer_available_bytes{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Free buffer space", + "title": "Retry Rate", + "transformations": [], + "transparent": false, "type": "stat" 
}, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in ms a request was throttled by a broker.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -272,406 +364,1119 @@ }, "gridPos": { "h": 5, - "w": 5, - "x": 14, - "y": 4 + "w": 4, + "x": 12, + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_produce_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "count(kafka_producer_app_info{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\",version!=\"\"}) by (version)", "format": "time_series", "hide": false, "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce request throttle average", + "title": "Versions", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, + "collapsed": 
true, "datasource": null, - "description": "The average compression rate of record batches for a topic, defined as the average ratio of the compressed batch size over the uncompressed size.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 5, - "x": 19, - "y": 4 - }, - "id": 54, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_compression_rate_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", - "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "steps": [] + } } - ], - "timeFrom": null, - "timeShift": null, - "title": "Compression rate", - "type": "stat" - }, - { - "datasource": "Prometheus", - "description": "The average request latency in ms.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, + "h": 1, + "w": 24, "x": 0, - "y": 9 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + "y": 1 }, - "pluginVersion": "8.1.3", - "targets": [ + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", - "hide": false, - "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce request latency", - "type": "timeseries" - }, - { - "datasource": null, - "description": "The average time in ms record batches spent in the send buffer.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 
80 - } - ] + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 9 - }, - "id": 52, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ { - "exemplar": true, - "expr": 
"kafka_producer_producer_metrics_record_queue_time_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record queue time", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "The rate of failed authentication per seconds\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, 
+ "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + 
} + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Metadata Age", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_request_rate{env=\"$env\", 
client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request in-flight", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per 
Request (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, 
+ "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": 
false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Error Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 9 - }, - "id": 32, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "displayMode": 
"table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Record Size", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_failed_authentication_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", - "hide": false, - "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Failed authentication rate", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 64, - "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Cores", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -682,50 +1487,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" + "steps": [] + }, + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 0, - "y": 18 + "x": 8, + "y": 4 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -734,34 +1534,63 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$hostname\"}[5m])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Record Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Memory", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -769,53 +1598,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, 
"scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 8, - "y": 18 + "x": 16, + "y": 4 }, - "id": 66, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -824,40 +1648,63 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$hostname\"})", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "jvm_memory_bytes_max{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\",area=\"heap\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": 
"time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Produce Throttle Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "% time in GC", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -865,55 +1712,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 2, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percentunit" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 16, - "y": 18 + "x": 0, + "y": 5 }, - "id": 68, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -922,50 +1762,63 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + 
"repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$hostname\"}[5m]))", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM GC time", + "title": "Batch Size", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "System", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 25, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes sent per second to the broker.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Bytes/s", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -976,50 +1829,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - 
"showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 19 + "h": 10, + "w": 8, + "x": 8, + "y": 5 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1028,35 +1876,49 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_outgoing_byte_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outgoing byte rate", + "title": "Batch Split Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes sent per second to the broker per topic.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { 
"color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Bytes/s", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1067,50 +1929,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" + "steps": [] + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 19 + "h": 10, + "w": 8, + "x": 16, + "y": 5 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1119,36 +1976,91 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(kafka_producer_producer_topic_metrics_byte_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\",topic=~\"$topic\"}[$__rate_interval])) by (topic)", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", 
+ "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outgoing byte rate per topic", + "title": "Compression Rate (avg.)", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of messages sent per second to the broker.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Messages/s", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1159,50 +2071,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 
12, + "h": 10, + "w": 8, "x": 0, - "y": 27 + "y": 6 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1211,36 +2118,49 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_record_send_total{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_count{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outgoing messages per second", + "title": "Connection Count", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of messages sent per second to the broker per topic.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Messages/s", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1251,50 +2171,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 27 + "h": 10, + "w": 8, + "x": 8, + "y": 6 }, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1303,40 +2218,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(kafka_producer_producer_topic_metrics_record_send_total{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}[$__rate_interval])) by (topic)", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outgoing messages per second per topic", + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Throughput", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 27, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - 
"description": "The average number of bytes sent per partition per-request.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1347,7 +2260,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1358,50 +2271,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 20 + "x": 16, + "y": 6 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1410,25 +2318,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_batch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 
10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Batch size average", + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The current number of in-flight requests awaiting a response.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1439,7 +2360,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1450,50 +2371,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 20 + "x": 0, + "y": 7 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1502,25 +2418,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_requests_in_flight{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{env=\"$env\", 
client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Request in flight", + "title": "IO ratio", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average record size", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1531,7 +2460,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1542,50 +2471,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 16, - "y": 20 + "x": 8, + "y": 7 }, - "id": 15, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1594,40 +2518,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"kafka_producer_producer_metrics_record_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Record size average", + "title": "IO wait ratio", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 23, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of requests sent per second to the broker.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1638,7 +2560,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1649,50 +2571,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 0, - "y": 21 + "x": 16, + "y": 7 }, - "id": 12, + "height": null, + 
"hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1701,25 +2618,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_request_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_select_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce request rate", + "title": "Select Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records per request.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1730,7 +2660,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1741,50 +2671,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 
null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" + "steps": [] + }, + "unit": "ns" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 8, - "y": 21 + "x": 0, + "y": 8 }, - "id": 11, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1793,25 +2718,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_request_size_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce request size average", + "title": "IO time avg.", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of response received per second to the broker.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1822,7 +2760,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1833,50 +2771,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - 
"stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ns" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 16, - "y": 21 + "x": 8, + "y": 8 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1885,40 +2818,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_response_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce response rate", + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Produce Request metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, 
"datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 21 + "y": 8 }, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": null, - "description": "The current number of active connections.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1929,7 +2902,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1940,50 +2913,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 22 + "y": 8 }, - "id": 37, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1992,24 +2960,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"rate(kafka_producer_producer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connection rate", + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "New connections established per second in the window.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2020,7 +3002,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2031,50 +3013,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 22 + "y": 8 }, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 
100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2083,24 +3060,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connection creation rate", + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Connections closed per second in the window.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2111,7 +3102,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2122,50 +3113,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", 
- "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 22 + "y": 8 }, - "id": 39, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2174,24 +3160,52 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connection close rate", + "title": "Request Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The fraction of time the I/O 
thread spent doing I/O.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2202,7 +3216,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2213,50 +3227,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 30 + "y": 9 }, - "id": 40, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2265,24 +3274,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> 
{{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IO Ratio", + "title": "Request Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The average length of time the I/O thread spent waiting for a socket ready for reads or writes in nanoseconds.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2293,7 +3316,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2304,50 +3327,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ns" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 30 + "y": 9 }, - "id": 43, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2356,24 +3374,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + 
"expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IO Wait time average", + "title": "Response Rate", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "datasource": null, - "description": "The average length of time for I/O per select call in nanoseconds.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2384,7 +3458,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2395,50 +3469,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": 
"auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ns" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 16, - "y": 30 + "x": 0, + "y": 10 }, - "id": 41, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2447,24 +3516,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IO Time average", + "title": "Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The fraction of time the I/O thread spent waiting.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2475,7 +3558,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - 
"fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2486,50 +3569,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 0, - "y": 38 + "x": 8, + "y": 10 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2538,24 +3616,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IO Wait Ratio", + "title": "Compression Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, 
{ - "datasource": null, - "description": "Number of times the I/O layer checked for new I/O to perform per second.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2566,7 +3658,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2577,50 +3669,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 8, - "y": 38 + "x": 16, + "y": 10 }, - "id": 44, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2629,103 +3716,138 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": 
"{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Select Rate", + "title": "Record Send Rate", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Connections", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 31, - "panels": [ + }, { "cacheTimeout": null, - "datasource": null, - "description": "", + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } }, - "mappings": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 23 + "y": 11 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 41, "interval": null, "links": [], - "maxDataPoints": 3, + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "displayMode": "list", - "placement": "bottom" - }, - "pieType": "pie", - "reduceOptions": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "6.3.0", + "repeat": null, + "repeatDirection": 
null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(kafka_producer_app_info{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\",version!=\"\"}) by (version)", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{version}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "API Version", - "type": "piechart" + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The age in seconds of the current producer metadata being used.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2736,7 +3858,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2747,50 +3869,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 16, + "h": 10, + "w": 8, "x": 8, - "y": 23 + "y": 11 }, - "id": 17, + "height": null, + "hideTimeOverride": false, + "id": 42, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2799,173 +3916,146 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_metadata_age{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Metadata max age", + "title": "Record Error Rate", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Misc", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "10s", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka-client", + "kafka-producer" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_producer_app_info, client_id)", - "description": null, - 
"error": null, "hide": 0, - "includeAll": true, - "label": "Client ID", - "multi": true, - "name": "client_id", + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", "options": [], - "query": { - "query": "label_values(kafka_producer_app_info, client_id)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_producer_producer_topic_metrics_record_send_total, topic)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Kafka topic", + "label": "Server", "multi": true, - "name": "topic", + "name": "server", "options": [], - "query": { - "query": "label_values(kafka_producer_producer_topic_metrics_record_send_total, topic)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\"},hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": [ - "dev" - ], - "value": [ - "dev" - ] + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Environment", + "label": "Client ID", 
"multi": true, - "name": "env", + "name": "client_id", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\"},client_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false - }, - { - "allValue": null, - "current": { - "selected": false, - "text": [ - "kafka1" - ], - "value": [ - "kafka1" - ] - }, - "datasource": null, - "definition": "label_values(kafka_producer_app_info, hostname)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Hostname", - "multi": true, - "name": "hostname", - "options": [], - "query": { - "query": "label_values(kafka_producer_app_info, hostname)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" } ] }, "time": { - "from": "now-15m", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -2977,10 +4067,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", + "timezone": "browser", "title": "Kafka Producer", - "uid": "-C-IEldWk", - "version": 1 -} \ No newline at end of file + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json index 74b7655f..a78befa6 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json @@ -1,33 +1,30 @@ { + "__inputs": [ + { + 
"description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Kafka quotas", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 3, - "iteration": 1647426501739, + "hideControls": false, + "id": null, "links": [], "panels": [ { - "datasource": null, - "description": "Indicates the data produce rate of the client in bytes/sec", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -38,7 +35,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -49,34 +46,22 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "Bps" + "unit": "binBps" }, "overrides": [] }, @@ -86,13 +71,20 @@ "x": 0, "y": 0 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ],
"displayMode": "table", "placement": "bottom" @@ -101,24 +93,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, kafka_server_produce_byte_rate{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"})", + "datasource": null, + "expr": "topk(10,kafka_server_produce_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Produce Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the data consume rate of the client in bytes/sec", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -129,7 +135,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -140,34 +146,22 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "Bps" + "unit": "binBps" }, 
"overrides": [] }, @@ -177,13 +171,20 @@ "x": 8, "y": 0 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -192,24 +193,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, kafka_server_fetch_byte_rate{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"})", + "datasource": null, + "expr": "topk(10,kafka_server_fetch_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Fetch Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the percentage of time spent in broker network and I/O threads to process requests from client group", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -220,7 +235,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -231,32 +246,20 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - 
"group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "percent" }, @@ -268,13 +271,20 @@ "x": 16, "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -283,24 +293,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, kafka_server_request_request_time{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"})", + "datasource": null, + "expr": "topk(10,kafka_server_request_request_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Request Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the amount of time in ms the client was throttled", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -311,7 +335,7 @@ "axisPlacement": "auto", "barAlignment": 0, 
"drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -322,32 +346,20 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, @@ -357,15 +369,22 @@ "h": 12, "w": 8, "x": 0, - "y": 12 + "y": 1 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -374,24 +393,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_server_produce_throttle_time{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"} > 0", + "datasource": null, + "expr": "topk(10,kafka_server_produce_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce Bandwidth Throttle", + "title": "Produce Throttle Time", + "transformations": [], + "transparent": 
false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the amount of time in ms the client was throttled", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -402,7 +435,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -413,32 +446,20 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, @@ -448,15 +469,22 @@ "h": 12, "w": 8, "x": 8, - "y": 12 + "y": 1 }, + "height": null, + "hideTimeOverride": false, "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -465,24 +493,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_server_fetch_throttle_time{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"} > 0", + "datasource": null, + "expr": "topk(10,kafka_server_fetch_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - 
"refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch Bandwidth Throttle", + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the amount of time in ms the client was throttled", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -493,7 +535,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -504,32 +546,20 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, @@ -539,15 +569,22 @@ "h": 12, "w": 8, "x": 16, - "y": 12 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -556,168 +593,163 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"kafka_server_request_throttle_time{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"} > 0", + "datasource": null, + "expr": "topk(10,kafka_server_request_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Request Throttle", + "title": "Request Throttle Time", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka-client", + "kafka-quota" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - 
] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_server_kafkaserver_brokerstate, instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", + "label": "Broker", "multi": true, - "name": "broker_id", + "name": "broker", "options": [], - "query": { - "query": "label_values(kafka_server_kafkaserver_brokerstate, instance)", - "refId": "Prometheus-broker_id-Variable-Query" - }, + "query": "label_values(kafka_server_produce_byte_rate{env=\"$env\"},hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": ".*", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(user)", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "User", "multi": true, "name": "user", "options": [], - "query": { - "query": "label_values(user)", - "refId": "Prometheus-user-Variable-Query" - }, + "query": "label_values(user)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": ".*", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(client_id)", - "description": null, - "error": null, "hide": 0, 
"includeAll": true, - "label": "Client Id", + "label": "Client ID", "multi": true, "name": "client_id", "options": [], - "query": { - "query": "label_values(client_id)", - "refId": "Prometheus-client_id-Variable-Query" - }, + "query": "label_values(client_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -727,10 +759,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", + "timezone": "browser", "title": "Kafka Quotas", - "uid": "cwWEgYqMz", - "version": 2 -} \ No newline at end of file + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json index 1fe01cf4..1f1ddccb 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json @@ -1,109 +1,71 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Kafka topics", "editable": true, "gnetId": null, - "graphTooltip": 0, 
- "id": 4, - "iteration": 1647426704713, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 19, - "panels": [], - "title": "Overview", - "type": "row" - }, - { - "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1000 - }, - { - "color": "red", - "value": 10000 - } - ] + "steps": [] } - }, - "overrides": [] + } }, "gridPos": { - "h": 5, - "w": 4, + "h": 1, + "w": 24, "x": 0, - "y": 1 - }, - "id": 9, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" + "y": 0 }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{job=\"kafka-broker\",env=~\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Total # of Topics", - "type": "stat" + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -114,7 +76,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": 
"none", "hideFrom": { "legend": false, @@ -125,50 +87,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { "h": 10, - "w": 13, - "x": 4, - "y": 1 + "w": 12, + "x": 0, + "y": 0 }, + "height": null, + "hideTimeOverride": false, "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -177,23 +136,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka-broker\",topic=~\"$topic\",env=~\"$env\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{env=\"$env\",topic=~\"$topic\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Messages In", + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { "color": { @@ -204,7 +178,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -215,32 +189,22 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "bytes" }, @@ -248,17 +212,24 @@ }, "gridPos": { "h": 10, - "w": 7, - "x": 17, - "y": 1 + "w": 12, + "x": 12, + "y": 0 }, - "id": 17, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -267,82 +238,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum(kafka_log_log_size{job=\"kafka-broker\",env=\"$env\",topic=~\"$topic\"}) by (topic))", + "datasource": null, + "expr": "topk(10, sum(kafka_log_log_size{env=\"$env\",topic=~\"$topic\"}) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Log size", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": 
[ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 10000 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 6 - }, - "id": 11, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_globalpartitioncount{job=\"kafka-broker\",env=~\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total # of Partitions", - "type": "stat" - }, - { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -353,7 +280,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -364,13 +291,13 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -379,34 +306,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 12, "x": 0, - "y": 11 + "y": 1 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + 
"last" ], "displayMode": "table", "placement": "bottom" @@ -415,23 +340,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",topic=~\"$topic\",env=~\"$env\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",topic=~\"$topic\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes In", + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -442,7 +382,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -453,13 +393,13 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -468,34 +408,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 12, "x": 12, - "y": 11 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": 
[], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -504,24 +442,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",topic=~\"$topic\",env=~\"$env\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",topic=~\"$topic\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes Out", + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -532,7 +484,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -543,50 +495,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "reqps" }, 
"overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 0, - "y": 20 + "y": 2 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -595,24 +544,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{job=\"kafka-broker\", env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ env=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce Request per sec", + "title": "Produce Requests/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -623,7 +586,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -634,50 +597,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": 
{ "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 12, - "y": 20 + "y": 2 }, - "id": 15, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -686,376 +646,322 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{job=\"kafka-broker\", env=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ env=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch Request per sec", + "title": "Consumer Fetch Requests/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 3 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, + "color": { + "mode": "thresholds" 
+ }, + "columns": [], "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { - "align": null, + "align": "auto", "displayMode": "auto", - "filterable": false + "filterable": true }, - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "partition" - }, - "properties": [ - { - "id": "custom.width", - "value": 103 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "offset" - }, - "properties": [ - { - "id": "custom.width", - "value": 137 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "instance" - }, - "properties": [ - { - "id": "custom.width", - "value": 155 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "topic" - }, - "properties": [ - { - "id": "custom.width", - "value": 294 - } - ] - } - ] + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 24, "x": 0, - "y": 29 + "y": 3 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "showHeader": true, - "sortBy": [ - { - "desc": false, - "displayName": "partition" - } - ] + "showHeader": true }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 6, "targets": [ { - "exemplar": true, - "expr": "kafka_log_log_logstartoffset{job=\"kafka-broker\",env=~\"$env\",topic=\"$topic\"}", + "datasource": null, + "expr": "kafka_log_log_logstartoffset{env=\"$env\",topic=~\"$topic\"}", "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + 
"target": "" + }, + { + "datasource": null, + "expr": "kafka_log_log_logendoffset{env=\"$env\",topic=~\"$topic\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Start Offset", + "title": "Offsets", "transformations": [ { - "id": "organize", + "id": "concatenate", "options": { - "excludeByName": { - "Time": true, - "__name__": true, - "env": true, - "instance": false, - "job": true - }, - "indexByName": { - "Time": 0, - "Value": 7, - "__name__": 1, - "env": 2, - "instance": 3, - "job": 4, - "partition": 6, - "topic": 5 - }, - "renameByName": { - "Value": "offset" + "frameNameLabel": "id", + "frameNameMode": "label" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "hostname 1", + "topic 1", + "Value #A", + "Value #B", + "partition 1" + ] } } - } - ], - "type": "table" - }, - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ { - "color": "red", - "value": 80 + "field": "partition 1" } ] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "partition" - }, - "properties": [ - { - "id": "custom.width", - "value": 103 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "offset" - }, - "properties": [ - { - "id": "custom.width", - "value": 105 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "topic" - }, - "properties": [ + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ { - "id": "custom.width", - "value": 289 + "field": "topic 1" } ] } - ] - }, - "gridPos": { - "h": 8, - "w": 
12, - "x": 12, - "y": 29 - }, - "id": 7, - "options": { - "showHeader": true, - "sortBy": [ - { - "desc": false, - "displayName": "partition" - } - ] - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "kafka_log_log_logendoffset{job=\"kafka-broker\",env=~\"$env\",topic=\"$topic\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "End Offset", - "transformations": [ + }, { "id": "organize", "options": { - "excludeByName": { - "Time": true, - "__name__": true, - "env": true, - "instance": false, - "job": true - }, + "excludeByName": {}, "indexByName": { - "Time": 0, - "Value": 7, - "__name__": 1, - "env": 2, - "instance": 3, - "job": 4, - "partition": 6, - "topic": 5 + "Value #A": 3, + "Value #B": 4, + "hostname 1": 5, + "partition 1": 2, + "topic 1": 1 }, "renameByName": { - "Value": "offset" + "Value #A": "start offset", + "Value #B": "end offset", + "hostname 1": "broker", + "partition 1": "", + "topic": "", + "topic 1": "" } } } ], + "transparent": false, "type": "table" } ], - "title": "Topic offsets", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Offsets", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - 
"query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_log_log_size,topic)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Topic name", + "label": "Topic", "multi": true, "name": "topic", "options": [], - "query": { - "query": "label_values(kafka_log_log_size,topic)", - "refId": "Prometheus-topic-Variable-Query" - }, + "query": "label_values(kafka_log_log_size{env=\"$env\"}, topic)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", - "title": "Kafka Topics", - "uid": "vQT4b1-Mz", - "version": 1 -} \ No newline at end of file + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka topics", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json index 37ed490b..91fa6305 
100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json @@ -1,36 +1,38 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of ksqlDB clusters.", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 10, - "iteration": 1632254575966, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 1, @@ -38,42 +40,48 @@ "x": 0, "y": 0 }, - "id": 29, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Overview", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Overview", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average number of active queries per server.", + "description": "ksqlDB online instances returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": 
{ - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -82,76 +90,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_num_active_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", - "instant": true, + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Active Queries", + "title": "ksqlDB: Online Servers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of created queries", + "description": "Number of active queries deployed in the cluster.\n ", + "editable": true, + "error": false, 
"fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 800 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -160,75 +173,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_running_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Running Queries", + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of persisted queries", + "description": "Number of 
running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#299c46", - "value": 1 + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -237,75 +264,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 4, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_num_persistent_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, 
"timeShift": null, - "title": "Total Persisted Queries", + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of rebalancing queries", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal to zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stays higher than 0 for a longer period of time.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -314,75 +355,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 1 + "y": 0 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{job=\"ksqldb\", env=\"$env\", 
ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rebalancing Queries", + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of error query", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal to zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 2 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -391,380 +446,406 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 16, - "y": 1 + "y": 0 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 6, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ 
- "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Queries in Error State", + "title": "ksqlDB: Sum of Queries Failed", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of idle queries", + "description": "A metric with constant value 1 indicating the server is up and emitting metrics.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] +
"steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 20, + "h": 10, + "w": 8, + "x": 0, "y": 1 }, - "id": 19, + "height": null, + "hideTimeOverride": false, + "id": 7, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(ksql_ksql_engine_query_stats_num_idle_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_liveness_indicator{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Idle Queries", - "type": "stat" + "title": "Cluster Liveness", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "The number of messages consumed per second across all queries.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { - "align": null, - "displayMode": "auto", - "filterable": false + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "ksql_query" - }, - "properties": [ - { - "id": "custom.width", - "value": 426 - } - ] + "steps": [] }, - { - "matcher": { - "id": "byName", - "options": "instance" - }, - "properties": [ - { - "id": "custom.width", - "value": 381 - } - ] - } - ] + "unit": "cps" + }, + "overrides": [] }, "gridPos": { - "h": 9, - "w": 16, - "x": 0, - "y": 5 + "h": 10, + "w": 8, + "x": 8, + "y": 1 }, - "id": 23, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "showHeader": true, - "sortBy": [] + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "ksql_ksql_metrics_ksql_queries_query_status{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"}", - "format": "table", - "instant": true, + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": 
"Queries Status", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": true, - "__name__": true, - "env": true, - "job": true, - "ksql_cluster": true - }, - "indexByName": {}, - "renameByName": { - "Time": "", - "__name__": "", - "instance": "", - "ksql_cluster": "", - "ksql_query": "" - } - } - } - ], - "type": "table" + "title": "Messages consumed/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of not running queries", + "description": "The number of messages produced per second across all queries.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] + "steps": [] }, - "unit": "none" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, + "h": 10, + "w": 8, "x": 16, - "y": 5 + "y": 1 }, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - 
"orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(ksql_ksql_engine_query_stats_not_running_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Stopped Queries", - "type": "stat" + "title": "Messages produced/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Num of running queries", + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 5 + "h": 1, + "w": 24, + "x": 0, + "y": 2 }, - "id": 15, + "height": null, + "hideTimeOverride": false, + "id": 10, "interval": null, "links": [], "maxDataPoints": 100, - "options": { - 
"colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(ksql_ksql_engine_query_stats_pending_shutdown_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Currently Shutting Down Queries", - "type": "stat" + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Cluster liveness", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -775,7 +856,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -786,144 +867,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 5, + "h": 10, "w": 8, - "x": 16, - "y": 9 + "x": 0, + "y": 2 }, - "id": 17, + "height": null, + "hideTimeOverride": false, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" - ], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "ksql_ksql_engine_query_stats_liveness_indicator{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Cluster liveness", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "Message consumed/sec", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - 
"gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 14 - }, - "id": 21, - "options": { - "legend": { - "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -932,23 +914,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"}", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Message consumed/sec", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Message produced/sec", + "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -959,7 +956,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -970,51 +967,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 14 + "h": 10, + "w": 8, + "x": 8, + "y": 2 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1023,41 +1014,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(ksql_ksql_engine_query_stats_messages_produced_per_sec{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"}[5m])", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, 
"timeShift": null, - "title": "Message produced/sec", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "collapsed": false, - "datasource": null, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 33, - "panels": [], - "title": "System", - "type": "row" - }, - { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1068,7 +1056,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1079,638 +1067,115 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 0, - "y": 23 + "x": 16, + "y": 2 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "irate(process_cpu_seconds_total{job=\"ksqldb\", env=\"$env\", instance=~\"$instance\"}[5m])", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], 
- "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 23 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"ksqldb\", env=\"$env\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "Used:{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{job=\"ksqldb\",env=\"$env\",area=\"heap\"}", - "interval": "", - "legendFormat": "Max:{{instance}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM Memory Used", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - 
"legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 4, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 23 - }, - "id": 14, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"ksqldb\", env=\"$env\", instance=~\"$instance\"}[5m]))", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Time spent in GC", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 31, - "panels": [ - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, 
- "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 6, - "x": 0, - "y": 32 - }, - "id": 26, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" - }, - { - "refId": "C" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Latency (Avg)", - "transformations": [], - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 6, - "x": 6, - "y": 32 - }, - "id": 35, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, 
- "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Latency (Max)", - "transformations": [], - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 6, - "x": 12, - "y": 32 - }, - "id": 25, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Process Latency (Avg)", - "transformations": [], - "type": "timeseries" - }, - { - 
"datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 6, - "x": 18, - "y": 32 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_streams_stream_thread_metrics_process_latency_max{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" - } + "calcs": [ + "max", + "mean", + "last" ], - "timeFrom": null, - "timeShift": null, - "title": "Process Latency Max", - "transformations": [], - "type": "timeseries" + "displayMode": "table", + "placement": "bottom" }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1732,74 +1197,99 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 43 + "y": 3 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": 
"kafka_streams_stream_thread_metrics_commit_latency_avg{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit Latency (Avg)", + "title": "Poll Latency", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1821,74 +1311,99 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 6, - "y": 43 - }, - "id": 38, +
"h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit Latency (Max)", + "title": "Process Latency", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1910,74 +1425,99 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2,
"type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 12, - "y": 43 + "h": 10, + "w": 8, + "x": 0, + "y": 4 }, - "id": 27, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}
(max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Punctuate Latency (Avg)", + "title": "Commit Latency", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1996,113 +1536,144 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/max/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 5, - 2 - ], - "fill": "dash" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 18, - "y": 43 - }, - "id": 37, + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": 
null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{thread_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Punctuate Latency (Max)", + "title": "Punctuate Latency", "transformations": [], + "transparent": false, "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Queries Performance", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 5 }, - "id": 40, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2121,54 +1692,48 @@ "viz": false
}, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 33 + "y": 5 }, - "id": 36, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2177,24 +1742,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Put Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { 
"color": { @@ -2213,53 +1792,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 33 - }, - "id": 42, + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2268,24 +1842,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put average latency", + "title": "Put Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": 
true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2304,53 +1892,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 33 - }, - "id": 43, + "h": 10, + "w": 8, + "x": 16, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2359,24 +1942,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_max{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put max latency", + "title": "Put Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - 
"description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2395,54 +1992,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 44 + "y": 6 }, - "id": 52, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2451,24 +2042,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put if absent rate", + "title": "Put if absent Rate", 
"transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2487,53 +2092,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 44 - }, - "id": 53, + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2542,24 +2142,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, 
"timeShift": null, - "title": "Put if absent average latency", + "title": "Put if absent Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2578,53 +2192,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 44 + "h": 10, + "w": 8, + "x": 16, + "y": 6 }, - "id": 54, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2633,24 +2242,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - 
"refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put if absent max latency", + "title": "Put if absent Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2669,54 +2292,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 55 + "y": 7 }, - "id": 41, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2725,24 +2342,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_fetch_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": 
"time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Fetch Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2761,53 +2392,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 55 + "h": 10, + "w": 8, + "x": 8, + "y": 7 }, - "id": 44, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2816,24 +2442,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": 
"kafka_streams_stream_state_metrics_fetch_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch average latency", + "title": "Fetch Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2852,53 +2492,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 55 - }, - "id": 45, + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2907,24 +2542,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_max{job=\"ksqldb\", 
thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch max latency", + "title": "Fetch Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2943,54 +2592,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 66 + "y": 8 }, - "id": 46, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2999,24 +2642,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - 
"expr": "kafka_streams_stream_state_metrics_delete_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Delete Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3035,53 +2692,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 66 - }, - "id": 47, + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3090,24 +2742,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + 
"repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Delete average latency", + "title": "Delete Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3126,53 +2792,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 66 - }, - "id": 48, + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3181,24 +2842,38 @@ 
"mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_delete_latency_max{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Delete max latency", + "title": "Delete Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3217,54 +2892,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 77 + "y": 9 }, - "id": 49, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + 
"last" ], "displayMode": "table", "placement": "bottom" @@ -3273,24 +2942,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Restore Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3309,53 +2992,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 77 - }, - "id": 50, + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": 
[ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3364,24 +3042,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Restore average latency", + "title": "Restore Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3400,53 +3092,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 77 + "h": 10, + "w": 8, + "x": 16, + "y": 9 }, - "id": 51, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3455,130 +3142,174 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_latency_max{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Restore max latency", + "title": "Restore Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "StateStore Metric", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "State Stores", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "ksqldb" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": 
"label_values(ksql_ksql_engine_query_stats_liveness_indicator,env)", - "refId": "Prometheus-env-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "All", - "value": "$__all" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "ksqldb_cluster_id", + "includeAll": false, + "label": "ksqlDB cluster", + "multi": false, + "name": "ksqldb_cluster", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "refId": "Prometheus-ksqldb_cluster_id-Variable-Query" + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\"},ksqldb_cluster_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "datasource": "Prometheus", + "hide": 2, + "includeAll": false, + "label": "ksqlDB cluster ID", + "multi": false, + "name": "ksqldb_cluster_id", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\"},ksql_cluster)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + 
"tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "All", - "value": "$__all" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", + "label": "ksqlDB server", "multi": true, - "name": "instance", + "name": "ksqldb_server", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,instance)", - "refId": "Prometheus-instance-Variable-Query" - }, + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -3588,10 +3319,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", + "timezone": "browser", "title": "ksqlDB cluster", - "uid": "pbx34foGk", - "version": 1 -} \ No newline at end of file + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json index 50235352..cd99a568 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json +++ 
b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json @@ -1,396 +1,543 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Schema Registry cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 4, - "iteration": 1632254298743, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 19, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Schemas", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Schema Registry online instances returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "red", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": 
"yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { "color": "green", - "value": 1 + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, + "height": null, + "hideTimeOverride": false, "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(kafka_schema_registry_registered_count{job=\"schema-registry\",env=\"$env\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schema Registry Instances", + "title": "SR: Online instances", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Average number of registered schemas across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - 
"scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 5, + "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 11, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], - "displayMode": "table", - "placement": "right" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum by(schema_type) (kafka_schema_registry_schemas_created{job=\"schema-registry\",env=\"$env\"})", + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{schema_type}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schema registered over time", - "type": "timeseries" + "title": "SR: Registered Schemas (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", + "description": 
"Average number of schemas created, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "unit": "short" + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 5, "w": 4, - "x": 16, - "y": 1 + "x": 8, + "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 4, "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "7.3.4", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum by(schema_type)(kafka_schema_registry_schemas_created{job=\"schema-registry\",env=\"$env\"})", - "instant": true, + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{schema_type}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schemas created", - "type": "piechart" + "title": "SR: Created Schemas by Type (avg.)", + "transformations": [], + "transparent": false, + "type": 
"stat" }, { "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "unit": "short" + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 5, "w": 4, - "x": 20, - "y": 1 + "x": 12, + "y": 0 }, - "id": 9, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "7.3.4", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "avg by(schema_type)(kafka_schema_registry_schemas_deleted{job=\"schema-registry\",env=\"$env\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{schema_type}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schemas deleted", - "type": "piechart" + "title": "SR: Sum of Deleted Schemas by Type", + "transformations": [], + 
"transparent": false, + "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of active connections", + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 16, + "y": 0 }, - "id": 25, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_schema_registry_registered_count{job=\"schema-registry\",env=\"$env\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schemas registered", + "title": "SR: Sum of Active Connections", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, 
"gridPos": { "h": 1, "w": 24, "x": 0, - "y": 9 + "y": 1 }, - "id": 15, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -401,7 +548,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -412,50 +559,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percent" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 10 + "y": 1 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -464,23 +606,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(process_cpu_seconds_total{job=\"schema-registry\",env=\"$env\"}[5m])*100", + 
"datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=\"$sr_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -491,7 +648,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -502,51 +659,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 10 + "y": 1 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -555,29 +706,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum 
without(area)(jvm_memory_bytes_used{job=\"schema-registry\",env=\"$env\"})", - "interval": "", - "legendFormat": "Used:{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{job=\"schema-registry\",env=\"$env\",area=\"heap\"}", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$sr_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "Max:{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -588,7 +748,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -599,52 +759,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 3, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 16, - "y": 10 + "y": 1 }, - "id": 23, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", 
"mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -653,34 +806,79 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"schema-registry\",env=\"$env\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=\"$sr_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Time spent in GC", + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 2 }, - "id": 17, - "title": "Connections", + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -691,7 +889,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -702,14 +900,12 @@ "lineWidth": 1, "pointSize": 
5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -717,34 +913,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 18 + "y": 2 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -753,22 +947,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jetty_metrics_connections_active{job=\"schema-registry\",env=\"$env\"}", + "datasource": null, + "expr": "kafka_schema_registry_kafka_schema_registry_metrics_connection_count{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Active Connections", + "title": "Connections", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -779,7 +989,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { 
"legend": false, @@ -790,14 +1000,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -805,34 +1013,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 18 + "y": 2 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -841,22 +1047,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jersey_metrics_request_rate{job=\"schema-registry\",env=\"$env\"}", + "datasource": null, + "expr": "kafka_schema_registry_jersey_metrics_request_rate{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Requests Rate", + "title": "Request Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -867,7 +1089,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + 
"fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -878,14 +1100,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -893,34 +1113,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 18 + "y": 2 }, - "id": 24, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -929,66 +1147,132 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jersey_metrics_request_latency_99{job=\"schema-registry\",env=\"$env\"}", + "datasource": null, + "expr": "kafka_schema_registry_jersey_metrics_request_latency_99{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Requests latency 99p", + "title": "Request Latency (p99)", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + 
"schema-registry" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "sr_server", + "options": [], + "query": "label_values(kafka_schema_registry_registered_count{env=\"$env\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", "title": "Schema Registry cluster", - "uid": "9ixzve-Mk", - "version": 2 -} \ No newline at end of file + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json 
b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json index 04646780..d2c4c741 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json @@ -1,76 +1,103 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Zookeeper cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 5, - "iteration": 1632253434096, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 22, - "title": "Health Check", + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Quorum Size of Zookeeper ensemble", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", + "editable": 
true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 3 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" } ] }, @@ -79,78 +106,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, + "height": null, + "hideTimeOverride": false, "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "count(zookeeper_status_quorumsize{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper nodes online", + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, 
"datasource": "Prometheus", - "description": "Number of Alive Connections", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 - }, - { - "color": "#d44a3a", - "value": 200 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -159,163 +189,180 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_numaliveconnections{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Alive Connections", + "title": "ZK: ZNodes (avg.)", + "transformations": [], + 
"transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of queued requests in the server. This goes up when the server receives more requests than it can process", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line+area" - } - }, - "decimals": 0, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], - "min": 0, + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "transparent", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" }, { "color": "red", - "value": 10 + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 16, + "h": 5, + "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, 
+ "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "zookeeper_outstandingrequests{job=\"zookeeper\",env=\"$env\"}", + "datasource": null, + "expr": "zookeeper_numaliveconnections{env=\"$env\"} / zookeeper_maxclientcnxnsperhost{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{instance}})", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Outstanding Requests", - "type": "timeseries" + "title": "ZK: Connections used", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -324,139 +371,230 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 12, + "y": 0 }, - "id": 3, + "height": null, + 
"hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(zookeeper_inmemorydatatree_nodecount{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of ZNodes", + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Watchers", + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { 
+ "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 1000 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 5 + "h": 5, + "w": 8, + "x": 16, + "y": 0 }, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 6, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "right" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_inmemorydatatree_watchcount{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "zookeeper_outstandingrequests{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of Watchers", - "type": "stat" + "title": "ZK: Outstanding 
Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 9 + "y": 1 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -467,7 +605,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -478,46 +616,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "percent" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 10 + "y": 1 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": 
"table", "placement": "bottom" @@ -526,23 +663,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(process_cpu_seconds_total{job=\"zookeeper\",env=\"$env\"}[5m])*100", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$zk_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -553,7 +705,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -564,47 +716,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 10 + "y": 1 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": 
"table", "placement": "bottom" @@ -613,29 +763,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=\"$zk_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "Used:{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{job=\"zookeeper\",env=\"$env\",area=\"heap\"}", - "interval": "", - "legendFormat": "Max:{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -646,7 +805,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -657,48 +816,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 3, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 16, - "y": 10 + "y": 1 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 10, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -707,329 +863,1028 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"zookeeper\",env=\"$env\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=\"$zk_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Time spent in GC", + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, + "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 2 }, - "id": 18, - "title": "Request Latency", - "type": "row" - }, - { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_minrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Minimum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 }, - "decimals": 0, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 18 - }, - "id": 9, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_avgrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Average)", + "transformations": [], + 
"transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ { - "expr": "zookeeper_minrequestlatency{job=\"zookeeper\",env=\"$env\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{instance}})", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_maxrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Maximum)", + "transformations": [], 
+ "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Request Latency - Minimum", - "type": "timeseries" + "title": "Server Latency", + "transformations": [], + "transparent": false, + "type": "row" }, { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "links": [], - "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ms" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 18 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + "h": 1, + "w": 24, + "x": 0, + "y": 3 }, - "pluginVersion": "8.1.3", - "targets": [ + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "exemplar": true, - "expr": "zookeeper_avgrequestlatency{job=\"zookeeper\",env=\"$env\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": 
"{{server_id}}:{{member_type}} ({{instance}})", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Latency - Average", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{env=\"$env\",quantile=~\"$quantile\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{env=\"$env\"}", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sync Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "ms" + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Expired Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 18 - }, - "id": 11, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + 
"refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Disconnected Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ { - "expr": "zookeeper_maxrequestlatency{job=\"zookeeper\",env=\"$env\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{instance}})", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Auth Failures on Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Request Latency - Maximum", - "type": "timeseries" + "title": "Client Latency (Kafka)", + "transformations": [], + "transparent": false, + "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka", + "zookeeper" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "zk_server", + "options": [], + "query": "label_values(zookeeper_outstandingrequests{env=\"$env\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + 
"tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{env=\"$env\"}, quantile)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -1039,10 +1894,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", + "timezone": "browser", "title": "Zookeeper cluster", - "uid": "H4xS98vWk", - "version": 1 -} \ No newline at end of file + "uid": null, + "version": 0 +}