diff --git a/src/bentoml/_internal/server/metrics/prometheus.py b/src/bentoml/_internal/server/metrics/prometheus.py index e2a3eea7583..4895ad1089f 100644 --- a/src/bentoml/_internal/server/metrics/prometheus.py +++ b/src/bentoml/_internal/server/metrics/prometheus.py @@ -2,6 +2,7 @@ import logging import os +import re import typing as t from functools import partial from typing import TYPE_CHECKING @@ -101,9 +102,107 @@ def generate_latest(self): if self.multiproc: registry = self.prometheus_client.CollectorRegistry() self.prometheus_client.multiprocess.MultiProcessCollector(registry) - return self.prometheus_client.generate_latest(registry) + raw_output = self.prometheus_client.generate_latest(registry) + return self._fix_histogram_ordering(raw_output) else: - return self.prometheus_client.generate_latest() + raw_output = self.prometheus_client.generate_latest() + return self._fix_histogram_ordering(raw_output) + + def _fix_histogram_ordering(self, prometheus_output: bytes) -> bytes: + """ + Fix histogram metric ordering to comply with Prometheus text format specification. + + The Prometheus format requires histogram metrics to be grouped by metric name with: + 1. All _bucket metrics for a histogram (in ascending order of 'le' values) + 2. Followed by _count metric + 3. Followed by _sum metric + + Args: + prometheus_output: Raw Prometheus format output + + Returns: + Properly ordered Prometheus format output + """ + lines = prometheus_output.decode("utf-8").strip().split("\n") + + # Separate comments/help lines from metric lines + comment_lines = [] + metric_lines = [] + + for line in lines: + if line.startswith("#") or line.strip() == "": + comment_lines.append(line) + else: + metric_lines.append(line) + + # Group metrics by base name (without _bucket, _count, _sum suffixes) + metrics_by_base = {} + non_histogram_metrics = [] + + for line in metric_lines: + if not line.strip(): + continue + + # Extract metric name (everything before the first space or '{') + if "{" in line: + metric_name = line.split("{")[0] + else: + metric_name = line.split(" ")[0] + + # Check if this is a histogram metric + if metric_name.endswith("_bucket"): + base_name = metric_name[:-7] # Remove '_bucket' + if base_name not in metrics_by_base: + metrics_by_base[base_name] = {"bucket": [], "count": [], "sum": []} + metrics_by_base[base_name]["bucket"].append(line) + elif metric_name.endswith("_count"): + base_name = metric_name[:-6] # Remove '_count' + if base_name not in metrics_by_base: + metrics_by_base[base_name] = {"bucket": [], "count": [], "sum": []} + metrics_by_base[base_name]["count"].append(line) + elif metric_name.endswith("_sum"): + base_name = metric_name[:-4] # Remove '_sum' + if base_name not in metrics_by_base: + metrics_by_base[base_name] = {"bucket": [], "count": [], "sum": []} + metrics_by_base[base_name]["sum"].append(line) + else: + non_histogram_metrics.append(line) + + # Function to extract 'le' value for bucket sorting + def extract_le_value(bucket_line: str) -> float: + try: + # Find le="value" in the line + match = re.search(r'le="([^"]+)"', bucket_line) + if match: + le_val = match.group(1) + if le_val == "+Inf": + return float("inf") + return float(le_val) + return float("inf") # Default if parsing fails + except (ValueError, TypeError): + return float("inf") + + # Rebuild the output with proper ordering + result_lines = comment_lines.copy() + + # Add non-histogram metrics first + result_lines.extend(non_histogram_metrics) + + # Add histogram metrics in proper order + for base_name in sorted(metrics_by_base.keys()): + hist_data = metrics_by_base[base_name] + + # Sort buckets by 'le' value in ascending order + sorted_buckets = sorted(hist_data["bucket"], key=extract_le_value) + result_lines.extend(sorted_buckets) + + # Add count metrics + result_lines.extend(hist_data["count"]) + + # Add sum metrics + result_lines.extend(hist_data["sum"]) + + return "\n".join(result_lines).encode("utf-8") def text_string_to_metric_families(self) -> t.Generator[Metric, None, None]: yield from self.prometheus_client.parser.text_string_to_metric_families(