From c23006ffa2564c7d31f7b8958e8346f1d4adce90 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Wed, 20 Aug 2025 22:29:31 +0000 Subject: [PATCH 1/9] Implementation and tests for the core utils packages for the scheduler refactor --- tests/unit/utils/test_registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/utils/test_registry.py b/tests/unit/utils/test_registry.py index b5c17975..7769e021 100644 --- a/tests/unit/utils/test_registry.py +++ b/tests/unit/utils/test_registry.py @@ -531,3 +531,4 @@ def walk_packages(package_path, package_name): assert len(objects) == 1 assert TestAutoRegistry.registry_populated is True assert TestAutoRegistry.registry is not None + assert "module1class" in TestAutoRegistry.registry From f81489fd6ba1d7cec5994a0ec3382aaa5d5ff3ee Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Wed, 20 Aug 2025 19:01:42 -0400 Subject: [PATCH 2/9] Update tests/unit/utils/test_registry.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Mark Kurtz --- tests/unit/utils/test_registry.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/utils/test_registry.py b/tests/unit/utils/test_registry.py index 7769e021..b5c17975 100644 --- a/tests/unit/utils/test_registry.py +++ b/tests/unit/utils/test_registry.py @@ -531,4 +531,3 @@ def walk_packages(package_path, package_name): assert len(objects) == 1 assert TestAutoRegistry.registry_populated is True assert TestAutoRegistry.registry is not None - assert "module1class" in TestAutoRegistry.registry From 47ce29248e510dd6852133a7e833efed30a96a16 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Thu, 21 Aug 2025 01:29:34 +0000 Subject: [PATCH 3/9] Implementation and tests for the next utils package modules iteration for the scheduler refactor --- src/guidellm/benchmark/aggregator.py | 12 +- src/guidellm/benchmark/benchmark.py | 9 +- src/guidellm/benchmark/output.py | 12 +- src/guidellm/objects/__init__.py | 15 - src/guidellm/presentation/data_models.py | 2 +- src/guidellm/utils/__init__.py | 26 + src/guidellm/utils/functions.py | 133 +++ src/guidellm/utils/mixins.py | 113 ++ src/guidellm/{objects => utils}/statistics.py | 0 src/guidellm/utils/text.py | 194 +++- tests/unit/mock_benchmark.py | 2 +- tests/unit/objects/__init__.py | 0 tests/unit/objects/test_statistics.py | 785 -------------- tests/unit/utils/test_functions.py | 222 ++++ tests/unit/utils/test_mixins.py | 245 +++++ tests/unit/utils/test_statistics.py | 990 ++++++++++++++++++ tests/unit/utils/test_text.py | 531 ++++++++++ 17 files changed, 2440 insertions(+), 851 deletions(-) delete mode 100644 src/guidellm/objects/__init__.py create mode 100644 src/guidellm/utils/functions.py create mode 100644 src/guidellm/utils/mixins.py rename src/guidellm/{objects => utils}/statistics.py (100%) delete mode 100644 tests/unit/objects/__init__.py delete mode 100644 tests/unit/objects/test_statistics.py create mode 100644 tests/unit/utils/test_functions.py create mode 100644 tests/unit/utils/test_mixins.py create mode 100644 tests/unit/utils/test_statistics.py create mode 100644 tests/unit/utils/test_text.py diff --git a/src/guidellm/benchmark/aggregator.py b/src/guidellm/benchmark/aggregator.py index b322eadd..450b536a 100644 --- a/src/guidellm/benchmark/aggregator.py +++ b/src/guidellm/benchmark/aggregator.py @@ -22,10 +22,6 @@ GenerativeTextResponseStats, ) from guidellm.config import settings -from guidellm.objects import ( - RunningStats, - TimeRunningStats, -) from guidellm.request import ( GenerationRequest, 
GenerativeRequestLoaderDescription, @@ -38,7 +34,13 @@ SchedulerRequestResult, WorkerDescription, ) -from guidellm.utils import StandardBaseModel, StatusBreakdown, check_load_processor +from guidellm.utils import ( + RunningStats, + StandardBaseModel, + StatusBreakdown, + TimeRunningStats, + check_load_processor, +) __all__ = [ "AggregatorT", diff --git a/src/guidellm/benchmark/benchmark.py b/src/guidellm/benchmark/benchmark.py index 77d0fe38..eadcf984 100644 --- a/src/guidellm/benchmark/benchmark.py +++ b/src/guidellm/benchmark/benchmark.py @@ -12,9 +12,6 @@ SynchronousProfile, ThroughputProfile, ) -from guidellm.objects import ( - StatusDistributionSummary, -) from guidellm.request import ( GenerativeRequestLoaderDescription, RequestLoaderDescription, @@ -30,7 +27,11 @@ ThroughputStrategy, WorkerDescription, ) -from guidellm.utils import StandardBaseModel, StatusBreakdown +from guidellm.utils import ( + StandardBaseModel, + StatusBreakdown, + StatusDistributionSummary, +) __all__ = [ "Benchmark", diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index dd94f899..225ed2b1 100644 --- a/src/guidellm/benchmark/output.py +++ b/src/guidellm/benchmark/output.py @@ -21,14 +21,16 @@ ThroughputProfile, ) from guidellm.config import settings -from guidellm.objects import ( - DistributionSummary, - StatusDistributionSummary, -) from guidellm.presentation import UIDataBuilder from guidellm.presentation.injector import create_report from guidellm.scheduler import strategy_display_str -from guidellm.utils import Colors, StandardBaseModel, split_text_list_by_length +from guidellm.utils import ( + Colors, + DistributionSummary, + StandardBaseModel, + StatusDistributionSummary, + split_text_list_by_length, +) __all__ = [ "GenerativeBenchmarksConsole", diff --git a/src/guidellm/objects/__init__.py b/src/guidellm/objects/__init__.py deleted file mode 100644 index 119ac6e7..00000000 --- a/src/guidellm/objects/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from .statistics import ( - DistributionSummary, - Percentiles, - RunningStats, - StatusDistributionSummary, - TimeRunningStats, -) - -__all__ = [ - "DistributionSummary", - "Percentiles", - "RunningStats", - "StatusDistributionSummary", - "TimeRunningStats", -] diff --git a/src/guidellm/presentation/data_models.py b/src/guidellm/presentation/data_models.py index ff5221e3..3164dc86 100644 --- a/src/guidellm/presentation/data_models.py +++ b/src/guidellm/presentation/data_models.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: from guidellm.benchmark.benchmark import GenerativeBenchmark -from guidellm.objects.statistics import DistributionSummary +from guidellm.utils.statistics import DistributionSummary class Bucket(BaseModel): diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 98ac1c36..0a1ff10d 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,6 +1,14 @@ from .auto_importer import AutoImporterMixin from .colors import Colors from .default_group import DefaultGroupHandler +from .functions import ( + all_defined, + safe_add, + safe_divide, + safe_format_timestamp, + safe_getattr, + safe_multiply, +) from .hf_datasets import ( SUPPORTED_TYPES, save_dataset_to_file, @@ -18,6 +26,13 @@ from .random import IntegerRangeSampler from .registry import RegistryMixin from .singleton import SingletonMixin, ThreadSafeSingletonMixin +from .statistics import ( + DistributionSummary, + Percentiles, + RunningStats, + StatusDistributionSummary, + TimeRunningStats, +) from .text import ( 
EndlessTextCreator, clean_text, @@ -33,21 +48,32 @@ "AutoImporterMixin", "Colors", "DefaultGroupHandler", + "DistributionSummary", "EndlessTextCreator", "IntegerRangeSampler", + "Percentiles", "PydanticClassRegistryMixin", "RegistryMixin", "ReloadableBaseModel", + "RunningStats", "SingletonMixin", "StandardBaseDict", "StandardBaseModel", "StatusBreakdown", + "StatusDistributionSummary", "ThreadSafeSingletonMixin", + "TimeRunningStats", + "all_defined", "check_load_processor", "clean_text", "filter_text", "is_puncutation", "load_text", + "safe_add", + "safe_divide", + "safe_format_timestamp", + "safe_getattr", + "safe_multiply", "save_dataset_to_file", "split_text", "split_text_list_by_length", diff --git a/src/guidellm/utils/functions.py b/src/guidellm/utils/functions.py new file mode 100644 index 00000000..6343cbf2 --- /dev/null +++ b/src/guidellm/utils/functions.py @@ -0,0 +1,133 @@ +""" +Utility functions for safe operations and value handling. + +Provides defensive programming utilities for common operations that may encounter +None values, invalid inputs, or edge cases. Includes safe arithmetic operations, +attribute access, and timestamp formatting. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any + +__all__ = [ + "all_defined", + "safe_add", + "safe_divide", + "safe_format_timestamp", + "safe_getattr", + "safe_multiply", +] + + +def safe_getattr(obj: Any | None, attr: str, default: Any = None) -> Any: + """ + Safely get an attribute from an object with None handling. + + :param obj: Object to get the attribute from, or None + :param attr: Name of the attribute to retrieve + :param default: Value to return if object is None or attribute doesn't exist + :return: Attribute value or default if not found or object is None + """ + if obj is None: + return default + + return getattr(obj, attr, default) + + +def all_defined(*values: Any | None) -> bool: + """ + Check if all provided values are defined (not None). + + :param values: Variable number of values to check for None + :return: True if all values are not None, False otherwise + """ + return all(value is not None for value in values) + + +def safe_divide( + numerator: int | float | None, + denominator: int | float | None, + num_default: float = 0.0, + den_default: float = 1.0, +) -> float: + """ + Safely divide two numbers with None handling and zero protection. + + :param numerator: Number to divide, or None to use num_default + :param denominator: Number to divide by, or None to use den_default + :param num_default: Default value for numerator if None + :param den_default: Default value for denominator if None + :return: Division result with protection against division by zero + """ + numerator = numerator if numerator is not None else num_default + denominator = denominator if denominator is not None else den_default + + return numerator / (denominator or 1e-10) + + +def safe_multiply(*values: int | float | None, default: float = 1.0) -> float: + """ + Safely multiply multiple numbers with None handling. 
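A minimal sketch of the None-handling these helpers provide (expected values taken from the unit tests added later in this patch):

    safe_divide(10, 2)                     # 5.0
    safe_divide(None, 2, num_default=6.0)  # 3.0
    safe_divide(10, 0)                     # 10 / 1e-10; never raises ZeroDivisionError
    safe_multiply(2, None, 4)              # 8.0; None factors count as 1.0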
+ + :param values: Variable number of values to multiply, None values treated as 1.0 + :param default: Starting value for multiplication + :return: Product of all non-None values multiplied by default + """ + result = default + for val in values: + result *= val if val is not None else 1.0 + return result + + +def safe_add( + *values: int | float | None, signs: list[int] | None = None, default: float = 0.0 +) -> float: + """ + Safely add multiple numbers with None handling and optional signs. + + :param values: Variable number of values to add, None values use default + :param signs: Optional list of 1 (add) or -1 (subtract) for each value. + If None, all values are added. Must match length of values. + :param default: Value to substitute for None values + :return: Result of adding all values safely (default used when value is None) + """ + if not values: + return default + + values = list(values) + + if signs is None: + signs = [1] * len(values) + + if len(signs) != len(values): + raise ValueError("Length of signs must match length of values") + + result = values[0] if values[0] is not None else default + + for ind in range(1, len(values)): + val = values[ind] if values[ind] is not None else default + result += signs[ind] * val + + return result + + +def safe_format_timestamp( + timestamp: float | None, format_: str = "%H:%M:%S", default: str = "N/A" +) -> str: + """ + Safely format a timestamp with error handling and validation. + + :param timestamp: Unix timestamp to format, or None + :param format_: Strftime format string for timestamp formatting + :param default: Value to return if timestamp is invalid or None + :return: Formatted timestamp string or default value + """ + if timestamp is None or timestamp < 0 or timestamp > 2**31: + return default + + try: + return datetime.fromtimestamp(timestamp).strftime(format_) + except (ValueError, OverflowError, OSError): + return default diff --git a/src/guidellm/utils/mixins.py b/src/guidellm/utils/mixins.py new file mode 100644 index 00000000..1b61f491 --- /dev/null +++ b/src/guidellm/utils/mixins.py @@ -0,0 +1,113 @@ +""" +Mixin classes for common metadata extraction and object introspection. + +Provides reusable mixins for extracting structured metadata from objects, +enabling consistent information exposure across different class hierarchies. +""" + +from __future__ import annotations + +from typing import Any + +__all__ = ["InfoMixin"] + + +class InfoMixin: + """ + Mixin class providing standardized metadata extraction for introspection. + + Enables consistent object metadata extraction patterns across different + class hierarchies for debugging, serialization, and runtime analysis. + Provides both instance and class-level methods for extracting structured + information from arbitrary objects with fallback handling for objects + without built-in info capabilities. + + Example: + :: + from guidellm.utils.mixins import InfoMixin + + class ConfiguredClass(InfoMixin): + def __init__(self, setting: str): + self.setting = setting + + obj = ConfiguredClass("value") + # Returns {'str': 'ConfiguredClass(...)', 'type': 'ConfiguredClass', ...} + print(obj.info) + """ + + @classmethod + def extract_from_obj(cls, obj: Any) -> dict[str, Any]: + """ + Extract structured metadata from any object. + + Attempts to use the object's own `info` method or property if available, + otherwise constructs metadata from object attributes and type information. + Provides consistent metadata format across different object types. 
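A small sketch of the fallback path for objects without an "info" attribute (the Plain class is hypothetical; the returned keys mirror the dictionary built below):

    class Plain:
        def __init__(self) -> None:
            self.name = "demo"

    info = InfoMixin.extract_from_obj(Plain())
    # info["type"] == "Plain" and info["attributes"] == {"name": "demo"}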
+ + :param obj: Object to extract metadata from + :return: Dictionary containing object metadata including type, class, + module, and public attributes + """ + if hasattr(obj, "info"): + return obj.info() if callable(obj.info) else obj.info + + return { + "str": str(obj), + "type": type(obj).__name__, + "class": obj.__class__.__name__ if hasattr(obj, "__class__") else None, + "module": obj.__class__.__module__ if hasattr(obj, "__class__") else None, + "attributes": ( + { + key: val + if isinstance(val, (str, int, float, bool, list, dict)) + else str(val) + for key, val in obj.__dict__.items() + if not key.startswith("_") + } + if hasattr(obj, "__dict__") + else {} + ), + } + + @classmethod + def create_info_dict(cls, obj: Any) -> dict[str, Any]: + """ + Create a structured info dictionary for the given object. + + Builds standardized metadata dictionary containing object identification, + type information, and accessible attributes. Used internally by other + info extraction methods and available for direct metadata construction. + + :param obj: Object to extract info from + :return: Dictionary containing structured metadata about the object + """ + return { + "str": str(obj), + "type": type(obj).__name__, + "class": obj.__class__.__name__ if hasattr(obj, "__class__") else None, + "module": obj.__class__.__module__ if hasattr(obj, "__class__") else None, + "attributes": ( + { + key: val + if isinstance(val, (str, int, float, bool, list, dict)) + else str(val) + for key, val in obj.__dict__.items() + if not key.startswith("_") + } + if hasattr(obj, "__dict__") + else {} + ), + } + + @property + def info(self) -> dict[str, Any]: + """ + Return structured metadata about this instance. + + Provides consistent access to object metadata for debugging, serialization, + and introspection. Uses the create_info_dict method to generate standardized + metadata format including class information and public attributes. + + :return: Dictionary containing class name, module, and public attributes + """ + return self.create_info_dict(self) diff --git a/src/guidellm/objects/statistics.py b/src/guidellm/utils/statistics.py similarity index 100% rename from src/guidellm/objects/statistics.py rename to src/guidellm/utils/statistics.py diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py index cdefaa14..fd43fa41 100644 --- a/src/guidellm/utils/text.py +++ b/src/guidellm/utils/text.py @@ -1,9 +1,21 @@ +""" +Text processing utilities for content manipulation and formatting operations. + +Provides comprehensive text processing capabilities including cleaning, filtering, +splitting, loading from various sources, and formatting utilities. Supports loading +text from URLs, compressed files, package resources, and local files with automatic +encoding detection. Includes specialized formatting for display values and text +wrapping operations for consistent presentation across the system. 
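As a brief orientation, a sketch of the tokenization helpers defined below (outputs inferred from their implementations in this file):

    split_text("Hello, world!", split_punctuation=True)
    # ["Hello", ",", "world", "!"]
    is_puncutation(",")  # True
    is_puncutation("a")  # False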
+""" + +from __future__ import annotations + import gzip import re import textwrap from importlib.resources import as_file, files # type: ignore[attr-defined] from pathlib import Path -from typing import Any, Optional, Union +from typing import Any import ftfy import httpx @@ -11,35 +23,86 @@ from guidellm import data as package_data from guidellm.config import settings +from guidellm.utils.console import Colors __all__ = [ + "MAX_PATH_LENGTH", "EndlessTextCreator", "clean_text", "filter_text", + "format_value_display", "is_puncutation", "load_text", "split_text", "split_text_list_by_length", ] -MAX_PATH_LENGTH = 4096 +MAX_PATH_LENGTH: int = 4096 + + +def format_value_display( + value: float, + label: str, + units: str = "", + total_characters: int | None = None, + digits_places: int | None = None, + decimal_places: int | None = None, +) -> str: + """ + Format a numeric value with units and label for consistent display output. + + Creates standardized display strings for metrics and measurements with + configurable precision, width, and color formatting. Supports both + fixed-width and variable-width output for tabular displays. + + :param value: Numeric value to format and display + :param label: Descriptive label for the value + :param units: Units string to append after the value + :param total_characters: Total width for right-aligned output formatting + :param digits_places: Total number of digits for numeric formatting + :param decimal_places: Number of decimal places for numeric precision + :return: Formatted string with value, units, and colored label + """ + if decimal_places is None and digits_places is None: + formatted_number = f"{value}:.0f" + elif digits_places is None: + formatted_number = f"{value:.{decimal_places}f}" + elif decimal_places is None: + formatted_number = f"{value:>{digits_places}f}" + else: + formatted_number = f"{value:>{digits_places}.{decimal_places}f}" + + result = f"{formatted_number}{units} [{Colors.info}]{label}[/{Colors.info}]" + + if total_characters is not None: + total_characters += len(Colors.info) * 2 + 5 + + if len(result) < total_characters: + result = result.rjust(total_characters) + + return result def split_text_list_by_length( text_list: list[Any], - max_characters: Union[int, list[int]], + max_characters: int | list[int], pad_horizontal: bool = True, pad_vertical: bool = True, ) -> list[list[str]]: """ - Split a list of strings into a list of strings, - each with a maximum length of max_characters - - :param text_list: the list of strings to split - :param max_characters: the maximum length of each string - :param pad_horizontal: whether to pad the strings horizontally, defaults to True - :param pad_vertical: whether to pad the strings vertically, defaults to True - :return: a list of strings + Split text strings into wrapped lines with specified maximum character limits. + + Processes each string in the input list by wrapping text to fit within character + limits, with optional padding for consistent formatting in tabular displays. + Supports different character limits per string and uniform padding across results. 
+ + :param text_list: List of strings to process and wrap + :param max_characters: Maximum characters per line, either single value or + per-string limits + :param pad_horizontal: Right-align lines within their character limits + :param pad_vertical: Pad shorter results to match the longest wrapped result + :return: List of wrapped line lists, one per input string + :raises ValueError: If max_characters list length doesn't match text_list length """ if not isinstance(max_characters, list): max_characters = [max_characters] * len(text_list) @@ -75,16 +138,21 @@ def split_text_list_by_length( def filter_text( text: str, - filter_start: Optional[Union[str, int]] = None, - filter_end: Optional[Union[str, int]] = None, + filter_start: str | int | None = None, + filter_end: str | int | None = None, ) -> str: """ - Filter text by start and end strings or indices + Extract text substring using start and end markers or indices. + + Filters text content by locating string markers or using numeric indices + to extract specific portions. Supports flexible filtering for content + extraction and preprocessing operations. - :param text: the text to filter - :param filter_start: the start string or index to filter from - :param filter_end: the end string or index to filter to - :return: the filtered text + :param text: Source text to filter and extract from + :param filter_start: Starting marker string or index position + :param filter_end: Ending marker string or index position + :return: Filtered text substring between specified boundaries + :raises ValueError: If filter indices are invalid or markers not found """ filter_start_index = -1 filter_end_index = -1 @@ -112,10 +180,29 @@ def filter_text( def clean_text(text: str) -> str: + """ + Normalize text by fixing encoding issues and standardizing whitespace. + + Applies Unicode normalization and whitespace standardization for consistent + text processing. Removes excessive whitespace and fixes common encoding problems. + + :param text: Raw text string to clean and normalize + :return: Cleaned text with normalized encoding and whitespace + """ return re.sub(r"\s+", " ", ftfy.fix_text(text)).strip() def split_text(text: str, split_punctuation: bool = False) -> list[str]: + """ + Split text into tokens with optional punctuation separation. + + Tokenizes text into words and optionally separates punctuation marks + for detailed text analysis and processing operations. + + :param text: Text string to tokenize and split + :param split_punctuation: Separate punctuation marks as individual tokens + :return: List of text tokens + """ text = clean_text(text) if split_punctuation: @@ -124,16 +211,20 @@ def split_text(text: str, split_punctuation: bool = False) -> list[str]: return text.split() -def load_text(data: Union[str, Path], encoding: Optional[str] = None) -> str: +def load_text(data: str | Path, encoding: str | None = None) -> str: """ - Load an HTML file from a path or URL - - :param data: the path or URL to load the HTML file from - :type data: Union[str, Path] - :param encoding: the encoding to use when reading the file - :type encoding: str - :return: the HTML content - :rtype: str + Load text content from various sources including URLs, files, and package data. + + Supports loading from HTTP/FTP URLs, local files, compressed archives, package + resources, and raw text strings. Automatically detects source type and applies + appropriate loading strategy with encoding support. 
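Illustrative call patterns, assuming a local UTF-8 file and a reachable URL (both locations are hypothetical):

    content = load_text(Path("data/prompts.txt"), encoding="utf-8")
    remote = load_text("https://example.com/corpus.txt")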
+ + :param data: Source location or raw text - URL, file path, package resource + identifier, or text content + :param encoding: Character encoding for file reading operations + :return: Loaded text content as string + :raises FileNotFoundError: If local file path does not exist + :raises httpx.HTTPStatusError: If URL request fails """ logger.debug("Loading text: {}", data) @@ -179,29 +270,62 @@ def load_text(data: Union[str, Path], encoding: Optional[str] = None) -> str: def is_puncutation(text: str) -> bool: """ - Check if the text is a punctuation + Check if a single character is a punctuation mark. + + Identifies punctuation characters by excluding alphanumeric characters + and whitespace from single-character strings. - :param text: the text to check - :type text: str - :return: True if the text is a punctuation, False otherwise - :rtype: bool + :param text: Single character string to test + :return: True if the character is punctuation, False otherwise """ return len(text) == 1 and not text.isalnum() and not text.isspace() class EndlessTextCreator: + """ + Infinite text generator for load testing and content creation operations. + + Provides deterministic text generation by cycling through preprocessed word + tokens from source content. Supports filtering and punctuation handling for + realistic text patterns in benchmarking scenarios. + + Example: + :: + creator = EndlessTextCreator("path/to/source.txt") + generated = creator.create_text(start=0, length=100) + more_text = creator.create_text(start=50, length=200) + """ + def __init__( self, - data: Union[str, Path], - filter_start: Optional[Union[str, int]] = None, - filter_end: Optional[Union[str, int]] = None, + data: str | Path, + filter_start: str | int | None = None, + filter_end: str | int | None = None, ): + """ + Initialize text creator with source content and optional filtering. + + :param data: Source text location or content - file path, URL, or raw text + :param filter_start: Starting marker or index for content filtering + :param filter_end: Ending marker or index for content filtering + """ self.data = data self.text = load_text(data) self.filtered_text = filter_text(self.text, filter_start, filter_end) self.words = split_text(self.filtered_text, split_punctuation=True) def create_text(self, start: int, length: int) -> str: + """ + Generate text by cycling through word tokens from the specified position. + + Creates deterministic text sequences by selecting consecutive tokens from + the preprocessed word list, wrapping around when reaching the end. + Maintains proper spacing and punctuation formatting. 
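A rough sketch of the cycling behavior, assuming a hypothetical source.txt containing "The quick brown fox jumps." (exact spacing follows the loop below):

    creator = EndlessTextCreator("source.txt")
    creator.create_text(start=0, length=4)  # expected to yield roughly "The quick brown fox"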
+ + :param start: Starting position in the token sequence + :param length: Number of tokens to include in generated text + :return: Generated text string with proper spacing and punctuation + """ text = "" for counter in range(length): diff --git a/tests/unit/mock_benchmark.py b/tests/unit/mock_benchmark.py index 81364fa1..29c092c8 100644 --- a/tests/unit/mock_benchmark.py +++ b/tests/unit/mock_benchmark.py @@ -6,13 +6,13 @@ GenerativeTextResponseStats, SynchronousProfile, ) -from guidellm.objects import StatusBreakdown from guidellm.request import GenerativeRequestLoaderDescription from guidellm.scheduler import ( GenerativeRequestsWorkerDescription, SchedulerRequestInfo, SynchronousStrategy, ) +from guidellm.utils import StatusBreakdown __all__ = ["mock_generative_benchmark"] diff --git a/tests/unit/objects/__init__.py b/tests/unit/objects/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/objects/test_statistics.py b/tests/unit/objects/test_statistics.py deleted file mode 100644 index ede77175..00000000 --- a/tests/unit/objects/test_statistics.py +++ /dev/null @@ -1,785 +0,0 @@ -import math -import time -from typing import Literal - -import numpy as np -import pytest - -from guidellm.objects import ( - DistributionSummary, - Percentiles, - RunningStats, - StatusDistributionSummary, - TimeRunningStats, -) - - -def create_default_percentiles() -> Percentiles: - return Percentiles( - p001=0.1, - p01=1.0, - p05=5.0, - p10=10.0, - p25=25.0, - p50=50.0, - p75=75.0, - p90=90.0, - p95=95.0, - p99=99.0, - p999=99.9, - ) - - -def create_default_distribution_summary() -> DistributionSummary: - return DistributionSummary( - mean=50.0, - median=50.0, - mode=50.0, - variance=835, - std_dev=math.sqrt(835), - min=0.0, - max=100.0, - count=1001, - total_sum=50050.0, - percentiles=create_default_percentiles(), - ) - - -@pytest.mark.smoke -def test_percentiles_initialization(): - percentiles = create_default_percentiles() - assert percentiles.p001 == 0.1 - assert percentiles.p01 == 1.0 - assert percentiles.p05 == 5.0 - assert percentiles.p10 == 10.0 - assert percentiles.p25 == 25.0 - assert percentiles.p50 == 50.0 - assert percentiles.p75 == 75.0 - assert percentiles.p90 == 90.0 - assert percentiles.p95 == 95.0 - assert percentiles.p99 == 99.0 - assert percentiles.p999 == 99.9 - - -@pytest.mark.smoke -def test_percentiles_invalid_initialization(): - test_kwargs = { - "p001": 0.1, - "p01": 1.0, - "p05": 5.0, - "p10": 10.0, - "p25": 25.0, - "p50": 50.0, - "p75": 75.0, - "p90": 90.0, - "p95": 95.0, - "p99": 99.0, - "p999": 99.9, - } - test_missing_keys = list(test_kwargs.keys()) - - for missing_key in test_missing_keys: - kwargs = {key: val for key, val in test_kwargs.items() if key != missing_key} - with pytest.raises(ValueError): - Percentiles(**kwargs) - - -@pytest.mark.smoke -def test_percentiles_marshalling(): - percentiles = create_default_percentiles() - serialized = percentiles.model_dump() - deserialized = Percentiles.model_validate(serialized) - - for key, value in vars(percentiles).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke -def test_distribution_summary_initilaization(): - distribution_summary = create_default_distribution_summary() - assert distribution_summary.mean == 50.0 - assert distribution_summary.median == 50.0 - assert distribution_summary.mode == 50.0 - assert distribution_summary.variance == 835 - assert distribution_summary.std_dev == math.sqrt(835) - assert distribution_summary.min == 0.0 - assert 
distribution_summary.max == 100.0 - assert distribution_summary.count == 1001 - assert distribution_summary.total_sum == 50050.0 - assert distribution_summary.percentiles.p001 == 0.1 - assert distribution_summary.percentiles.p01 == 1.0 - assert distribution_summary.percentiles.p05 == 5.0 - assert distribution_summary.percentiles.p10 == 10.0 - assert distribution_summary.percentiles.p25 == 25.0 - assert distribution_summary.percentiles.p50 == 50.0 - assert distribution_summary.percentiles.p75 == 75.0 - assert distribution_summary.percentiles.p90 == 90.0 - assert distribution_summary.percentiles.p95 == 95.0 - assert distribution_summary.percentiles.p99 == 99.0 - assert distribution_summary.percentiles.p999 == 99.9 - - -@pytest.mark.smoke -def test_distribution_summary_invalid_initialization(): - test_kwargs = { - "mean": 50.0, - "median": 50.0, - "mode": 50.0, - "variance": 835, - "std_dev": math.sqrt(835), - "min": 0.0, - "max": 100.0, - "count": 1001, - "total_sum": 50050.0, - "percentiles": create_default_percentiles(), - } - test_missing_keys = list(test_kwargs.keys()) - for missing_key in test_missing_keys: - kwargs = {key: val for key, val in test_kwargs.items() if key != missing_key} - with pytest.raises(ValueError): - DistributionSummary(**kwargs) # type: ignore[arg-type] - - -@pytest.mark.smoke -def test_distribution_summary_marshalling(): - distribution_summary = create_default_distribution_summary() - serialized = distribution_summary.model_dump() - deserialized = DistributionSummary.model_validate(serialized) - - for key, value in vars(distribution_summary).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke -def test_distribution_summary_from_distribution_function(): - values = [val / 10.0 for val in range(1001)] - distribution = [(val, 1.0) for val in values] - distribution_summary = DistributionSummary.from_distribution_function(distribution) - assert distribution_summary.mean == pytest.approx(np.mean(values)) - assert distribution_summary.median == pytest.approx(np.median(values)) - assert distribution_summary.mode == 0.0 - assert distribution_summary.variance == pytest.approx(np.var(values, ddof=0)) - assert distribution_summary.std_dev == pytest.approx(np.std(values, ddof=0)) - assert distribution_summary.min == min(values) - assert distribution_summary.max == max(values) - assert distribution_summary.count == len(values) - assert distribution_summary.total_sum == sum(values) - assert distribution_summary.percentiles.p001 == pytest.approx( - np.percentile(values, 0.1) - ) - assert distribution_summary.percentiles.p01 == pytest.approx( - np.percentile(values, 1.0) - ) - assert distribution_summary.percentiles.p05 == pytest.approx( - np.percentile(values, 5.0) - ) - assert distribution_summary.percentiles.p10 == pytest.approx( - np.percentile(values, 10.0) - ) - assert distribution_summary.percentiles.p25 == pytest.approx( - np.percentile(values, 25.0) - ) - assert distribution_summary.percentiles.p50 == pytest.approx( - np.percentile(values, 50.0) - ) - assert distribution_summary.percentiles.p75 == pytest.approx( - np.percentile(values, 75.0) - ) - assert distribution_summary.percentiles.p90 == pytest.approx( - np.percentile(values, 90.0) - ) - assert distribution_summary.percentiles.p95 == pytest.approx( - np.percentile(values, 95.0) - ) - assert distribution_summary.percentiles.p99 == pytest.approx( - np.percentile(values, 99.0) - ) - assert distribution_summary.percentiles.p999 == pytest.approx( - np.percentile(values, 99.9) - ) - assert 
distribution_summary.cumulative_distribution_function is None - - distribution_summary_cdf = DistributionSummary.from_distribution_function( - distribution, include_cdf=True - ) - assert distribution_summary_cdf.cumulative_distribution_function is not None - assert len(distribution_summary_cdf.cumulative_distribution_function) == len(values) - - -def test_distribution_summary_from_values(): - values = [val / 10 for val in range(1001)] - distribution_summary = DistributionSummary.from_values(values) - assert distribution_summary.mean == pytest.approx(np.mean(values)) - assert distribution_summary.median == pytest.approx(np.median(values)) - assert distribution_summary.mode == 0.0 - assert distribution_summary.variance == pytest.approx(np.var(values, ddof=0)) - assert distribution_summary.std_dev == pytest.approx(np.std(values, ddof=0)) - assert distribution_summary.min == min(values) - assert distribution_summary.max == max(values) - assert distribution_summary.count == len(values) - assert distribution_summary.total_sum == sum(values) - assert distribution_summary.percentiles.p001 == pytest.approx( - np.percentile(values, 0.1) - ) - assert distribution_summary.percentiles.p01 == pytest.approx( - np.percentile(values, 1.0) - ) - assert distribution_summary.percentiles.p05 == pytest.approx( - np.percentile(values, 5.0) - ) - assert distribution_summary.percentiles.p10 == pytest.approx( - np.percentile(values, 10.0) - ) - assert distribution_summary.percentiles.p25 == pytest.approx( - np.percentile(values, 25.0) - ) - assert distribution_summary.percentiles.p50 == pytest.approx( - np.percentile(values, 50.0) - ) - assert distribution_summary.percentiles.p75 == pytest.approx( - np.percentile(values, 75.0) - ) - assert distribution_summary.percentiles.p90 == pytest.approx( - np.percentile(values, 90.0) - ) - assert distribution_summary.percentiles.p95 == pytest.approx( - np.percentile(values, 95.0) - ) - assert distribution_summary.percentiles.p99 == pytest.approx( - np.percentile(values, 99.0) - ) - assert distribution_summary.percentiles.p999 == pytest.approx( - np.percentile(values, 99.9) - ) - assert distribution_summary.cumulative_distribution_function is None - - distribution_summary_weights = DistributionSummary.from_values( - values, weights=[2] * len(values) - ) - assert distribution_summary_weights.mean == pytest.approx(np.mean(values)) - assert distribution_summary_weights.median == pytest.approx(np.median(values)) - assert distribution_summary_weights.mode == 0.0 - assert distribution_summary_weights.variance == pytest.approx( - np.var(values, ddof=0) - ) - assert distribution_summary_weights.std_dev == pytest.approx(np.std(values, ddof=0)) - assert distribution_summary_weights.min == min(values) - assert distribution_summary_weights.max == max(values) - assert distribution_summary_weights.count == len(values) - assert distribution_summary_weights.total_sum == sum(values) - assert distribution_summary_weights.cumulative_distribution_function is None - - distribution_summary_cdf = DistributionSummary.from_values(values, include_cdf=True) - assert distribution_summary_cdf.cumulative_distribution_function is not None - assert len(distribution_summary_cdf.cumulative_distribution_function) == len(values) - - -def test_distribution_summary_from_request_times_concurrency(): - # create consistent timestamped values matching a rate of 10 per second - requests = [(val / 10, val / 10 + 1) for val in range(10001)] - distribution_summary = DistributionSummary.from_request_times( - requests, 
distribution_type="concurrency" - ) - assert distribution_summary.mean == pytest.approx(10.0, abs=0.01) - assert distribution_summary.median == pytest.approx(10.0) - assert distribution_summary.mode == 10.0 - assert distribution_summary.variance == pytest.approx(0, abs=0.1) - assert distribution_summary.std_dev == pytest.approx(0, abs=0.3) - assert distribution_summary.min == pytest.approx(1) - assert distribution_summary.max == pytest.approx(10.0) - assert distribution_summary.count == 10 - assert distribution_summary.total_sum == pytest.approx(55.0) - assert distribution_summary.percentiles.p001 == pytest.approx(10, abs=5) - assert distribution_summary.percentiles.p01 == pytest.approx(10) - assert distribution_summary.percentiles.p05 == pytest.approx(10) - assert distribution_summary.percentiles.p10 == pytest.approx(10) - assert distribution_summary.percentiles.p25 == pytest.approx(10) - assert distribution_summary.percentiles.p50 == pytest.approx(10) - assert distribution_summary.percentiles.p75 == pytest.approx(10) - assert distribution_summary.percentiles.p90 == pytest.approx(10) - assert distribution_summary.percentiles.p95 == pytest.approx(10) - assert distribution_summary.percentiles.p99 == pytest.approx(10) - assert distribution_summary.percentiles.p999 == pytest.approx(10) - assert distribution_summary.cumulative_distribution_function is None - - distribution_summary_cdf = DistributionSummary.from_request_times( - requests, distribution_type="concurrency", include_cdf=True - ) - assert distribution_summary_cdf.cumulative_distribution_function is not None - assert len(distribution_summary_cdf.cumulative_distribution_function) == 10 - - -def test_distribution_summary_from_request_times_rate(): - # create consistent timestamped values matching a rate of 10 per second - requests = [(val / 10, val / 10 + 1) for val in range(10001)] - distribution_summary = DistributionSummary.from_request_times( - requests, distribution_type="rate" - ) - assert distribution_summary.mean == pytest.approx(10.0, abs=0.01) - assert distribution_summary.median == pytest.approx(10.0) - assert distribution_summary.mode == pytest.approx(10.0) - assert distribution_summary.variance == pytest.approx(0, abs=0.1) - assert distribution_summary.std_dev == pytest.approx(0, abs=0.3) - assert distribution_summary.min == pytest.approx(1.0) - assert distribution_summary.max == pytest.approx(10.0) - assert distribution_summary.count == 12 - assert distribution_summary.total_sum == pytest.approx(111.0) - assert distribution_summary.percentiles.p001 == pytest.approx(10.0, abs=0.5) - assert distribution_summary.percentiles.p01 == pytest.approx(10.0) - assert distribution_summary.percentiles.p05 == pytest.approx(10.0) - assert distribution_summary.percentiles.p10 == pytest.approx(10.0) - assert distribution_summary.percentiles.p25 == pytest.approx(10.0) - assert distribution_summary.percentiles.p50 == pytest.approx(10.0) - assert distribution_summary.percentiles.p75 == pytest.approx(10.0) - assert distribution_summary.percentiles.p90 == pytest.approx(10.0) - assert distribution_summary.percentiles.p95 == pytest.approx(10.0) - assert distribution_summary.percentiles.p99 == pytest.approx(10.0) - assert distribution_summary.percentiles.p999 == pytest.approx(10.0) - assert distribution_summary.cumulative_distribution_function is None - - distribution_summary_cdf = DistributionSummary.from_request_times( - requests, distribution_type="rate", include_cdf=True - ) - assert distribution_summary_cdf.cumulative_distribution_function 
is not None - assert len(distribution_summary_cdf.cumulative_distribution_function) == 12 - - -def test_distribution_summary_from_iterable_request_times(): - # create consistent timestamped values matching a rate of 10 per second - requests = [(val / 10, val / 10 + 1) for val in range(10001)] - # create 9 iterations for each request with first iter at start + 0.1 - # and spaced at 0.1 seconds apart - first_iter_times = [val / 10 + 0.1 for val in range(10001)] - iter_counts = [9 for _ in range(10001)] - first_iter_counts = [1 for _ in range(10001)] - - distribution_summary = DistributionSummary.from_iterable_request_times( - requests, first_iter_times, iter_counts, first_iter_counts - ) - assert distribution_summary.mean == pytest.approx(90.0, abs=0.1) - assert distribution_summary.median == pytest.approx(80.0) - assert distribution_summary.mode == pytest.approx(80.0) - assert distribution_summary.variance == pytest.approx(704.463, abs=0.001) - assert distribution_summary.std_dev == pytest.approx(26.541, abs=0.001) - assert distribution_summary.min == pytest.approx(0.0) - assert distribution_summary.max == pytest.approx(160.0) - assert distribution_summary.count == 44 - assert distribution_summary.total_sum == pytest.approx(3538.85, abs=0.01) - assert distribution_summary.percentiles.p001 == pytest.approx(80.0) - assert distribution_summary.percentiles.p01 == pytest.approx(80.0) - assert distribution_summary.percentiles.p05 == pytest.approx(80.0) - assert distribution_summary.percentiles.p10 == pytest.approx(80.0) - assert distribution_summary.percentiles.p25 == pytest.approx(80.0) - assert distribution_summary.percentiles.p50 == pytest.approx(80.0) - assert distribution_summary.percentiles.p75 == pytest.approx(80.0) - assert distribution_summary.percentiles.p90 == pytest.approx(160.0) - assert distribution_summary.percentiles.p95 == pytest.approx(160.0) - assert distribution_summary.percentiles.p99 == pytest.approx(160.0) - assert distribution_summary.percentiles.p999 == pytest.approx(160.0) - assert distribution_summary.cumulative_distribution_function is None - - distribution_summary_cdf = DistributionSummary.from_iterable_request_times( - requests, first_iter_times, iter_counts, first_iter_counts, include_cdf=True - ) - assert distribution_summary_cdf.cumulative_distribution_function is not None - assert len(distribution_summary_cdf.cumulative_distribution_function) == 44 - - -def test_status_distribution_summary_initialization(): - status_distribution_summary = StatusDistributionSummary( - total=create_default_distribution_summary(), - successful=create_default_distribution_summary(), - incomplete=create_default_distribution_summary(), - errored=create_default_distribution_summary(), - ) - assert status_distribution_summary.total.mean == 50.0 - assert status_distribution_summary.successful.mean == 50.0 - assert status_distribution_summary.incomplete.mean == 50.0 - assert status_distribution_summary.errored.mean == 50.0 - - -def test_status_distribution_summary_marshalling(): - status_distribution_summary = StatusDistributionSummary( - total=create_default_distribution_summary(), - successful=create_default_distribution_summary(), - incomplete=create_default_distribution_summary(), - errored=create_default_distribution_summary(), - ) - serialized = status_distribution_summary.model_dump() - deserialized = StatusDistributionSummary.model_validate(serialized) - - for key, value in vars(status_distribution_summary).items(): - for child_key, child_value in vars(value).items(): - assert 
getattr(getattr(deserialized, key), child_key) == child_value - - -def test_status_distribution_summary_from_values(): - value_types: list[Literal["successful", "incomplete", "error"]] = [ - "successful", - "incomplete", - "error", - ] * 1000 - values = [float(val % 3) for val in range(3000)] - status_distribution_summary = StatusDistributionSummary.from_values( - value_types, values - ) - assert status_distribution_summary.total.count == len(values) - assert status_distribution_summary.total.mean == pytest.approx(np.mean(values)) - assert status_distribution_summary.total.cumulative_distribution_function is None - assert status_distribution_summary.successful.mean == pytest.approx( - np.mean( - [val for ind, val in enumerate(values) if value_types[ind] == "successful"] - ) - ) - assert status_distribution_summary.successful.count == len( - [val for ind, val in enumerate(values) if value_types[ind] == "successful"] - ) - assert ( - status_distribution_summary.successful.cumulative_distribution_function is None - ) - assert status_distribution_summary.incomplete.mean == pytest.approx( - np.mean( - [val for ind, val in enumerate(values) if value_types[ind] == "incomplete"] - ) - ) - assert status_distribution_summary.incomplete.count == len( - [val for ind, val in enumerate(values) if value_types[ind] == "incomplete"] - ) - assert ( - status_distribution_summary.incomplete.cumulative_distribution_function is None - ) - assert status_distribution_summary.errored.mean == pytest.approx( - np.mean([val for ind, val in enumerate(values) if value_types[ind] == "error"]) - ) - assert status_distribution_summary.errored.count == len( - [val for ind, val in enumerate(values) if value_types[ind] == "error"] - ) - assert status_distribution_summary.errored.cumulative_distribution_function is None - - status_distribution_summary_cdf = StatusDistributionSummary.from_values( - value_types, values, include_cdf=True - ) - assert ( - status_distribution_summary_cdf.total.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.successful.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.incomplete.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.errored.cumulative_distribution_function - is not None - ) - - -def test_status_distribution_summary_from_request_times(): - request_types: list[Literal["successful", "incomplete", "error"]] = [ - "successful", - "incomplete", - "error", - ] * 1000 - requests = [((val % 3) / 10, (val % 3) / 10 + 1) for val in range(3000)] - status_distribution_summary = StatusDistributionSummary.from_request_times( - request_types, requests, distribution_type="concurrency" - ) - assert status_distribution_summary.total.mean == pytest.approx(2500.0, abs=0.01) - assert status_distribution_summary.total.cumulative_distribution_function is None - assert status_distribution_summary.successful.mean == pytest.approx( - 1000.0, abs=0.01 - ) - assert ( - status_distribution_summary.successful.cumulative_distribution_function is None - ) - assert status_distribution_summary.incomplete.mean == pytest.approx( - 1000.0, abs=0.01 - ) - assert ( - status_distribution_summary.incomplete.cumulative_distribution_function is None - ) - assert status_distribution_summary.errored.mean == pytest.approx(1000.0, abs=0.01) - assert status_distribution_summary.errored.cumulative_distribution_function is None - - status_distribution_summary_cdf = 
StatusDistributionSummary.from_request_times( - request_types, requests, distribution_type="concurrency", include_cdf=True - ) - assert ( - status_distribution_summary_cdf.total.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.successful.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.incomplete.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.errored.cumulative_distribution_function - is not None - ) - - -def test_status_distribution_summary_from_iterable_request_times(): - request_types: list[Literal["successful", "incomplete", "error"]] = [ - "successful", - "incomplete", - "error", - ] * 1000 - requests = [(val % 3 / 10, val % 3 / 10 + 1) for val in range(3000)] - first_iter_times = [val % 3 / 10 + 0.1 for val in range(3000)] - iter_counts = [9 for _ in range(3000)] - first_iter_counts = [1 for _ in range(3000)] - status_distribution_summary = StatusDistributionSummary.from_iterable_request_times( - request_types, - requests, - first_iter_times, - iter_counts, - first_iter_counts, - ) - assert status_distribution_summary.total.mean == pytest.approx(21666.66, abs=0.01) - assert status_distribution_summary.total.cumulative_distribution_function is None - assert status_distribution_summary.successful.mean == pytest.approx( - 8000.0, abs=0.01 - ) - assert ( - status_distribution_summary.successful.cumulative_distribution_function is None - ) - assert status_distribution_summary.incomplete.mean == pytest.approx( - 8000.0, abs=0.01 - ) - assert ( - status_distribution_summary.incomplete.cumulative_distribution_function is None - ) - assert status_distribution_summary.errored.mean == pytest.approx(8000.0, abs=0.01) - assert status_distribution_summary.errored.cumulative_distribution_function is None - - status_distribution_summary_cdf = ( - StatusDistributionSummary.from_iterable_request_times( - request_types, - requests, - first_iter_times, - iter_counts, - first_iter_counts, - include_cdf=True, - ) - ) - assert ( - status_distribution_summary_cdf.total.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.successful.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.incomplete.cumulative_distribution_function - is not None - ) - assert ( - status_distribution_summary_cdf.errored.cumulative_distribution_function - is not None - ) - - -def test_running_stats_initialization(): - running_stats = RunningStats() - assert running_stats.start_time == pytest.approx(time.time(), abs=0.01) - assert running_stats.count == 0 - assert running_stats.total == 0 - assert running_stats.last == 0 - assert running_stats.mean == 0 - assert running_stats.rate == 0 - - -def test_running_stats_marshalling(): - running_stats = RunningStats() - serialized = running_stats.model_dump() - deserialized = RunningStats.model_validate(serialized) - - for key, value in vars(running_stats).items(): - assert getattr(deserialized, key) == value - - -def test_running_stats_update(): - running_stats = RunningStats() - running_stats.update(1) - assert running_stats.count == 1 - assert running_stats.total == 1 - assert running_stats.last == 1 - assert running_stats.mean == 1 - time.sleep(1.0) - assert running_stats.rate == pytest.approx( - 1.0 / (time.time() - running_stats.start_time), abs=0.1 - ) - - running_stats.update(2) - assert running_stats.count == 2 - assert running_stats.total == 3 - assert 
running_stats.last == 2 - assert running_stats.mean == 1.5 - time.sleep(1) - assert running_stats.rate == pytest.approx( - 3 / (time.time() - running_stats.start_time), abs=0.1 - ) - - -def test_running_stats_add(): - running_stats = RunningStats() - mean = running_stats + 1 - assert mean == 1 - assert mean == running_stats.mean - assert running_stats.count == 1 - assert running_stats.total == 1 - assert running_stats.last == 1 - - -def test_running_stats_iadd(): - running_stats = RunningStats() - running_stats += 1 - assert running_stats.count == 1 - assert running_stats.total == 1 - assert running_stats.last == 1 - assert running_stats.mean == 1 - - -def test_time_running_stats_initialization(): - time_running_stats = TimeRunningStats() - assert time_running_stats.start_time == pytest.approx(time.time(), abs=0.01) - assert time_running_stats.count == 0 - assert time_running_stats.total == 0 - assert time_running_stats.last == 0 - assert time_running_stats.mean == 0 - assert time_running_stats.rate == 0 - assert time_running_stats.total_ms == 0 - assert time_running_stats.last_ms == 0 - assert time_running_stats.mean_ms == 0 - assert time_running_stats.rate_ms == 0 - - -def test_time_running_stats_marshalling(): - time_running_stats = TimeRunningStats() - serialized = time_running_stats.model_dump() - deserialized = TimeRunningStats.model_validate(serialized) - - for key, value in vars(time_running_stats).items(): - assert getattr(deserialized, key) == value - - -def test_time_running_stats_update(): - time_running_stats = TimeRunningStats() - time_running_stats.update(1) - assert time_running_stats.count == 1 - assert time_running_stats.total == 1 - assert time_running_stats.last == 1 - assert time_running_stats.mean == 1 - assert time_running_stats.total_ms == 1000 - assert time_running_stats.last_ms == 1000 - assert time_running_stats.mean_ms == 1000 - time.sleep(1.0) - assert time_running_stats.rate == pytest.approx( - 1.0 / (time.time() - time_running_stats.start_time), abs=0.1 - ) - assert time_running_stats.rate_ms == pytest.approx( - 1000 / (time.time() - time_running_stats.start_time), abs=0.1 - ) - - time_running_stats.update(2) - assert time_running_stats.count == 2 - assert time_running_stats.total == 3 - assert time_running_stats.last == 2 - assert time_running_stats.mean == 1.5 - assert time_running_stats.total_ms == 3000 - assert time_running_stats.last_ms == 2000 - assert time_running_stats.mean_ms == 1500 - time.sleep(1) - assert time_running_stats.rate == pytest.approx( - 3 / (time.time() - time_running_stats.start_time), abs=0.1 - ) - assert time_running_stats.rate_ms == pytest.approx( - 3000 / (time.time() - time_running_stats.start_time), abs=0.1 - ) - - -@pytest.mark.regression -def test_distribution_summary_concurrency_double_counting_regression(): - """Specific regression test for the double-counting bug in concurrency calculation. - - Before the fix, when events were merged due to epsilon, the deltas were summed - but then the active count wasn't properly accumulated, causing incorrect results. 
- - ### WRITTEN BY AI ### - """ - epsilon = 1e-6 - - # Create a scenario where multiple requests start at exactly the same time - # This should result in events being merged, testing the accumulation logic - same_start_time = 1.0 - requests = [ - (same_start_time, 3.0), - (same_start_time, 4.0), - (same_start_time, 5.0), - (same_start_time + epsilon / 3, 6.0), # Very close start (within epsilon) - ] - - distribution_summary = DistributionSummary.from_request_times( - requests, distribution_type="concurrency", epsilon=epsilon - ) - - # All requests start at the same time (or within epsilon), so they should - # all be considered concurrent from the start - # Expected timeline: - # - t=1.0-3.0: 4 concurrent requests - # - t=3.0-4.0: 3 concurrent requests - # - t=4.0-5.0: 2 concurrent requests - # - t=5.0-6.0: 1 concurrent request - - assert distribution_summary.max == 4.0 # All 4 requests concurrent at start - assert distribution_summary.min == 1.0 # 1 request still running at the end - - -@pytest.mark.sanity -def test_distribution_summary_concurrency_epsilon_edge_case(): - """Test the exact epsilon boundary condition. - - ### WRITTEN BY AI ### - """ - epsilon = 1e-6 - - # Test requests that are exactly epsilon apart - should be merged - requests_exactly_epsilon = [ - (1.0, 2.0), - (1.0 + epsilon, 2.5), # Exactly epsilon apart - (2.0, 2.5), # Another close request - ] - - dist_epsilon = DistributionSummary.from_request_times( - requests_exactly_epsilon, distribution_type="concurrency", epsilon=epsilon - ) - - # Should be treated as concurrent (merged events) - assert dist_epsilon.max == 2.0 - assert dist_epsilon.min == 2.0 - - # Test requests that are just over epsilon apart - should NOT be merged - requests_over_epsilon = [ - (1.0, 2.0), - (1.0 + epsilon * 1.1, 2.5), # Just over epsilon apart - (2.0, 2.5), # Another close request - ] - - dist_over_epsilon = DistributionSummary.from_request_times( - requests_over_epsilon, distribution_type="concurrency", epsilon=epsilon - ) - - # These should be treated separately, so max concurrency depends on overlap - # At t=1.0 to 1.0+epsilon*1.1: 1 concurrent - # At t=1.0+epsilon*1.1 to 2.0: 2 concurrent - # At t=2.0 to 2.5: 1 concurrent - assert dist_over_epsilon.max == 2.0 - assert dist_over_epsilon.min == 1.0 diff --git a/tests/unit/utils/test_functions.py b/tests/unit/utils/test_functions.py new file mode 100644 index 00000000..3b353759 --- /dev/null +++ b/tests/unit/utils/test_functions.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +from datetime import datetime + +import pytest + +from guidellm.utils.functions import ( + all_defined, + safe_add, + safe_divide, + safe_format_timestamp, + safe_getattr, + safe_multiply, +) + + +class TestAllDefined: + """Test suite for all_defined function.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("values", "expected"), + [ + ((1, 2, 3), True), + (("test", "hello"), True), + ((0, False, ""), True), + ((1, None, 3), False), + ((None,), False), + ((None, None), False), + ((), True), + ], + ) + def test_invocation(self, values, expected): + """Test all_defined with valid inputs.""" + result = all_defined(*values) + assert result == expected + + @pytest.mark.sanity + def test_mixed_types(self): + """Test all_defined with mixed data types.""" + result = all_defined(1, "test", [], {}, 0.0, False) + assert result is True + + result = all_defined(1, "test", None, {}) + assert result is False + + +class TestSafeGetattr: + """Test suite for safe_getattr function.""" + + @pytest.mark.smoke + 
@pytest.mark.parametrize( + ("obj", "attr", "default", "expected"), + [ + (None, "any_attr", "default_val", "default_val"), + (None, "any_attr", None, None), + ("test_string", "nonexistent", "default_val", "default_val"), + ], + ) + def test_invocation(self, obj, attr, default, expected): + """Test safe_getattr with valid inputs.""" + result = safe_getattr(obj, attr, default) + assert result == expected + + @pytest.mark.smoke + def test_with_object(self): + """Test safe_getattr with actual object attributes.""" + + class TestObj: + test_attr = "test_value" + + obj = TestObj() + result = safe_getattr(obj, "test_attr", "default") + assert result == "test_value" + + result = safe_getattr(obj, "missing_attr", "default") + assert result == "default" + + # Test with method attribute + result = safe_getattr("test_string", "upper", None) + assert callable(result) + assert result() == "TEST_STRING" + + +class TestSafeDivide: + """Test suite for safe_divide function.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("numerator", "denominator", "num_default", "den_default", "expected"), + [ + (10, 2, 0.0, 1.0, 5.0), + (None, 2, 6.0, 1.0, 3.0), + (10, None, 0.0, 5.0, 2.0), + (None, None, 8.0, 4.0, 2.0), + (10, 0, 0.0, 1.0, 10 / 1e-10), + ], + ) + def test_invocation( + self, numerator, denominator, num_default, den_default, expected + ): + """Test safe_divide with valid inputs.""" + result = safe_divide(numerator, denominator, num_default, den_default) + assert result == pytest.approx(expected, rel=1e-6) + + @pytest.mark.sanity + def test_zero_division_protection(self): + """Test safe_divide protection against zero division.""" + result = safe_divide(10, 0) + assert result == 10 / 1e-10 + + result = safe_divide(5, None, den_default=0) + assert result == 5 / 1e-10 + + +class TestSafeMultiply: + """Test suite for safe_multiply function.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("values", "default", "expected"), + [ + ((2, 3, 4), 1.0, 24.0), + ((2, None, 4), 1.0, 8.0), + ((None, None), 5.0, 5.0), + ((), 3.0, 3.0), + ((2, 3, None, 5), 2.0, 60.0), + ], + ) + def test_invocation(self, values, default, expected): + """Test safe_multiply with valid inputs.""" + result = safe_multiply(*values, default=default) + assert result == expected + + @pytest.mark.sanity + def test_with_zero(self): + """Test safe_multiply with zero values.""" + result = safe_multiply(2, 0, 3, default=1.0) + assert result == 0.0 + + result = safe_multiply(None, 0, None, default=5.0) + assert result == 0.0 + + +class TestSafeAdd: + """Test suite for safe_add function.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("values", "signs", "default", "expected"), + [ + ((1, 2, 3), None, 0.0, 6.0), + ((1, None, 3), None, 5.0, 9.0), + ((10, 5), [1, -1], 0.0, 5.0), + ((None, None), [1, -1], 2.0, 0.0), + ((), None, 3.0, 3.0), + ((1, 2, 3), [1, 1, -1], 0.0, 0.0), + ], + ) + def test_invocation(self, values, signs, default, expected): + """Test safe_add with valid inputs.""" + result = safe_add(*values, signs=signs, default=default) + assert result == expected + + @pytest.mark.sanity + def test_invalid_signs_length(self): + """Test safe_add with invalid signs length.""" + with pytest.raises( + ValueError, match="Length of signs must match length of values" + ): + safe_add(1, 2, 3, signs=[1, -1]) + + @pytest.mark.sanity + def test_single_value(self): + """Test safe_add with single value.""" + result = safe_add(5, default=1.0) + assert result == 5.0 + + result = safe_add(None, default=3.0) + assert result == 3.0 + + 
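# A minimal sketch of safe_add, inferred only from the parametrized expectations
# above. The real guidellm.utils.functions.safe_add is added elsewhere in this
# patch series and is not shown here, so the body below is an assumption; only the
# call signature (values, signs, default) and the error message are taken from the
# tests themselves.
def safe_add(*values, signs=None, default=0.0):
    """Sum values, substituting `default` for None and applying optional signs."""
    if not values:
        # The ((), None, 3.0, 3.0) case above expects the default back when no
        # values are given.
        return default
    if signs is None:
        signs = [1] * len(values)
    if len(signs) != len(values):
        raise ValueError("Length of signs must match length of values")
    # Replace missing values with the default, then apply the per-value sign.
    return float(
        sum(
            sign * (val if val is not None else default)
            for sign, val in zip(signs, values)
        )
    )
# For example, safe_add(10, 5, signs=[1, -1]) == 5.0, matching the case above.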
+class TestSafeFormatTimestamp: + """Test suite for safe_format_timestamp function.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("timestamp", "format_", "default", "expected"), + [ + (1609459200.0, "%Y-%m-%d", "N/A", "2020-12-31"), + (1609459200.0, "%H:%M:%S", "N/A", "19:00:00"), + (None, "%H:%M:%S", "N/A", "N/A"), + (-1, "%H:%M:%S", "N/A", "N/A"), + (2**32, "%H:%M:%S", "N/A", "N/A"), + ], + ) + def test_invocation(self, timestamp, format_, default, expected): + """Test safe_format_timestamp with valid inputs.""" + result = safe_format_timestamp(timestamp, format_, default) + assert result == expected + + @pytest.mark.sanity + def test_edge_cases(self): + """Test safe_format_timestamp with edge case timestamps.""" + result = safe_format_timestamp(0.0, "%Y", "N/A") + assert result == "1969" + + result = safe_format_timestamp(1.0, "%Y", "N/A") + assert result == "1969" + + result = safe_format_timestamp(2**31 - 1, "%Y", "N/A") + expected_year = datetime.fromtimestamp(2**31 - 1).strftime("%Y") + assert result == expected_year + + @pytest.mark.sanity + def test_invalid_timestamp_ranges(self): + """Test safe_format_timestamp with invalid timestamp ranges.""" + result = safe_format_timestamp(2**31 + 1, "%Y", "ERROR") + assert result == "ERROR" + + result = safe_format_timestamp(-1000, "%Y", "ERROR") + assert result == "ERROR" diff --git a/tests/unit/utils/test_mixins.py b/tests/unit/utils/test_mixins.py new file mode 100644 index 00000000..cd8990de --- /dev/null +++ b/tests/unit/utils/test_mixins.py @@ -0,0 +1,245 @@ +from __future__ import annotations + +import pytest + +from guidellm.utils.mixins import InfoMixin + + +class TestInfoMixin: + """Test suite for InfoMixin.""" + + @pytest.fixture( + params=[ + {"attr_one": "test_value", "attr_two": 42}, + {"attr_one": "hello_world", "attr_two": 100, "attr_three": [1, 2, 3]}, + ], + ids=["basic_attributes", "extended_attributes"], + ) + def valid_instances(self, request): + """Fixture providing test data for InfoMixin.""" + constructor_args = request.param + + class TestClass(InfoMixin): + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + instance = TestClass(**constructor_args) + return instance, constructor_args + + @pytest.mark.smoke + def test_class_signatures(self): + """Test InfoMixin class signatures and methods.""" + assert hasattr(InfoMixin, "extract_from_obj") + assert callable(InfoMixin.extract_from_obj) + assert hasattr(InfoMixin, "create_info_dict") + assert callable(InfoMixin.create_info_dict) + assert hasattr(InfoMixin, "info") + assert isinstance(InfoMixin.info, property) + + @pytest.mark.smoke + def test_initialization(self, valid_instances): + """Test InfoMixin initialization through inheritance.""" + instance, constructor_args = valid_instances + assert isinstance(instance, InfoMixin) + for key, value in constructor_args.items(): + assert hasattr(instance, key) + assert getattr(instance, key) == value + + @pytest.mark.smoke + def test_info_property(self, valid_instances): + """Test InfoMixin.info property.""" + instance, constructor_args = valid_instances + result = instance.info + assert isinstance(result, dict) + assert "str" in result + assert "type" in result + assert "class" in result + assert "module" in result + assert "attributes" in result + assert result["type"] == "TestClass" + assert result["class"] == "TestClass" + assert isinstance(result["attributes"], dict) + for key, value in constructor_args.items(): + assert key in result["attributes"] + assert 
result["attributes"][key] == value + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("obj_data", "expected_attributes"), + [ + ({"name": "test", "value": 42}, {"name": "test", "value": 42}), + ({"data": [1, 2, 3], "flag": True}, {"data": [1, 2, 3], "flag": True}), + ({"nested": {"key": "value"}}, {"nested": {"key": "value"}}), + ], + ) + def test_create_info_dict(self, obj_data, expected_attributes): + """Test InfoMixin.create_info_dict class method.""" + + class SimpleObject: + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + obj = SimpleObject(**obj_data) + result = InfoMixin.create_info_dict(obj) + + assert isinstance(result, dict) + assert "str" in result + assert "type" in result + assert "class" in result + assert "module" in result + assert "attributes" in result + assert result["type"] == "SimpleObject" + assert result["class"] == "SimpleObject" + assert result["attributes"] == expected_attributes + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("obj_data", "expected_attributes"), + [ + ({"name": "test", "value": 42}, {"name": "test", "value": 42}), + ({"data": [1, 2, 3], "flag": True}, {"data": [1, 2, 3], "flag": True}), + ], + ) + def test_extract_from_obj_without_info(self, obj_data, expected_attributes): + """Test InfoMixin.extract_from_obj with objects without info method.""" + + class SimpleObject: + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + obj = SimpleObject(**obj_data) + result = InfoMixin.extract_from_obj(obj) + + assert isinstance(result, dict) + assert "str" in result + assert "type" in result + assert "class" in result + assert "module" in result + assert "attributes" in result + assert result["type"] == "SimpleObject" + assert result["class"] == "SimpleObject" + assert result["attributes"] == expected_attributes + + @pytest.mark.smoke + def test_extract_from_obj_with_info_method(self): + """Test InfoMixin.extract_from_obj with objects that have info method.""" + + class ObjectWithInfoMethod: + def info(self): + return {"custom": "info_method", "type": "custom_type"} + + obj = ObjectWithInfoMethod() + result = InfoMixin.extract_from_obj(obj) + + assert result == {"custom": "info_method", "type": "custom_type"} + + @pytest.mark.smoke + def test_extract_from_obj_with_info_property(self): + """Test InfoMixin.extract_from_obj with objects that have info property.""" + + class ObjectWithInfoProperty: + @property + def info(self): + return {"custom": "info_property", "type": "custom_type"} + + obj = ObjectWithInfoProperty() + result = InfoMixin.extract_from_obj(obj) + + assert result == {"custom": "info_property", "type": "custom_type"} + + @pytest.mark.sanity + @pytest.mark.parametrize( + ("obj_type", "obj_value"), + [ + (str, "test_string"), + (int, 42), + (float, 3.14), + (list, [1, 2, 3]), + (dict, {"key": "value"}), + ], + ) + def test_extract_from_obj_builtin_types(self, obj_type, obj_value): + """Test InfoMixin.extract_from_obj with built-in types.""" + result = InfoMixin.extract_from_obj(obj_value) + + assert isinstance(result, dict) + assert "str" in result + assert "type" in result + assert result["type"] == obj_type.__name__ + assert result["str"] == str(obj_value) + + @pytest.mark.sanity + def test_extract_from_obj_without_dict(self): + """Test InfoMixin.extract_from_obj with objects without __dict__.""" + obj = 42 + result = InfoMixin.extract_from_obj(obj) + + assert isinstance(result, dict) + assert "attributes" in result + assert 
result["attributes"] == {} + assert result["type"] == "int" + assert result["str"] == "42" + + @pytest.mark.sanity + def test_extract_from_obj_with_private_attributes(self): + """Test InfoMixin.extract_from_obj filters private attributes.""" + + class ObjectWithPrivate: + def __init__(self): + self.public_attr = "public" + self._private_attr = "private" + self.__very_private = "very_private" + + obj = ObjectWithPrivate() + result = InfoMixin.extract_from_obj(obj) + + assert "public_attr" in result["attributes"] + assert result["attributes"]["public_attr"] == "public" + assert "_private_attr" not in result["attributes"] + assert "__very_private" not in result["attributes"] + + @pytest.mark.sanity + def test_extract_from_obj_complex_attributes(self): + """Test InfoMixin.extract_from_obj with complex attribute types.""" + + class ComplexObject: + def __init__(self): + self.simple_str = "test" + self.simple_int = 42 + self.simple_list = [1, 2, 3] + self.simple_dict = {"key": "value"} + self.complex_object = object() + + obj = ComplexObject() + result = InfoMixin.extract_from_obj(obj) + + attributes = result["attributes"] + assert attributes["simple_str"] == "test" + assert attributes["simple_int"] == 42 + assert attributes["simple_list"] == [1, 2, 3] + assert attributes["simple_dict"] == {"key": "value"} + assert isinstance(attributes["complex_object"], str) + + @pytest.mark.regression + def test_create_info_dict_consistency(self, valid_instances): + """Test InfoMixin.create_info_dict produces consistent results.""" + instance, _ = valid_instances + + result1 = InfoMixin.create_info_dict(instance) + result2 = InfoMixin.create_info_dict(instance) + + assert result1 == result2 + assert result1 is not result2 + + @pytest.mark.regression + def test_info_property_uses_create_info_dict(self, valid_instances): + """Test InfoMixin.info property uses create_info_dict method.""" + instance, _ = valid_instances + + info_result = instance.info + create_result = InfoMixin.create_info_dict(instance) + + assert info_result == create_result diff --git a/tests/unit/utils/test_statistics.py b/tests/unit/utils/test_statistics.py new file mode 100644 index 00000000..c820de9d --- /dev/null +++ b/tests/unit/utils/test_statistics.py @@ -0,0 +1,990 @@ +""" +Statistical analysis utilities for distribution calculations and running metrics. + +Provides comprehensive statistical computation tools for analyzing numerical +distributions, percentiles, and streaming data. Includes specialized support for +request timing analysis, concurrency measurement, and rate calculations. Integrates +with Pydantic for serializable statistical models and supports both weighted and +unweighted distributions with cumulative distribution function (CDF) generation. +""" + +from __future__ import annotations + +import math +import time as timer +from collections import defaultdict +from typing import Any, Literal + +import numpy as np +from pydantic import Field, computed_field + +from guidellm.utils.pydantic_utils import StandardBaseModel, StatusBreakdown + +__all__ = [ + "DistributionSummary", + "Percentiles", + "RunningStats", + "StatusDistributionSummary", + "TimeRunningStats", +] + + +class Percentiles(StandardBaseModel): + """ + Standard percentiles model for statistical distribution analysis. + + Provides complete percentile coverage from 0.1th to 99.9th percentiles for + statistical distribution characterization. Used as a component within + DistributionSummary to provide detailed distribution shape analysis. 
+ """ + + p001: float = Field( + description="The 0.1th percentile of the distribution.", + ) + p01: float = Field( + description="The 1st percentile of the distribution.", + ) + p05: float = Field( + description="The 5th percentile of the distribution.", + ) + p10: float = Field( + description="The 10th percentile of the distribution.", + ) + p25: float = Field( + description="The 25th percentile of the distribution.", + ) + p50: float = Field( + description="The 50th percentile of the distribution.", + ) + p75: float = Field( + description="The 75th percentile of the distribution.", + ) + p90: float = Field( + description="The 90th percentile of the distribution.", + ) + p95: float = Field( + description="The 95th percentile of the distribution.", + ) + p99: float = Field( + description="The 99th percentile of the distribution.", + ) + p999: float = Field( + description="The 99.9th percentile of the distribution.", + ) + + +class DistributionSummary(StandardBaseModel): + """ + Comprehensive statistical summary for numerical value distributions. + + Calculates and stores complete statistical metrics including central tendency, + dispersion, extremes, and percentiles for any numerical distribution. Supports + both weighted and unweighted data with optional cumulative distribution function + generation. Primary statistical analysis tool for request timing, performance + metrics, and benchmark result characterization. + + Example: + :: + # Create from simple values + summary = DistributionSummary.from_values([1.0, 2.0, 3.0, 4.0, 5.0]) + print(f"Mean: {summary.mean}, P95: {summary.percentiles.p95}") + + # Create from request timings for concurrency analysis + requests = [(0.0, 1.0), (0.5, 2.0), (1.0, 2.5)] + concurrency = DistributionSummary.from_request_times( + requests, "concurrency" + ) + """ + + mean: float = Field( + description="The mean/average of the distribution.", + ) + median: float = Field( + description="The median of the distribution.", + ) + mode: float = Field( + description="The mode of the distribution.", + ) + variance: float = Field( + description="The variance of the distribution.", + ) + std_dev: float = Field( + description="The standard deviation of the distribution.", + ) + min: float = Field( + description="The minimum value of the distribution.", + ) + max: float = Field( + description="The maximum value of the distribution.", + ) + count: int = Field( + description="The number of values in the distribution.", + ) + total_sum: float = Field( + description="The total sum of the values in the distribution.", + ) + percentiles: Percentiles = Field( + description="The percentiles of the distribution.", + ) + cumulative_distribution_function: list[tuple[float, float]] | None = Field( + description="The cumulative distribution function (CDF) of the distribution.", + default=None, + ) + + @staticmethod + def from_distribution_function( + distribution: list[tuple[float, float]], + include_cdf: bool = False, + ) -> DistributionSummary: + """ + Create statistical summary from weighted distribution or probability function. + + Converts weighted numerical values or probability distribution function (PDF) + into comprehensive statistical summary. Normalizes weights to probabilities + and calculates all statistical metrics including percentiles. 
+ + :param distribution: List of (value, weight) or (value, probability) tuples + representing the distribution + :param include_cdf: Whether to include cumulative distribution function + in the output + :return: DistributionSummary instance with calculated statistical metrics + """ + values, weights = zip(*distribution) if distribution else ([], []) + values = np.array(values) # type: ignore[assignment] + weights = np.array(weights) # type: ignore[assignment] + + # create the PDF + probabilities = weights / np.sum(weights) # type: ignore[operator] + pdf = np.column_stack((values, probabilities)) + pdf = pdf[np.argsort(pdf[:, 0])] + values = pdf[:, 0] # type: ignore[assignment] + probabilities = pdf[:, 1] + + # calculate the CDF + cumulative_probabilities = np.cumsum(probabilities) + cdf = np.column_stack((values, cumulative_probabilities)) + + # calculate statistics + mean = np.sum(values * probabilities).item() # type: ignore[attr-defined] + median = cdf[np.argmax(cdf[:, 1] >= 0.5), 0].item() if len(cdf) > 0 else 0 # noqa: PLR2004 + mode = values[np.argmax(probabilities)].item() if len(values) > 0 else 0 # type: ignore[call-overload] + variance = np.sum((values - mean) ** 2 * probabilities).item() # type: ignore[attr-defined] + std_dev = math.sqrt(variance) + minimum = values[0].item() if len(values) > 0 else 0 + maximum = values[-1].item() if len(values) > 0 else 0 + count = len(values) + total_sum = np.sum(values).item() # type: ignore[attr-defined] + + return DistributionSummary( + mean=mean, + median=median, + mode=mode, + variance=variance, + std_dev=std_dev, + min=minimum, + max=maximum, + count=count, + total_sum=total_sum, + percentiles=( + Percentiles( + p001=cdf[np.argmax(cdf[:, 1] >= 0.001), 0].item(), # noqa: PLR2004 + p01=cdf[np.argmax(cdf[:, 1] >= 0.01), 0].item(), # noqa: PLR2004 + p05=cdf[np.argmax(cdf[:, 1] >= 0.05), 0].item(), # noqa: PLR2004 + p10=cdf[np.argmax(cdf[:, 1] >= 0.1), 0].item(), # noqa: PLR2004 + p25=cdf[np.argmax(cdf[:, 1] >= 0.25), 0].item(), # noqa: PLR2004 + p50=cdf[np.argmax(cdf[:, 1] >= 0.50), 0].item(), # noqa: PLR2004 + p75=cdf[np.argmax(cdf[:, 1] >= 0.75), 0].item(), # noqa: PLR2004 + p90=cdf[np.argmax(cdf[:, 1] >= 0.9), 0].item(), # noqa: PLR2004 + p95=cdf[np.argmax(cdf[:, 1] >= 0.95), 0].item(), # noqa: PLR2004 + p99=cdf[np.argmax(cdf[:, 1] >= 0.99), 0].item(), # noqa: PLR2004 + p999=cdf[np.argmax(cdf[:, 1] >= 0.999), 0].item(), # noqa: PLR2004 + ) + if len(cdf) > 0 + else Percentiles( + p001=0, + p01=0, + p05=0, + p10=0, + p25=0, + p50=0, + p75=0, + p90=0, + p95=0, + p99=0, + p999=0, + ) + ), + cumulative_distribution_function=cdf.tolist() if include_cdf else None, + ) + + @staticmethod + def from_values( + values: list[float], + weights: list[float] | None = None, + include_cdf: bool = False, + ) -> DistributionSummary: + """ + Create statistical summary from numerical values with optional weights. + + Wrapper around from_distribution_function for simple value lists. If weights + are not provided, all values are equally weighted. Enables statistical + analysis of any numerical dataset. + + :param values: Numerical values representing the distribution + :param weights: Optional weights for each value. 
If not provided, all values + are equally weighted + :param include_cdf: Whether to include cumulative distribution function in + the output DistributionSummary + :return: DistributionSummary instance with calculated statistical metrics + :raises ValueError: If values and weights lists have different lengths + """ + if weights is None: + weights = [1.0] * len(values) + + if len(values) != len(weights): + raise ValueError( + "The length of values and weights must be the same.", + ) + + return DistributionSummary.from_distribution_function( + distribution=list(zip(values, weights)), + include_cdf=include_cdf, + ) + + @staticmethod + def from_request_times( + requests: list[tuple[float, float]], + distribution_type: Literal["concurrency", "rate"], + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> DistributionSummary: + """ + Create statistical summary from request timing data. + + Analyzes request start/end times to calculate concurrency or rate + distributions. Converts timing events into statistical metrics for + performance analysis and load characterization. + + :param requests: List of (start_time, end_time) tuples for each request + :param distribution_type: Type of analysis - "concurrency" for simultaneous + requests or "rate" for completion rates + :param include_cdf: Whether to include cumulative distribution function + :param epsilon: Threshold for merging close timing events + :return: DistributionSummary with timing-based statistical metrics + :raises ValueError: If distribution_type is not "concurrency" or "rate" + """ + if distribution_type == "concurrency": + # convert to delta changes based on when requests were running + time_deltas: dict[float, int] = defaultdict(int) + for start, end in requests: + time_deltas[start] += 1 + time_deltas[end] -= 1 + + # convert to the events over time measuring concurrency changes + events = [] + active = 0 + + for time, delta in sorted(time_deltas.items()): + active += delta + events.append((time, active)) + elif distribution_type == "rate": + # convert to events for when requests finished + global_start = min(start for start, _ in requests) if requests else 0 + events = [(global_start, 1)] + [(end, 1) for _, end in requests] + else: + raise ValueError( + f"Invalid distribution_type '{distribution_type}'. " + "Must be 'concurrency' or 'rate'." 
+ ) + + # combine any events that are very close together + flattened_events: list[tuple[float, float]] = [] + for time, val in sorted(events): + last_time, last_val = ( + flattened_events[-1] if flattened_events else (None, None) + ) + + if ( + last_time is not None + and last_val is not None + and abs(last_time - time) <= epsilon + ): + flattened_events[-1] = (last_time, last_val + val) + else: + flattened_events.append((time, val)) + + # convert to value distribution function + distribution: dict[float, float] = defaultdict(float) + + for ind in range(len(flattened_events) - 1): + start_time, value = flattened_events[ind] + end_time, _ = flattened_events[ind + 1] + duration = end_time - start_time + + if distribution_type == "concurrency": + # weight the concurrency value by the duration + distribution[value] += duration + elif distribution_type == "rate": + # weight the rate value by the duration + rate = value / duration + distribution[rate] += duration + + distribution_list: list[tuple[float, float]] = sorted(distribution.items()) + + return DistributionSummary.from_distribution_function( + distribution=distribution_list, + include_cdf=include_cdf, + ) + + @staticmethod + def from_iterable_request_times( + requests: list[tuple[float, float]], + first_iter_times: list[float], + iter_counts: list[int], + first_iter_counts: list[int] | None = None, + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> DistributionSummary: + """ + Create statistical summary from iterative request timing data. + + Analyzes autoregressive or streaming requests with multiple iterations + between start and end times. Calculates rate distributions based on + iteration timing patterns for LLM token generation analysis. + + :param requests: List of (start_time, end_time) tuples for each request + :param first_iter_times: Times when first iteration was received for + each request + :param iter_counts: Total iteration counts for each request from first + iteration to end + :param first_iter_counts: Iteration counts for first iteration (defaults + to 1 for each request) + :param include_cdf: Whether to include cumulative distribution function + :param epsilon: Threshold for merging close timing events + :return: DistributionSummary with iteration rate statistical metrics + :raises ValueError: If input lists have mismatched lengths + """ + + if first_iter_counts is None: + first_iter_counts = [1] * len(requests) + + if ( + len(requests) != len(first_iter_times) + or len(requests) != len(iter_counts) + or len(requests) != len(first_iter_counts) + ): + raise ValueError( + "requests, first_iter_times, iter_counts, and first_iter_counts must" + "be the same length." 
+ f"Given {len(requests)}, {len(first_iter_times)}, {len(iter_counts)}, " + f"{len(first_iter_counts)}", + ) + + # first break up the requests into individual iterable events + events = defaultdict(int) + global_start = min(start for start, _ in requests) if requests else 0 + global_end = max(end for _, end in requests) if requests else 0 + events[global_start] = 0 + events[global_end] = 0 + + for (_, end), first_iter, first_iter_count, total_count in zip( + requests, first_iter_times, first_iter_counts, iter_counts + ): + events[first_iter] += first_iter_count + + if total_count > 1: + iter_latency = (end - first_iter) / (total_count - 1) + for ind in range(1, total_count): + events[first_iter + ind * iter_latency] += 1 + + # combine any events that are very close together + flattened_events: list[tuple[float, int]] = [] + + for time, count in sorted(events.items()): + last_time, last_count = ( + flattened_events[-1] if flattened_events else (None, None) + ) + + if ( + last_time is not None + and last_count is not None + and abs(last_time - time) <= epsilon + ): + flattened_events[-1] = (last_time, last_count + count) + else: + flattened_events.append((time, count)) + + # convert to value distribution function + distribution: dict[float, float] = defaultdict(float) + + for ind in range(len(flattened_events) - 1): + start_time, count = flattened_events[ind] + end_time, _ = flattened_events[ind + 1] + duration = end_time - start_time + rate = count / duration + distribution[rate] += duration + + distribution_list = sorted(distribution.items()) + + return DistributionSummary.from_distribution_function( + distribution=distribution_list, + include_cdf=include_cdf, + ) + + +class StatusDistributionSummary( + StatusBreakdown[ + DistributionSummary, + DistributionSummary, + DistributionSummary, + DistributionSummary, + ] +): + """ + Status-grouped statistical summary for request processing analysis. + + Provides comprehensive statistical analysis grouped by request status (total, + successful, incomplete, errored). Enables performance analysis across different + request outcomes for benchmarking and monitoring applications. Each status + category maintains complete DistributionSummary metrics. + + Example: + :: + status_summary = StatusDistributionSummary.from_values( + value_types=["successful", "error", "successful"], + values=[1.5, 10.0, 2.1] + ) + print(f"Success mean: {status_summary.successful.mean}") + print(f"Error rate: {status_summary.errored.count}") + """ + + @staticmethod + def from_values( + value_types: list[Literal["successful", "incomplete", "error"]], + values: list[float], + weights: list[float] | None = None, + include_cdf: bool = False, + ) -> StatusDistributionSummary: + """ + Create status-grouped statistical summary from values and status types. + + Groups numerical values by request status and calculates complete + statistical summaries for each category. Enables performance analysis + across different request outcomes. 
+ + :param value_types: Status type for each value ("successful", "incomplete", + or "error") + :param values: Numerical values representing the distribution + :param weights: Optional weights for each value (defaults to equal weighting) + :param include_cdf: Whether to include cumulative distribution functions + :return: StatusDistributionSummary with statistics grouped by status + :raises ValueError: If input lists have mismatched lengths or invalid + status types + """ + if any( + type_ not in {"successful", "incomplete", "error"} for type_ in value_types + ): + raise ValueError( + "value_types must be one of 'successful', 'incomplete', or 'error'. " + f"Got {value_types} instead.", + ) + + if weights is None: + weights = [1.0] * len(values) + + if len(value_types) != len(values) or len(value_types) != len(weights): + raise ValueError( + "The length of value_types, values, and weights must be the same.", + ) + + _, successful_values, successful_weights = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + _, incomplete_values, incomplete_weights = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + _, errored_values, errored_weights = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_values( + values, + weights, + include_cdf=include_cdf, + ), + successful=DistributionSummary.from_values( + successful_values, # type: ignore[arg-type] + successful_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + incomplete=DistributionSummary.from_values( + incomplete_values, # type: ignore[arg-type] + incomplete_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + errored=DistributionSummary.from_values( + errored_values, # type: ignore[arg-type] + errored_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + ) + + @staticmethod + def from_request_times( + request_types: list[Literal["successful", "incomplete", "error"]], + requests: list[tuple[float, float]], + distribution_type: Literal["concurrency", "rate"], + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> StatusDistributionSummary: + """ + Create status-grouped statistical summary from request timing data. + + Analyzes request timings grouped by status to calculate concurrency or + rate distributions for each outcome category. Enables comparative + performance analysis across successful, incomplete, and errored requests. + + :param request_types: Status type for each request ("successful", + "incomplete", or "error") + :param requests: List of (start_time, end_time) tuples for each request + :param distribution_type: Analysis type - "concurrency" or "rate" + :param include_cdf: Whether to include cumulative distribution functions + :param epsilon: Threshold for merging close timing events + :return: StatusDistributionSummary with timing statistics by status + :raises ValueError: If input lists have mismatched lengths or invalid types + """ + if distribution_type not in {"concurrency", "rate"}: + raise ValueError( + f"Invalid distribution_type '{distribution_type}'. " + "Must be 'concurrency' or 'rate'." 
+ ) + + if any( + type_ not in {"successful", "incomplete", "error"} + for type_ in request_types + ): + raise ValueError( + "request_types must be one of 'successful', 'incomplete', or 'error'. " + f"Got {request_types} instead.", + ) + + if len(request_types) != len(requests): + raise ValueError( + "The length of request_types and requests must be the same. " + f"Got {len(request_types)} and {len(requests)} instead.", + ) + + _, successful_requests = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + _, incomplete_requests = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + _, errored_requests = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_request_times( + requests, + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + successful=DistributionSummary.from_request_times( + successful_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + incomplete=DistributionSummary.from_request_times( + incomplete_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + errored=DistributionSummary.from_request_times( + errored_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + ) + + @staticmethod + def from_iterable_request_times( + request_types: list[Literal["successful", "incomplete", "error"]], + requests: list[tuple[float, float]], + first_iter_times: list[float], + iter_counts: list[int] | None = None, + first_iter_counts: list[int] | None = None, + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> StatusDistributionSummary: + """ + Create status-grouped statistical summary from iterative request timing data. + + Analyzes autoregressive request timings grouped by status to calculate + iteration rate distributions for each outcome category. Enables comparative + analysis of token generation or streaming response performance across + different request statuses. + + :param request_types: Status type for each request ("successful", + "incomplete", or "error") + :param requests: List of (start_time, end_time) tuples for each request + :param first_iter_times: Times when first iteration was received for + each request + :param iter_counts: Total iteration counts for each request (defaults to 1) + :param first_iter_counts: Iteration counts for first iteration (defaults + to 1) + :param include_cdf: Whether to include cumulative distribution functions + :param epsilon: Threshold for merging close timing events + :return: StatusDistributionSummary with iteration statistics by status + :raises ValueError: If input lists have mismatched lengths or invalid types + """ + if any( + type_ not in {"successful", "incomplete", "error"} + for type_ in request_types + ): + raise ValueError( + "request_types must be one of 'successful', 'incomplete', or 'error'. 
" + f"Got {request_types} instead.", + ) + + if iter_counts is None: + iter_counts = [1] * len(requests) + + if first_iter_counts is None: + first_iter_counts = [1] * len(requests) + + if ( + len(request_types) != len(requests) + or len(requests) != len(first_iter_times) + or len(requests) != len(iter_counts) + or len(requests) != len(first_iter_counts) + ): + raise ValueError( + "request_types, requests, first_iter_times, iter_counts, and " + "first_iter_counts must be the same length." + f"Given {len(request_types)}, {len(requests)}, " + f"{len(first_iter_times)}, {len(iter_counts)}, " + f"{len(first_iter_counts)}", + ) + + ( + _, + successful_requests, + successful_first_iter_times, + successful_iter_counts, + successful_first_iter_counts, + ) = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + ( + _, + incomplete_requests, + incomplete_first_iter_times, + incomplete_iter_counts, + incomplete_first_iter_counts, + ) = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + ( + _, + errored_requests, + errored_first_iter_times, + errored_iter_counts, + errored_first_iter_counts, + ) = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_iterable_request_times( + requests, + first_iter_times, + iter_counts, + first_iter_counts, + include_cdf=include_cdf, + epsilon=epsilon, + ), + successful=DistributionSummary.from_iterable_request_times( + successful_requests, # type: ignore[arg-type] + successful_first_iter_times, # type: ignore[arg-type] + successful_iter_counts, # type: ignore[arg-type] + successful_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + incomplete=DistributionSummary.from_iterable_request_times( + incomplete_requests, # type: ignore[arg-type] + incomplete_first_iter_times, # type: ignore[arg-type] + incomplete_iter_counts, # type: ignore[arg-type] + incomplete_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + errored=DistributionSummary.from_iterable_request_times( + errored_requests, # type: ignore[arg-type] + errored_first_iter_times, # type: ignore[arg-type] + errored_iter_counts, # type: ignore[arg-type] + errored_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + ) + + +class RunningStats(StandardBaseModel): + """ + Real-time statistics tracking for streaming numerical data. + + Maintains mean, rate, and cumulative statistics for continuous data streams + without storing individual values. Optimized for memory efficiency in + long-running monitoring applications. Supports arithmetic operators for + convenient value addition and provides computed properties for derived metrics. 
+ + Example: + :: + stats = RunningStats() + stats += 10.5 # Add value using operator + stats.update(20.0, count=3) # Add value with custom count + print(f"Mean: {stats.mean}, Rate: {stats.rate}") + """ + + start_time: float = Field( + default_factory=timer.time, + description=( + "The time the running statistics object was created. " + "This is used to calculate the rate of the statistics." + ), + ) + count: int = Field( + default=0, + description="The number of values added to the running statistics.", + ) + total: float = Field( + default=0.0, + description="The total sum of the values added to the running statistics.", + ) + last: float = Field( + default=0.0, + description="The last value added to the running statistics.", + ) + + @computed_field # type: ignore[misc] + @property + def mean(self) -> float: + """ + :return: The mean of the running statistics (total / count). + If count is 0, return 0.0. + """ + if self.count == 0: + return 0.0 + return self.total / self.count + + @computed_field # type: ignore[misc] + @property + def rate(self) -> float: + """ + :return: The rate of the running statistics + (total / (time.time() - start_time)). + If count is 0, return 0.0. + """ + if self.count == 0: + return 0.0 + return self.total / (timer.time() - self.start_time) + + def __add__(self, value: Any) -> float: + """ + Add value using + operator and return current mean. + + :param value: Numerical value to add to the running statistics + :return: Updated mean after adding the value + :raises ValueError: If value is not numeric (int or float) + """ + if not isinstance(value, (int, float)): + raise ValueError( + f"Value must be an int or float, got {type(value)} instead.", + ) + + self.update(value) + + return self.mean + + def __iadd__(self, value: Any) -> RunningStats: + """ + Add value using += operator and return updated instance. + + :param value: Numerical value to add to the running statistics + :return: Self reference for method chaining + :raises ValueError: If value is not numeric (int or float) + """ + if not isinstance(value, (int, float)): + raise ValueError( + f"Value must be an int or float, got {type(value)} instead.", + ) + + self.update(value) + + return self + + def update(self, value: float, count: int = 1) -> None: + """ + Update running statistics with new value and count. + + :param value: Numerical value to add to the running statistics + :param count: Number of occurrences to count for this value (defaults to 1) + """ + self.count += count + self.total += value + self.last = value + + +class TimeRunningStats(RunningStats): + """ + Specialized running statistics for time-based measurements. + + Extends RunningStats with time-specific computed properties for millisecond + conversions. Designed for tracking latency, duration, and timing metrics in + performance monitoring applications. + + Example: + :: + time_stats = TimeRunningStats() + time_stats += 0.125 # Add 125ms in seconds + print(f"Mean: {time_stats.mean_ms}ms, Total: {time_stats.total_ms}ms") + """ + + @computed_field # type: ignore[misc] + @property + def total_ms(self) -> float: + """ + :return: The total time multiplied by 1000.0 to convert to milliseconds. + """ + return self.total * 1000.0 + + @computed_field # type: ignore[misc] + @property + def last_ms(self) -> float: + """ + :return: The last time multiplied by 1000.0 to convert to milliseconds. 
+ """ + return self.last * 1000.0 + + @computed_field # type: ignore[misc] + @property + def mean_ms(self) -> float: + """ + :return: The mean time multiplied by 1000.0 to convert to milliseconds. + """ + return self.mean * 1000.0 + + @computed_field # type: ignore[misc] + @property + def rate_ms(self) -> float: + """ + :return: The rate of the running statistics multiplied by 1000.0 + to convert to milliseconds. + """ + return self.rate * 1000.0 diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py new file mode 100644 index 00000000..50f18ce3 --- /dev/null +++ b/tests/unit/utils/test_text.py @@ -0,0 +1,531 @@ +from __future__ import annotations + +import gzip +import tempfile +from pathlib import Path +from unittest.mock import Mock, patch + +import httpx +import pytest + +from guidellm.utils.text import ( + MAX_PATH_LENGTH, + EndlessTextCreator, + clean_text, + filter_text, + format_value_display, + is_puncutation, + load_text, + split_text, + split_text_list_by_length, +) + + +def test_max_path_length(): + """Test that MAX_PATH_LENGTH is correctly defined.""" + assert isinstance(MAX_PATH_LENGTH, int) + assert MAX_PATH_LENGTH == 4096 + + +class TestFormatValueDisplay: + """Test suite for format_value_display.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ( + "value", + "label", + "units", + "total_characters", + "digits_places", + "decimal_places", + "expected", + ), + [ + (42.0, "test", "", None, None, None, "42 [info]test[/info]"), + (42.5, "test", "ms", None, None, 1, "42.5ms [info]test[/info]"), + (42.123, "test", "", None, 5, 2, " 42.12 [info]test[/info]"), + ( + 42.0, + "test", + "ms", + 30, + None, + 0, + " 42ms [info]test[/info]", + ), + ], + ) + def test_invocation( + self, + value, + label, + units, + total_characters, + digits_places, + decimal_places, + expected, + ): + """Test format_value_display with various parameters.""" + result = format_value_display( + value=value, + label=label, + units=units, + total_characters=total_characters, + digits_places=digits_places, + decimal_places=decimal_places, + ) + assert label in result + assert units in result + value_check = ( + str(int(value)) + if decimal_places == 0 + else ( + f"{value:.{decimal_places}f}" + if decimal_places is not None + else str(value) + ) + ) + assert value_check in result or str(value) in result + + @pytest.mark.sanity + @pytest.mark.parametrize( + ("value", "label"), + [ + (None, "test"), + (42.0, None), + ("not_number", "test"), + ], + ) + def test_invocation_with_none_values(self, value, label): + """Test format_value_display with None/invalid inputs still works.""" + result = format_value_display(value, label) + assert isinstance(result, str) + if label is not None: + assert str(label) in result + if value is not None: + assert str(value) in result + + +class TestSplitTextListByLength: + """Test suite for split_text_list_by_length.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ( + "text_list", + "max_characters", + "pad_horizontal", + "pad_vertical", + "expected_structure", + ), + [ + ( + ["hello world", "test"], + 5, + False, + False, + [["hello", "world"], ["test"]], + ), + ( + ["short", "longer text"], + [5, 10], + True, + True, + [[" short"], ["longer", "text"]], + ), + ( + ["a", "b", "c"], + 10, + True, + True, + [[" a"], [" b"], [" c"]], + ), + ], + ) + def test_invocation( + self, + text_list, + max_characters, + pad_horizontal, + pad_vertical, + expected_structure, + ): + """Test split_text_list_by_length with various parameters.""" + result = 
split_text_list_by_length( + text_list, max_characters, pad_horizontal, pad_vertical + ) + assert len(result) == len(text_list) + if pad_vertical: + max_lines = max(len(lines) for lines in result) + assert all(len(lines) == max_lines for lines in result) + + @pytest.mark.sanity + def test_invalid_max_characters_length(self): + """Test split_text_list_by_length with mismatched max_characters length.""" + error_msg = "max_characters must be a list of the same length" + with pytest.raises(ValueError, match=error_msg): + split_text_list_by_length(["a", "b"], [5, 10, 15]) + + @pytest.mark.sanity + @pytest.mark.parametrize( + ("text_list", "max_characters"), + [ + (None, 5), + (["test"], None), + (["test"], []), + ], + ) + def test_invalid_invocation(self, text_list, max_characters): + """Test split_text_list_by_length with invalid inputs.""" + with pytest.raises((TypeError, ValueError)): + split_text_list_by_length(text_list, max_characters) + + +class TestFilterText: + """Test suite for filter_text.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("text", "filter_start", "filter_end", "expected"), + [ + ("hello world test", "world", None, "world test"), + ("hello world test", None, "world", "hello "), + ("hello world test", "hello", "test", "hello world "), + ("hello world test", 6, 11, "world test"), + ("hello world test", 0, 5, "hello"), + ("hello world test", None, None, "hello world test"), + ], + ) + def test_invocation(self, text, filter_start, filter_end, expected): + """Test filter_text with various start and end markers.""" + result = filter_text(text, filter_start, filter_end) + assert result == expected + + @pytest.mark.sanity + @pytest.mark.parametrize( + ("text", "filter_start", "filter_end"), + [ + ("hello", "notfound", None), + ("hello", None, "notfound"), + ("hello", "invalid_type", None), + ("hello", None, "invalid_type"), + ], + ) + def test_invalid_invocation(self, text, filter_start, filter_end): + """Test filter_text with invalid markers.""" + with pytest.raises((ValueError, TypeError)): + filter_text(text, filter_start, filter_end) + + +class TestCleanText: + """Test suite for clean_text.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("text", "expected"), + [ + ("hello world", "hello world"), + (" hello\n\nworld ", "hello world"), + ("hello\tworld\r\ntest", "hello world test"), + ("", ""), + (" ", ""), + ], + ) + def test_invocation(self, text, expected): + """Test clean_text with various whitespace scenarios.""" + result = clean_text(text) + assert result == expected + + @pytest.mark.sanity + @pytest.mark.parametrize( + "text", + [ + None, + 123, + ], + ) + def test_invalid_invocation(self, text): + """Test clean_text with invalid inputs.""" + with pytest.raises((TypeError, AttributeError)): + clean_text(text) + + +class TestSplitText: + """Test suite for split_text.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("text", "split_punctuation", "expected"), + [ + ("hello world", False, ["hello", "world"]), + ("hello, world!", True, ["hello", ",", "world", "!"]), + ("test.example", False, ["test.example"]), + ("test.example", True, ["test", ".", "example"]), + ("", False, []), + ], + ) + def test_invocation(self, text, split_punctuation, expected): + """Test split_text with various punctuation options.""" + result = split_text(text, split_punctuation) + assert result == expected + + @pytest.mark.sanity + @pytest.mark.parametrize( + "text", + [ + None, + 123, + ], + ) + def test_invalid_invocation(self, text): + """Test split_text with invalid 
inputs.""" + with pytest.raises((TypeError, AttributeError)): + split_text(text) + + +class TestLoadText: + """Test suite for load_text.""" + + @pytest.mark.smoke + def test_empty_data(self): + """Test load_text with empty data.""" + result = load_text("") + assert result == "" + + @pytest.mark.smoke + def test_raw_text(self): + """Test load_text with raw text that's not a file.""" + long_text = "a" * (MAX_PATH_LENGTH + 1) + result = load_text(long_text) + assert result == long_text + + @pytest.mark.smoke + def test_local_file(self): + """Test load_text with local file.""" + with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as tmp: + test_content = "test file content" + tmp.write(test_content) + tmp.flush() + + result = load_text(tmp.name) + assert result == test_content + + Path(tmp.name).unlink() + + @pytest.mark.smoke + def test_gzipped_file(self): + """Test load_text with gzipped file.""" + with tempfile.NamedTemporaryFile(delete=False, suffix=".gz") as tmp: + test_content = "test gzipped content" + with gzip.open(tmp.name, "wt") as gzf: + gzf.write(test_content) + + result = load_text(tmp.name) + assert result == test_content + + Path(tmp.name).unlink() + + @pytest.mark.smoke + @patch("httpx.Client") + def test_url_loading(self, mock_client): + """Test load_text with HTTP URL.""" + mock_response = Mock() + mock_response.text = "url content" + mock_client.return_value.__enter__.return_value.get.return_value = mock_response + + result = load_text("http://example.com/test.txt") + assert result == "url content" + + @pytest.mark.smoke + @patch("guidellm.utils.text.files") + @patch("guidellm.utils.text.as_file") + def test_package_data_loading(self, mock_as_file, mock_files): + """Test load_text with package data.""" + mock_resource = Mock() + mock_files.return_value.joinpath.return_value = mock_resource + + mock_file = Mock() + mock_file.read.return_value = "package data content" + mock_as_file.return_value.__enter__.return_value = mock_file + + with patch("gzip.open") as mock_gzip: + mock_gzip.return_value.__enter__.return_value = mock_file + result = load_text("data:test.txt") + assert result == "package data content" + + @pytest.mark.sanity + def test_nonexistent_file(self): + """Test load_text with nonexistent file returns the path as raw text.""" + result = load_text("/nonexistent/path/file.txt") + assert result == "/nonexistent/path/file.txt" + + @pytest.mark.sanity + @patch("httpx.Client") + def test_url_error(self, mock_client): + """Test load_text with HTTP error.""" + mock_client.return_value.__enter__.return_value.get.side_effect = ( + httpx.HTTPStatusError("HTTP error", request=None, response=None) + ) + + with pytest.raises(httpx.HTTPStatusError): + load_text("http://example.com/error.txt") + + +class TestIsPuncutation: + """Test suite for is_puncutation.""" + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("text", "expected"), + [ + (".", True), + (",", True), + ("!", True), + ("?", True), + (";", True), + ("a", False), + ("1", False), + (" ", False), + ("ab", False), + ("", False), + ], + ) + def test_invocation(self, text, expected): + """Test is_puncutation with various characters.""" + result = is_puncutation(text) + assert result == expected + + @pytest.mark.sanity + @pytest.mark.parametrize( + "text", + [ + None, + 123, + ], + ) + def test_invalid_invocation(self, text): + """Test is_puncutation with invalid inputs.""" + with pytest.raises((TypeError, AttributeError)): + is_puncutation(text) + + +class TestEndlessTextCreator: + """Test suite 
for EndlessTextCreator.""" + + @pytest.fixture( + params=[ + { + "data": "hello world test", + "filter_start": None, + "filter_end": None, + }, + { + "data": "hello world test", + "filter_start": "world", + "filter_end": None, + }, + {"data": "one two three four", "filter_start": 0, "filter_end": 9}, + ], + ids=["no_filter", "string_filter", "index_filter"], + ) + def valid_instances(self, request): + """Fixture providing test data for EndlessTextCreator.""" + constructor_args = request.param + instance = EndlessTextCreator(**constructor_args) + return instance, constructor_args + + @pytest.mark.smoke + def test_class_signatures(self): + """Test EndlessTextCreator signatures and methods.""" + assert hasattr(EndlessTextCreator, "__init__") + assert hasattr(EndlessTextCreator, "create_text") + instance = EndlessTextCreator("test") + assert hasattr(instance, "data") + assert hasattr(instance, "text") + assert hasattr(instance, "filtered_text") + assert hasattr(instance, "words") + + @pytest.mark.smoke + def test_initialization(self, valid_instances): + """Test EndlessTextCreator initialization.""" + instance, constructor_args = valid_instances + assert isinstance(instance, EndlessTextCreator) + assert instance.data == constructor_args["data"] + assert isinstance(instance.text, str) + assert isinstance(instance.filtered_text, str) + assert isinstance(instance.words, list) + + @pytest.mark.sanity + @pytest.mark.parametrize( + ("data", "filter_start", "filter_end"), + [ + ("test", "notfound", None), + ], + ) + def test_invalid_initialization_values(self, data, filter_start, filter_end): + """Test EndlessTextCreator with invalid initialization values.""" + with pytest.raises((TypeError, ValueError)): + EndlessTextCreator(data, filter_start, filter_end) + + @pytest.mark.smoke + def test_initialization_with_none(self): + """Test EndlessTextCreator handles None data gracefully.""" + instance = EndlessTextCreator(None) + assert isinstance(instance, EndlessTextCreator) + assert instance.data is None + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("start", "length", "expected_length"), + [ + (0, 5, 5), + (2, 3, 3), + (0, 0, 0), + ], + ) + def test_create_text(self, valid_instances, start, length, expected_length): + """Test EndlessTextCreator.create_text.""" + instance, constructor_args = valid_instances + result = instance.create_text(start, length) + assert isinstance(result, str) + if length > 0 and instance.words: + assert len(result) > 0 + + @pytest.mark.smoke + def test_create_text_cycling(self): + """Test EndlessTextCreator.create_text cycling behavior.""" + instance = EndlessTextCreator("one two three") + result1 = instance.create_text(0, 3) + result2 = instance.create_text(3, 3) + assert isinstance(result1, str) + assert isinstance(result2, str) + + @pytest.mark.sanity + @pytest.mark.parametrize( + ("start", "length"), + [ + ("invalid", 5), + (0, "invalid"), + ], + ) + def test_create_text_invalid(self, valid_instances, start, length): + """Test EndlessTextCreator.create_text with invalid inputs.""" + instance, constructor_args = valid_instances + with pytest.raises((TypeError, ValueError)): + instance.create_text(start, length) + + @pytest.mark.smoke + @pytest.mark.parametrize( + ("start", "length", "min_length"), + [ + (-1, 5, 0), + (0, -1, 0), + ], + ) + def test_create_text_edge_cases(self, valid_instances, start, length, min_length): + """Test EndlessTextCreator.create_text with edge cases.""" + instance, constructor_args = valid_instances + result = instance.create_text(start, 
length) + assert isinstance(result, str) + assert len(result) >= min_length From 36fe703a7283bcc12bcebc9160ad9fa08ecc6197 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Wed, 20 Aug 2025 21:51:05 -0400 Subject: [PATCH 4/9] Update tests/unit/utils/test_text.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Mark Kurtz --- tests/unit/utils/test_text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py index 50f18ce3..5e69fdf6 100644 --- a/tests/unit/utils/test_text.py +++ b/tests/unit/utils/test_text.py @@ -372,7 +372,7 @@ def test_url_error(self, mock_client): load_text("http://example.com/error.txt") -class TestIsPuncutation: +class TestIsPunctuation: """Test suite for is_puncutation.""" @pytest.mark.smoke From 72cdc5c8d756813ba2a1ff68c0f761af28934947 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Thu, 21 Aug 2025 01:49:43 +0000 Subject: [PATCH 5/9] minor fixes and updates --- src/guidellm/utils/__init__.py | 4 +- src/guidellm/utils/text.py | 8 +- tests/unit/utils/test_statistics.py | 1618 +++++++++++---------------- tests/unit/utils/test_text.py | 10 +- 4 files changed, 678 insertions(+), 962 deletions(-) diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 0a1ff10d..576fe64d 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -37,7 +37,7 @@ EndlessTextCreator, clean_text, filter_text, - is_puncutation, + is_punctuation, load_text, split_text, split_text_list_by_length, @@ -67,7 +67,7 @@ "check_load_processor", "clean_text", "filter_text", - "is_puncutation", + "is_punctuation", "load_text", "safe_add", "safe_divide", diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py index fd43fa41..beebfe37 100644 --- a/src/guidellm/utils/text.py +++ b/src/guidellm/utils/text.py @@ -23,7 +23,7 @@ from guidellm import data as package_data from guidellm.config import settings -from guidellm.utils.console import Colors +from guidellm.utils.colors import Colors __all__ = [ "MAX_PATH_LENGTH", @@ -31,7 +31,7 @@ "clean_text", "filter_text", "format_value_display", - "is_puncutation", + "is_punctuation", "load_text", "split_text", "split_text_list_by_length", @@ -268,7 +268,7 @@ def load_text(data: str | Path, encoding: str | None = None) -> str: return data.read_text(encoding=encoding) -def is_puncutation(text: str) -> bool: +def is_punctuation(text: str) -> bool: """ Check if a single character is a punctuation mark. @@ -332,7 +332,7 @@ def create_text(self, start: int, length: int) -> str: index = (start + counter) % len(self.words) add_word = self.words[index] - if counter != 0 and not is_puncutation(add_word): + if counter != 0 and not is_punctuation(add_word): text += " " text += add_word diff --git a/tests/unit/utils/test_statistics.py b/tests/unit/utils/test_statistics.py index c820de9d..fa8cccd0 100644 --- a/tests/unit/utils/test_statistics.py +++ b/tests/unit/utils/test_statistics.py @@ -1,990 +1,706 @@ -""" -Statistical analysis utilities for distribution calculations and running metrics. - -Provides comprehensive statistical computation tools for analyzing numerical -distributions, percentiles, and streaming data. Includes specialized support for -request timing analysis, concurrency measurement, and rate calculations. Integrates -with Pydantic for serializable statistical models and supports both weighted and -unweighted distributions with cumulative distribution function (CDF) generation. 
-""" - -from __future__ import annotations - import math -import time as timer -from collections import defaultdict -from typing import Any, Literal +import time +from typing import Literal import numpy as np -from pydantic import Field, computed_field - -from guidellm.utils.pydantic_utils import StandardBaseModel, StatusBreakdown +import pytest + +from guidellm.objects import ( + DistributionSummary, + Percentiles, + RunningStats, + StatusDistributionSummary, + TimeRunningStats, +) + + +def create_default_percentiles() -> Percentiles: + return Percentiles( + p001=0.1, + p01=1.0, + p05=5.0, + p10=10.0, + p25=25.0, + p50=50.0, + p75=75.0, + p90=90.0, + p95=95.0, + p99=99.0, + p999=99.9, + ) -__all__ = [ - "DistributionSummary", - "Percentiles", - "RunningStats", - "StatusDistributionSummary", - "TimeRunningStats", -] +def create_default_distribution_summary() -> DistributionSummary: + return DistributionSummary( + mean=50.0, + median=50.0, + mode=50.0, + variance=835, + std_dev=math.sqrt(835), + min=0.0, + max=100.0, + count=1001, + total_sum=50050.0, + percentiles=create_default_percentiles(), + ) -class Percentiles(StandardBaseModel): - """ - Standard percentiles model for statistical distribution analysis. - Provides complete percentile coverage from 0.1th to 99.9th percentiles for - statistical distribution characterization. Used as a component within - DistributionSummary to provide detailed distribution shape analysis. - """ +@pytest.mark.smoke +def test_percentiles_initialization(): + percentiles = create_default_percentiles() + assert percentiles.p001 == 0.1 + assert percentiles.p01 == 1.0 + assert percentiles.p05 == 5.0 + assert percentiles.p10 == 10.0 + assert percentiles.p25 == 25.0 + assert percentiles.p50 == 50.0 + assert percentiles.p75 == 75.0 + assert percentiles.p90 == 90.0 + assert percentiles.p95 == 95.0 + assert percentiles.p99 == 99.0 + assert percentiles.p999 == 99.9 + + +@pytest.mark.smoke +def test_percentiles_invalid_initialization(): + test_kwargs = { + "p001": 0.1, + "p01": 1.0, + "p05": 5.0, + "p10": 10.0, + "p25": 25.0, + "p50": 50.0, + "p75": 75.0, + "p90": 90.0, + "p95": 95.0, + "p99": 99.0, + "p999": 99.9, + } + test_missing_keys = list(test_kwargs.keys()) + + for missing_key in test_missing_keys: + kwargs = {key: val for key, val in test_kwargs.items() if key != missing_key} + with pytest.raises(ValueError): + Percentiles(**kwargs) + + +@pytest.mark.smoke +def test_percentiles_marshalling(): + percentiles = create_default_percentiles() + serialized = percentiles.model_dump() + deserialized = Percentiles.model_validate(serialized) + + for key, value in vars(percentiles).items(): + assert getattr(deserialized, key) == value + + +@pytest.mark.smoke +def test_distribution_summary_initilaization(): + distribution_summary = create_default_distribution_summary() + assert distribution_summary.mean == 50.0 + assert distribution_summary.median == 50.0 + assert distribution_summary.mode == 50.0 + assert distribution_summary.variance == 835 + assert distribution_summary.std_dev == math.sqrt(835) + assert distribution_summary.min == 0.0 + assert distribution_summary.max == 100.0 + assert distribution_summary.count == 1001 + assert distribution_summary.total_sum == 50050.0 + assert distribution_summary.percentiles.p001 == 0.1 + assert distribution_summary.percentiles.p01 == 1.0 + assert distribution_summary.percentiles.p05 == 5.0 + assert distribution_summary.percentiles.p10 == 10.0 + assert distribution_summary.percentiles.p25 == 25.0 + assert 
distribution_summary.percentiles.p50 == 50.0 + assert distribution_summary.percentiles.p75 == 75.0 + assert distribution_summary.percentiles.p90 == 90.0 + assert distribution_summary.percentiles.p95 == 95.0 + assert distribution_summary.percentiles.p99 == 99.0 + assert distribution_summary.percentiles.p999 == 99.9 + + +@pytest.mark.smoke +def test_distribution_summary_invalid_initialization(): + test_kwargs = { + "mean": 50.0, + "median": 50.0, + "mode": 50.0, + "variance": 835, + "std_dev": math.sqrt(835), + "min": 0.0, + "max": 100.0, + "count": 1001, + "total_sum": 50050.0, + "percentiles": create_default_percentiles(), + } + test_missing_keys = list(test_kwargs.keys()) + for missing_key in test_missing_keys: + kwargs = {key: val for key, val in test_kwargs.items() if key != missing_key} + with pytest.raises(ValueError): + DistributionSummary(**kwargs) # type: ignore[arg-type] + + +@pytest.mark.smoke +def test_distribution_summary_marshalling(): + distribution_summary = create_default_distribution_summary() + serialized = distribution_summary.model_dump() + deserialized = DistributionSummary.model_validate(serialized) + + for key, value in vars(distribution_summary).items(): + assert getattr(deserialized, key) == value + + +@pytest.mark.smoke +def test_distribution_summary_from_distribution_function(): + values = [val / 10.0 for val in range(1001)] + distribution = [(val, 1.0) for val in values] + distribution_summary = DistributionSummary.from_distribution_function(distribution) + assert distribution_summary.mean == pytest.approx(np.mean(values)) + assert distribution_summary.median == pytest.approx(np.median(values)) + assert distribution_summary.mode == 0.0 + assert distribution_summary.variance == pytest.approx(np.var(values, ddof=0)) + assert distribution_summary.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary.min == min(values) + assert distribution_summary.max == max(values) + assert distribution_summary.count == len(values) + assert distribution_summary.total_sum == sum(values) + assert distribution_summary.percentiles.p001 == pytest.approx( + np.percentile(values, 0.1) + ) + assert distribution_summary.percentiles.p01 == pytest.approx( + np.percentile(values, 1.0) + ) + assert distribution_summary.percentiles.p05 == pytest.approx( + np.percentile(values, 5.0) + ) + assert distribution_summary.percentiles.p10 == pytest.approx( + np.percentile(values, 10.0) + ) + assert distribution_summary.percentiles.p25 == pytest.approx( + np.percentile(values, 25.0) + ) + assert distribution_summary.percentiles.p50 == pytest.approx( + np.percentile(values, 50.0) + ) + assert distribution_summary.percentiles.p75 == pytest.approx( + np.percentile(values, 75.0) + ) + assert distribution_summary.percentiles.p90 == pytest.approx( + np.percentile(values, 90.0) + ) + assert distribution_summary.percentiles.p95 == pytest.approx( + np.percentile(values, 95.0) + ) + assert distribution_summary.percentiles.p99 == pytest.approx( + np.percentile(values, 99.0) + ) + assert distribution_summary.percentiles.p999 == pytest.approx( + np.percentile(values, 99.9) + ) + assert distribution_summary.cumulative_distribution_function is None - p001: float = Field( - description="The 0.1th percentile of the distribution.", + distribution_summary_cdf = DistributionSummary.from_distribution_function( + distribution, include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 
len(values) + + +def test_distribution_summary_from_values(): + values = [val / 10 for val in range(1001)] + distribution_summary = DistributionSummary.from_values(values) + assert distribution_summary.mean == pytest.approx(np.mean(values)) + assert distribution_summary.median == pytest.approx(np.median(values)) + assert distribution_summary.mode == 0.0 + assert distribution_summary.variance == pytest.approx(np.var(values, ddof=0)) + assert distribution_summary.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary.min == min(values) + assert distribution_summary.max == max(values) + assert distribution_summary.count == len(values) + assert distribution_summary.total_sum == sum(values) + assert distribution_summary.percentiles.p001 == pytest.approx( + np.percentile(values, 0.1) + ) + assert distribution_summary.percentiles.p01 == pytest.approx( + np.percentile(values, 1.0) + ) + assert distribution_summary.percentiles.p05 == pytest.approx( + np.percentile(values, 5.0) + ) + assert distribution_summary.percentiles.p10 == pytest.approx( + np.percentile(values, 10.0) + ) + assert distribution_summary.percentiles.p25 == pytest.approx( + np.percentile(values, 25.0) + ) + assert distribution_summary.percentiles.p50 == pytest.approx( + np.percentile(values, 50.0) ) - p01: float = Field( - description="The 1st percentile of the distribution.", + assert distribution_summary.percentiles.p75 == pytest.approx( + np.percentile(values, 75.0) ) - p05: float = Field( - description="The 5th percentile of the distribution.", + assert distribution_summary.percentiles.p90 == pytest.approx( + np.percentile(values, 90.0) ) - p10: float = Field( - description="The 10th percentile of the distribution.", + assert distribution_summary.percentiles.p95 == pytest.approx( + np.percentile(values, 95.0) ) - p25: float = Field( - description="The 25th percentile of the distribution.", + assert distribution_summary.percentiles.p99 == pytest.approx( + np.percentile(values, 99.0) ) - p50: float = Field( - description="The 50th percentile of the distribution.", + assert distribution_summary.percentiles.p999 == pytest.approx( + np.percentile(values, 99.9) ) - p75: float = Field( - description="The 75th percentile of the distribution.", + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_weights = DistributionSummary.from_values( + values, weights=[2] * len(values) + ) + assert distribution_summary_weights.mean == pytest.approx(np.mean(values)) + assert distribution_summary_weights.median == pytest.approx(np.median(values)) + assert distribution_summary_weights.mode == 0.0 + assert distribution_summary_weights.variance == pytest.approx( + np.var(values, ddof=0) ) - p90: float = Field( - description="The 90th percentile of the distribution.", + assert distribution_summary_weights.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary_weights.min == min(values) + assert distribution_summary_weights.max == max(values) + assert distribution_summary_weights.count == len(values) + assert distribution_summary_weights.total_sum == sum(values) + assert distribution_summary_weights.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_values(values, include_cdf=True) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == len(values) + + +def test_distribution_summary_from_request_times_concurrency(): + # 
create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + distribution_summary = DistributionSummary.from_request_times( + requests, distribution_type="concurrency" ) - p95: float = Field( - description="The 95th percentile of the distribution.", + assert distribution_summary.mean == pytest.approx(10.0, abs=0.01) + assert distribution_summary.median == pytest.approx(10.0) + assert distribution_summary.mode == 10.0 + assert distribution_summary.variance == pytest.approx(0, abs=0.1) + assert distribution_summary.std_dev == pytest.approx(0, abs=0.3) + assert distribution_summary.min == pytest.approx(1) + assert distribution_summary.max == pytest.approx(10.0) + assert distribution_summary.count == 10 + assert distribution_summary.total_sum == pytest.approx(55.0) + assert distribution_summary.percentiles.p001 == pytest.approx(10, abs=5) + assert distribution_summary.percentiles.p01 == pytest.approx(10) + assert distribution_summary.percentiles.p05 == pytest.approx(10) + assert distribution_summary.percentiles.p10 == pytest.approx(10) + assert distribution_summary.percentiles.p25 == pytest.approx(10) + assert distribution_summary.percentiles.p50 == pytest.approx(10) + assert distribution_summary.percentiles.p75 == pytest.approx(10) + assert distribution_summary.percentiles.p90 == pytest.approx(10) + assert distribution_summary.percentiles.p95 == pytest.approx(10) + assert distribution_summary.percentiles.p99 == pytest.approx(10) + assert distribution_summary.percentiles.p999 == pytest.approx(10) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_request_times( + requests, distribution_type="concurrency", include_cdf=True ) - p99: float = Field( - description="The 99th percentile of the distribution.", + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 10 + + +def test_distribution_summary_from_request_times_rate(): + # create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + distribution_summary = DistributionSummary.from_request_times( + requests, distribution_type="rate" ) - p999: float = Field( - description="The 99.9th percentile of the distribution.", + assert distribution_summary.mean == pytest.approx(10.0, abs=0.01) + assert distribution_summary.median == pytest.approx(10.0) + assert distribution_summary.mode == pytest.approx(10.0) + assert distribution_summary.variance == pytest.approx(0, abs=0.1) + assert distribution_summary.std_dev == pytest.approx(0, abs=0.3) + assert distribution_summary.min == pytest.approx(1.0) + assert distribution_summary.max == pytest.approx(10.0) + assert distribution_summary.count == 12 + assert distribution_summary.total_sum == pytest.approx(111.0) + assert distribution_summary.percentiles.p001 == pytest.approx(10.0, abs=0.5) + assert distribution_summary.percentiles.p01 == pytest.approx(10.0) + assert distribution_summary.percentiles.p05 == pytest.approx(10.0) + assert distribution_summary.percentiles.p10 == pytest.approx(10.0) + assert distribution_summary.percentiles.p25 == pytest.approx(10.0) + assert distribution_summary.percentiles.p50 == pytest.approx(10.0) + assert distribution_summary.percentiles.p75 == pytest.approx(10.0) + assert distribution_summary.percentiles.p90 == pytest.approx(10.0) + assert 
distribution_summary.percentiles.p95 == pytest.approx(10.0) + assert distribution_summary.percentiles.p99 == pytest.approx(10.0) + assert distribution_summary.percentiles.p999 == pytest.approx(10.0) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_request_times( + requests, distribution_type="rate", include_cdf=True ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 12 -class DistributionSummary(StandardBaseModel): - """ - Comprehensive statistical summary for numerical value distributions. +def test_distribution_summary_from_iterable_request_times(): + # create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + # create 9 iterations for each request with first iter at start + 0.1 + # and spaced at 0.1 seconds apart + first_iter_times = [val / 10 + 0.1 for val in range(10001)] + iter_counts = [9 for _ in range(10001)] + first_iter_counts = [1 for _ in range(10001)] - Calculates and stores complete statistical metrics including central tendency, - dispersion, extremes, and percentiles for any numerical distribution. Supports - both weighted and unweighted data with optional cumulative distribution function - generation. Primary statistical analysis tool for request timing, performance - metrics, and benchmark result characterization. + distribution_summary = DistributionSummary.from_iterable_request_times( + requests, first_iter_times, iter_counts, first_iter_counts + ) + assert distribution_summary.mean == pytest.approx(90.0, abs=0.1) + assert distribution_summary.median == pytest.approx(80.0) + assert distribution_summary.mode == pytest.approx(80.0) + assert distribution_summary.variance == pytest.approx(704.463, abs=0.001) + assert distribution_summary.std_dev == pytest.approx(26.541, abs=0.001) + assert distribution_summary.min == pytest.approx(0.0) + assert distribution_summary.max == pytest.approx(160.0) + assert distribution_summary.count == 44 + assert distribution_summary.total_sum == pytest.approx(3538.85, abs=0.01) + assert distribution_summary.percentiles.p001 == pytest.approx(80.0) + assert distribution_summary.percentiles.p01 == pytest.approx(80.0) + assert distribution_summary.percentiles.p05 == pytest.approx(80.0) + assert distribution_summary.percentiles.p10 == pytest.approx(80.0) + assert distribution_summary.percentiles.p25 == pytest.approx(80.0) + assert distribution_summary.percentiles.p50 == pytest.approx(80.0) + assert distribution_summary.percentiles.p75 == pytest.approx(80.0) + assert distribution_summary.percentiles.p90 == pytest.approx(160.0) + assert distribution_summary.percentiles.p95 == pytest.approx(160.0) + assert distribution_summary.percentiles.p99 == pytest.approx(160.0) + assert distribution_summary.percentiles.p999 == pytest.approx(160.0) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_iterable_request_times( + requests, first_iter_times, iter_counts, first_iter_counts, include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 44 - Example: - :: - # Create from simple values - summary = DistributionSummary.from_values([1.0, 2.0, 3.0, 4.0, 5.0]) - print(f"Mean: {summary.mean}, P95: 
{summary.percentiles.p95}") - # Create from request timings for concurrency analysis - requests = [(0.0, 1.0), (0.5, 2.0), (1.0, 2.5)] - concurrency = DistributionSummary.from_request_times( - requests, "concurrency" +def test_status_distribution_summary_initialization(): + status_distribution_summary = StatusDistributionSummary( + total=create_default_distribution_summary(), + successful=create_default_distribution_summary(), + incomplete=create_default_distribution_summary(), + errored=create_default_distribution_summary(), + ) + assert status_distribution_summary.total.mean == 50.0 + assert status_distribution_summary.successful.mean == 50.0 + assert status_distribution_summary.incomplete.mean == 50.0 + assert status_distribution_summary.errored.mean == 50.0 + + +def test_status_distribution_summary_marshalling(): + status_distribution_summary = StatusDistributionSummary( + total=create_default_distribution_summary(), + successful=create_default_distribution_summary(), + incomplete=create_default_distribution_summary(), + errored=create_default_distribution_summary(), + ) + serialized = status_distribution_summary.model_dump() + deserialized = StatusDistributionSummary.model_validate(serialized) + + for key, value in vars(status_distribution_summary).items(): + for child_key, child_value in vars(value).items(): + assert getattr(getattr(deserialized, key), child_key) == child_value + + +def test_status_distribution_summary_from_values(): + value_types: list[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + values = [float(val % 3) for val in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_values( + value_types, values + ) + assert status_distribution_summary.total.count == len(values) + assert status_distribution_summary.total.mean == pytest.approx(np.mean(values)) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + np.mean( + [val for ind, val in enumerate(values) if value_types[ind] == "successful"] ) - """ - - mean: float = Field( - description="The mean/average of the distribution.", - ) - median: float = Field( - description="The median of the distribution.", - ) - mode: float = Field( - description="The mode of the distribution.", - ) - variance: float = Field( - description="The variance of the distribution.", - ) - std_dev: float = Field( - description="The standard deviation of the distribution.", - ) - min: float = Field( - description="The minimum value of the distribution.", - ) - max: float = Field( - description="The maximum value of the distribution.", - ) - count: int = Field( - description="The number of values in the distribution.", - ) - total_sum: float = Field( - description="The total sum of the values in the distribution.", - ) - percentiles: Percentiles = Field( - description="The percentiles of the distribution.", - ) - cumulative_distribution_function: list[tuple[float, float]] | None = Field( - description="The cumulative distribution function (CDF) of the distribution.", - default=None, - ) - - @staticmethod - def from_distribution_function( - distribution: list[tuple[float, float]], - include_cdf: bool = False, - ) -> DistributionSummary: - """ - Create statistical summary from weighted distribution or probability function. - - Converts weighted numerical values or probability distribution function (PDF) - into comprehensive statistical summary. 
Normalizes weights to probabilities - and calculates all statistical metrics including percentiles. - - :param distribution: List of (value, weight) or (value, probability) tuples - representing the distribution - :param include_cdf: Whether to include cumulative distribution function - in the output - :return: DistributionSummary instance with calculated statistical metrics - """ - values, weights = zip(*distribution) if distribution else ([], []) - values = np.array(values) # type: ignore[assignment] - weights = np.array(weights) # type: ignore[assignment] - - # create the PDF - probabilities = weights / np.sum(weights) # type: ignore[operator] - pdf = np.column_stack((values, probabilities)) - pdf = pdf[np.argsort(pdf[:, 0])] - values = pdf[:, 0] # type: ignore[assignment] - probabilities = pdf[:, 1] - - # calculate the CDF - cumulative_probabilities = np.cumsum(probabilities) - cdf = np.column_stack((values, cumulative_probabilities)) - - # calculate statistics - mean = np.sum(values * probabilities).item() # type: ignore[attr-defined] - median = cdf[np.argmax(cdf[:, 1] >= 0.5), 0].item() if len(cdf) > 0 else 0 # noqa: PLR2004 - mode = values[np.argmax(probabilities)].item() if len(values) > 0 else 0 # type: ignore[call-overload] - variance = np.sum((values - mean) ** 2 * probabilities).item() # type: ignore[attr-defined] - std_dev = math.sqrt(variance) - minimum = values[0].item() if len(values) > 0 else 0 - maximum = values[-1].item() if len(values) > 0 else 0 - count = len(values) - total_sum = np.sum(values).item() # type: ignore[attr-defined] - - return DistributionSummary( - mean=mean, - median=median, - mode=mode, - variance=variance, - std_dev=std_dev, - min=minimum, - max=maximum, - count=count, - total_sum=total_sum, - percentiles=( - Percentiles( - p001=cdf[np.argmax(cdf[:, 1] >= 0.001), 0].item(), # noqa: PLR2004 - p01=cdf[np.argmax(cdf[:, 1] >= 0.01), 0].item(), # noqa: PLR2004 - p05=cdf[np.argmax(cdf[:, 1] >= 0.05), 0].item(), # noqa: PLR2004 - p10=cdf[np.argmax(cdf[:, 1] >= 0.1), 0].item(), # noqa: PLR2004 - p25=cdf[np.argmax(cdf[:, 1] >= 0.25), 0].item(), # noqa: PLR2004 - p50=cdf[np.argmax(cdf[:, 1] >= 0.50), 0].item(), # noqa: PLR2004 - p75=cdf[np.argmax(cdf[:, 1] >= 0.75), 0].item(), # noqa: PLR2004 - p90=cdf[np.argmax(cdf[:, 1] >= 0.9), 0].item(), # noqa: PLR2004 - p95=cdf[np.argmax(cdf[:, 1] >= 0.95), 0].item(), # noqa: PLR2004 - p99=cdf[np.argmax(cdf[:, 1] >= 0.99), 0].item(), # noqa: PLR2004 - p999=cdf[np.argmax(cdf[:, 1] >= 0.999), 0].item(), # noqa: PLR2004 - ) - if len(cdf) > 0 - else Percentiles( - p001=0, - p01=0, - p05=0, - p10=0, - p25=0, - p50=0, - p75=0, - p90=0, - p95=0, - p99=0, - p999=0, - ) - ), - cumulative_distribution_function=cdf.tolist() if include_cdf else None, + ) + assert status_distribution_summary.successful.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "successful"] + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert status_distribution_summary.incomplete.mean == pytest.approx( + np.mean( + [val for ind, val in enumerate(values) if value_types[ind] == "incomplete"] ) + ) + assert status_distribution_summary.incomplete.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "incomplete"] + ) + assert ( + status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx( + np.mean([val for ind, val in enumerate(values) if value_types[ind] == 
"error"]) + ) + assert status_distribution_summary.errored.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "error"] + ) + assert status_distribution_summary.errored.cumulative_distribution_function is None - @staticmethod - def from_values( - values: list[float], - weights: list[float] | None = None, - include_cdf: bool = False, - ) -> DistributionSummary: - """ - Create statistical summary from numerical values with optional weights. - - Wrapper around from_distribution_function for simple value lists. If weights - are not provided, all values are equally weighted. Enables statistical - analysis of any numerical dataset. - - :param values: Numerical values representing the distribution - :param weights: Optional weights for each value. If not provided, all values - are equally weighted - :param include_cdf: Whether to include cumulative distribution function in - the output DistributionSummary - :return: DistributionSummary instance with calculated statistical metrics - :raises ValueError: If values and weights lists have different lengths - """ - if weights is None: - weights = [1.0] * len(values) - - if len(values) != len(weights): - raise ValueError( - "The length of values and weights must be the same.", - ) - - return DistributionSummary.from_distribution_function( - distribution=list(zip(values, weights)), - include_cdf=include_cdf, - ) + status_distribution_summary_cdf = StatusDistributionSummary.from_values( + value_types, values, include_cdf=True + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) - @staticmethod - def from_request_times( - requests: list[tuple[float, float]], - distribution_type: Literal["concurrency", "rate"], - include_cdf: bool = False, - epsilon: float = 1e-6, - ) -> DistributionSummary: - """ - Create statistical summary from request timing data. - - Analyzes request start/end times to calculate concurrency or rate - distributions. Converts timing events into statistical metrics for - performance analysis and load characterization. 
- - :param requests: List of (start_time, end_time) tuples for each request - :param distribution_type: Type of analysis - "concurrency" for simultaneous - requests or "rate" for completion rates - :param include_cdf: Whether to include cumulative distribution function - :param epsilon: Threshold for merging close timing events - :return: DistributionSummary with timing-based statistical metrics - :raises ValueError: If distribution_type is not "concurrency" or "rate" - """ - if distribution_type == "concurrency": - # convert to delta changes based on when requests were running - time_deltas: dict[float, int] = defaultdict(int) - for start, end in requests: - time_deltas[start] += 1 - time_deltas[end] -= 1 - - # convert to the events over time measuring concurrency changes - events = [] - active = 0 - - for time, delta in sorted(time_deltas.items()): - active += delta - events.append((time, active)) - elif distribution_type == "rate": - # convert to events for when requests finished - global_start = min(start for start, _ in requests) if requests else 0 - events = [(global_start, 1)] + [(end, 1) for _, end in requests] - else: - raise ValueError( - f"Invalid distribution_type '{distribution_type}'. " - "Must be 'concurrency' or 'rate'." - ) - - # combine any events that are very close together - flattened_events: list[tuple[float, float]] = [] - for time, val in sorted(events): - last_time, last_val = ( - flattened_events[-1] if flattened_events else (None, None) - ) - - if ( - last_time is not None - and last_val is not None - and abs(last_time - time) <= epsilon - ): - flattened_events[-1] = (last_time, last_val + val) - else: - flattened_events.append((time, val)) - - # convert to value distribution function - distribution: dict[float, float] = defaultdict(float) - - for ind in range(len(flattened_events) - 1): - start_time, value = flattened_events[ind] - end_time, _ = flattened_events[ind + 1] - duration = end_time - start_time - - if distribution_type == "concurrency": - # weight the concurrency value by the duration - distribution[value] += duration - elif distribution_type == "rate": - # weight the rate value by the duration - rate = value / duration - distribution[rate] += duration - - distribution_list: list[tuple[float, float]] = sorted(distribution.items()) - - return DistributionSummary.from_distribution_function( - distribution=distribution_list, - include_cdf=include_cdf, - ) - @staticmethod - def from_iterable_request_times( - requests: list[tuple[float, float]], - first_iter_times: list[float], - iter_counts: list[int], - first_iter_counts: list[int] | None = None, - include_cdf: bool = False, - epsilon: float = 1e-6, - ) -> DistributionSummary: - """ - Create statistical summary from iterative request timing data. - - Analyzes autoregressive or streaming requests with multiple iterations - between start and end times. Calculates rate distributions based on - iteration timing patterns for LLM token generation analysis. 
- - :param requests: List of (start_time, end_time) tuples for each request - :param first_iter_times: Times when first iteration was received for - each request - :param iter_counts: Total iteration counts for each request from first - iteration to end - :param first_iter_counts: Iteration counts for first iteration (defaults - to 1 for each request) - :param include_cdf: Whether to include cumulative distribution function - :param epsilon: Threshold for merging close timing events - :return: DistributionSummary with iteration rate statistical metrics - :raises ValueError: If input lists have mismatched lengths - """ - - if first_iter_counts is None: - first_iter_counts = [1] * len(requests) - - if ( - len(requests) != len(first_iter_times) - or len(requests) != len(iter_counts) - or len(requests) != len(first_iter_counts) - ): - raise ValueError( - "requests, first_iter_times, iter_counts, and first_iter_counts must" - "be the same length." - f"Given {len(requests)}, {len(first_iter_times)}, {len(iter_counts)}, " - f"{len(first_iter_counts)}", - ) - - # first break up the requests into individual iterable events - events = defaultdict(int) - global_start = min(start for start, _ in requests) if requests else 0 - global_end = max(end for _, end in requests) if requests else 0 - events[global_start] = 0 - events[global_end] = 0 - - for (_, end), first_iter, first_iter_count, total_count in zip( - requests, first_iter_times, first_iter_counts, iter_counts - ): - events[first_iter] += first_iter_count - - if total_count > 1: - iter_latency = (end - first_iter) / (total_count - 1) - for ind in range(1, total_count): - events[first_iter + ind * iter_latency] += 1 - - # combine any events that are very close together - flattened_events: list[tuple[float, int]] = [] - - for time, count in sorted(events.items()): - last_time, last_count = ( - flattened_events[-1] if flattened_events else (None, None) - ) - - if ( - last_time is not None - and last_count is not None - and abs(last_time - time) <= epsilon - ): - flattened_events[-1] = (last_time, last_count + count) - else: - flattened_events.append((time, count)) - - # convert to value distribution function - distribution: dict[float, float] = defaultdict(float) - - for ind in range(len(flattened_events) - 1): - start_time, count = flattened_events[ind] - end_time, _ = flattened_events[ind + 1] - duration = end_time - start_time - rate = count / duration - distribution[rate] += duration - - distribution_list = sorted(distribution.items()) - - return DistributionSummary.from_distribution_function( - distribution=distribution_list, - include_cdf=include_cdf, - ) +def test_status_distribution_summary_from_request_times(): + request_types: list[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + requests = [((val % 3) / 10, (val % 3) / 10 + 1) for val in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_request_times( + request_types, requests, distribution_type="concurrency" + ) + assert status_distribution_summary.total.mean == pytest.approx(2500.0, abs=0.01) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + 1000.0, abs=0.01 + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert status_distribution_summary.incomplete.mean == pytest.approx( + 1000.0, abs=0.01 + ) + assert ( + 
status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx(1000.0, abs=0.01) + assert status_distribution_summary.errored.cumulative_distribution_function is None + status_distribution_summary_cdf = StatusDistributionSummary.from_request_times( + request_types, requests, distribution_type="concurrency", include_cdf=True + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) -class StatusDistributionSummary( - StatusBreakdown[ - DistributionSummary, - DistributionSummary, - DistributionSummary, - DistributionSummary, - ] -): - """ - Status-grouped statistical summary for request processing analysis. - - Provides comprehensive statistical analysis grouped by request status (total, - successful, incomplete, errored). Enables performance analysis across different - request outcomes for benchmarking and monitoring applications. Each status - category maintains complete DistributionSummary metrics. - - Example: - :: - status_summary = StatusDistributionSummary.from_values( - value_types=["successful", "error", "successful"], - values=[1.5, 10.0, 2.1] - ) - print(f"Success mean: {status_summary.successful.mean}") - print(f"Error rate: {status_summary.errored.count}") - """ - - @staticmethod - def from_values( - value_types: list[Literal["successful", "incomplete", "error"]], - values: list[float], - weights: list[float] | None = None, - include_cdf: bool = False, - ) -> StatusDistributionSummary: - """ - Create status-grouped statistical summary from values and status types. - - Groups numerical values by request status and calculates complete - statistical summaries for each category. Enables performance analysis - across different request outcomes. - - :param value_types: Status type for each value ("successful", "incomplete", - or "error") - :param values: Numerical values representing the distribution - :param weights: Optional weights for each value (defaults to equal weighting) - :param include_cdf: Whether to include cumulative distribution functions - :return: StatusDistributionSummary with statistics grouped by status - :raises ValueError: If input lists have mismatched lengths or invalid - status types - """ - if any( - type_ not in {"successful", "incomplete", "error"} for type_ in value_types - ): - raise ValueError( - "value_types must be one of 'successful', 'incomplete', or 'error'. 
" - f"Got {value_types} instead.", - ) - - if weights is None: - weights = [1.0] * len(values) - - if len(value_types) != len(values) or len(value_types) != len(weights): - raise ValueError( - "The length of value_types, values, and weights must be the same.", - ) - - _, successful_values, successful_weights = ( - zip(*successful) - if ( - successful := list( - filter( - lambda val: val[0] == "successful", - zip(value_types, values, weights), - ) - ) - ) - else ([], [], []) - ) - _, incomplete_values, incomplete_weights = ( - zip(*incomplete) - if ( - incomplete := list( - filter( - lambda val: val[0] == "incomplete", - zip(value_types, values, weights), - ) - ) - ) - else ([], [], []) - ) - _, errored_values, errored_weights = ( - zip(*errored) - if ( - errored := list( - filter( - lambda val: val[0] == "error", - zip(value_types, values, weights), - ) - ) - ) - else ([], [], []) - ) - return StatusDistributionSummary( - total=DistributionSummary.from_values( - values, - weights, - include_cdf=include_cdf, - ), - successful=DistributionSummary.from_values( - successful_values, # type: ignore[arg-type] - successful_weights, # type: ignore[arg-type] - include_cdf=include_cdf, - ), - incomplete=DistributionSummary.from_values( - incomplete_values, # type: ignore[arg-type] - incomplete_weights, # type: ignore[arg-type] - include_cdf=include_cdf, - ), - errored=DistributionSummary.from_values( - errored_values, # type: ignore[arg-type] - errored_weights, # type: ignore[arg-type] - include_cdf=include_cdf, - ), +def test_status_distribution_summary_from_iterable_request_times(): + request_types: list[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + requests = [(val % 3 / 10, val % 3 / 10 + 1) for val in range(3000)] + first_iter_times = [val % 3 / 10 + 0.1 for val in range(3000)] + iter_counts = [9 for _ in range(3000)] + first_iter_counts = [1 for _ in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_iterable_request_times( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ) + assert status_distribution_summary.total.mean == pytest.approx(21666.66, abs=0.01) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + 8000.0, abs=0.01 + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert status_distribution_summary.incomplete.mean == pytest.approx( + 8000.0, abs=0.01 + ) + assert ( + status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx(8000.0, abs=0.01) + assert status_distribution_summary.errored.cumulative_distribution_function is None + + status_distribution_summary_cdf = ( + StatusDistributionSummary.from_iterable_request_times( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + include_cdf=True, ) + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) - @staticmethod - def from_request_times( - request_types: 
list[Literal["successful", "incomplete", "error"]], - requests: list[tuple[float, float]], - distribution_type: Literal["concurrency", "rate"], - include_cdf: bool = False, - epsilon: float = 1e-6, - ) -> StatusDistributionSummary: - """ - Create status-grouped statistical summary from request timing data. - - Analyzes request timings grouped by status to calculate concurrency or - rate distributions for each outcome category. Enables comparative - performance analysis across successful, incomplete, and errored requests. - - :param request_types: Status type for each request ("successful", - "incomplete", or "error") - :param requests: List of (start_time, end_time) tuples for each request - :param distribution_type: Analysis type - "concurrency" or "rate" - :param include_cdf: Whether to include cumulative distribution functions - :param epsilon: Threshold for merging close timing events - :return: StatusDistributionSummary with timing statistics by status - :raises ValueError: If input lists have mismatched lengths or invalid types - """ - if distribution_type not in {"concurrency", "rate"}: - raise ValueError( - f"Invalid distribution_type '{distribution_type}'. " - "Must be 'concurrency' or 'rate'." - ) - - if any( - type_ not in {"successful", "incomplete", "error"} - for type_ in request_types - ): - raise ValueError( - "request_types must be one of 'successful', 'incomplete', or 'error'. " - f"Got {request_types} instead.", - ) - - if len(request_types) != len(requests): - raise ValueError( - "The length of request_types and requests must be the same. " - f"Got {len(request_types)} and {len(requests)} instead.", - ) - - _, successful_requests = ( - zip(*successful) - if ( - successful := list( - filter( - lambda val: val[0] == "successful", - zip(request_types, requests), - ) - ) - ) - else ([], []) - ) - _, incomplete_requests = ( - zip(*incomplete) - if ( - incomplete := list( - filter( - lambda val: val[0] == "incomplete", - zip(request_types, requests), - ) - ) - ) - else ([], []) - ) - _, errored_requests = ( - zip(*errored) - if ( - errored := list( - filter( - lambda val: val[0] == "error", - zip(request_types, requests), - ) - ) - ) - else ([], []) - ) - return StatusDistributionSummary( - total=DistributionSummary.from_request_times( - requests, - distribution_type=distribution_type, - include_cdf=include_cdf, - epsilon=epsilon, - ), - successful=DistributionSummary.from_request_times( - successful_requests, # type: ignore[arg-type] - distribution_type=distribution_type, - include_cdf=include_cdf, - epsilon=epsilon, - ), - incomplete=DistributionSummary.from_request_times( - incomplete_requests, # type: ignore[arg-type] - distribution_type=distribution_type, - include_cdf=include_cdf, - epsilon=epsilon, - ), - errored=DistributionSummary.from_request_times( - errored_requests, # type: ignore[arg-type] - distribution_type=distribution_type, - include_cdf=include_cdf, - epsilon=epsilon, - ), - ) +def test_running_stats_initialization(): + running_stats = RunningStats() + assert running_stats.start_time == pytest.approx(time.time(), abs=0.01) + assert running_stats.count == 0 + assert running_stats.total == 0 + assert running_stats.last == 0 + assert running_stats.mean == 0 + assert running_stats.rate == 0 - @staticmethod - def from_iterable_request_times( - request_types: list[Literal["successful", "incomplete", "error"]], - requests: list[tuple[float, float]], - first_iter_times: list[float], - iter_counts: list[int] | None = None, - first_iter_counts: list[int] | None = 
None, - include_cdf: bool = False, - epsilon: float = 1e-6, - ) -> StatusDistributionSummary: - """ - Create status-grouped statistical summary from iterative request timing data. - - Analyzes autoregressive request timings grouped by status to calculate - iteration rate distributions for each outcome category. Enables comparative - analysis of token generation or streaming response performance across - different request statuses. - - :param request_types: Status type for each request ("successful", - "incomplete", or "error") - :param requests: List of (start_time, end_time) tuples for each request - :param first_iter_times: Times when first iteration was received for - each request - :param iter_counts: Total iteration counts for each request (defaults to 1) - :param first_iter_counts: Iteration counts for first iteration (defaults - to 1) - :param include_cdf: Whether to include cumulative distribution functions - :param epsilon: Threshold for merging close timing events - :return: StatusDistributionSummary with iteration statistics by status - :raises ValueError: If input lists have mismatched lengths or invalid types - """ - if any( - type_ not in {"successful", "incomplete", "error"} - for type_ in request_types - ): - raise ValueError( - "request_types must be one of 'successful', 'incomplete', or 'error'. " - f"Got {request_types} instead.", - ) - - if iter_counts is None: - iter_counts = [1] * len(requests) - - if first_iter_counts is None: - first_iter_counts = [1] * len(requests) - - if ( - len(request_types) != len(requests) - or len(requests) != len(first_iter_times) - or len(requests) != len(iter_counts) - or len(requests) != len(first_iter_counts) - ): - raise ValueError( - "request_types, requests, first_iter_times, iter_counts, and " - "first_iter_counts must be the same length." 
- f"Given {len(request_types)}, {len(requests)}, " - f"{len(first_iter_times)}, {len(iter_counts)}, " - f"{len(first_iter_counts)}", - ) - - ( - _, - successful_requests, - successful_first_iter_times, - successful_iter_counts, - successful_first_iter_counts, - ) = ( - zip(*successful) - if ( - successful := list( - filter( - lambda val: val[0] == "successful", - zip( - request_types, - requests, - first_iter_times, - iter_counts, - first_iter_counts, - ), - ) - ) - ) - else ([], [], [], [], []) - ) - ( - _, - incomplete_requests, - incomplete_first_iter_times, - incomplete_iter_counts, - incomplete_first_iter_counts, - ) = ( - zip(*incomplete) - if ( - incomplete := list( - filter( - lambda val: val[0] == "incomplete", - zip( - request_types, - requests, - first_iter_times, - iter_counts, - first_iter_counts, - ), - ) - ) - ) - else ([], [], [], [], []) - ) - ( - _, - errored_requests, - errored_first_iter_times, - errored_iter_counts, - errored_first_iter_counts, - ) = ( - zip(*errored) - if ( - errored := list( - filter( - lambda val: val[0] == "error", - zip( - request_types, - requests, - first_iter_times, - iter_counts, - first_iter_counts, - ), - ) - ) - ) - else ([], [], [], [], []) - ) - return StatusDistributionSummary( - total=DistributionSummary.from_iterable_request_times( - requests, - first_iter_times, - iter_counts, - first_iter_counts, - include_cdf=include_cdf, - epsilon=epsilon, - ), - successful=DistributionSummary.from_iterable_request_times( - successful_requests, # type: ignore[arg-type] - successful_first_iter_times, # type: ignore[arg-type] - successful_iter_counts, # type: ignore[arg-type] - successful_first_iter_counts, # type: ignore[arg-type] - include_cdf=include_cdf, - epsilon=epsilon, - ), - incomplete=DistributionSummary.from_iterable_request_times( - incomplete_requests, # type: ignore[arg-type] - incomplete_first_iter_times, # type: ignore[arg-type] - incomplete_iter_counts, # type: ignore[arg-type] - incomplete_first_iter_counts, # type: ignore[arg-type] - include_cdf=include_cdf, - epsilon=epsilon, - ), - errored=DistributionSummary.from_iterable_request_times( - errored_requests, # type: ignore[arg-type] - errored_first_iter_times, # type: ignore[arg-type] - errored_iter_counts, # type: ignore[arg-type] - errored_first_iter_counts, # type: ignore[arg-type] - include_cdf=include_cdf, - epsilon=epsilon, - ), - ) +def test_running_stats_marshalling(): + running_stats = RunningStats() + serialized = running_stats.model_dump() + deserialized = RunningStats.model_validate(serialized) + for key, value in vars(running_stats).items(): + assert getattr(deserialized, key) == value -class RunningStats(StandardBaseModel): - """ - Real-time statistics tracking for streaming numerical data. - - Maintains mean, rate, and cumulative statistics for continuous data streams - without storing individual values. Optimized for memory efficiency in - long-running monitoring applications. Supports arithmetic operators for - convenient value addition and provides computed properties for derived metrics. - - Example: - :: - stats = RunningStats() - stats += 10.5 # Add value using operator - stats.update(20.0, count=3) # Add value with custom count - print(f"Mean: {stats.mean}, Rate: {stats.rate}") - """ - - start_time: float = Field( - default_factory=timer.time, - description=( - "The time the running statistics object was created. " - "This is used to calculate the rate of the statistics." 
- ), - ) - count: int = Field( - default=0, - description="The number of values added to the running statistics.", - ) - total: float = Field( - default=0.0, - description="The total sum of the values added to the running statistics.", - ) - last: float = Field( - default=0.0, - description="The last value added to the running statistics.", - ) - - @computed_field # type: ignore[misc] - @property - def mean(self) -> float: - """ - :return: The mean of the running statistics (total / count). - If count is 0, return 0.0. - """ - if self.count == 0: - return 0.0 - return self.total / self.count - - @computed_field # type: ignore[misc] - @property - def rate(self) -> float: - """ - :return: The rate of the running statistics - (total / (time.time() - start_time)). - If count is 0, return 0.0. - """ - if self.count == 0: - return 0.0 - return self.total / (timer.time() - self.start_time) - - def __add__(self, value: Any) -> float: - """ - Add value using + operator and return current mean. - - :param value: Numerical value to add to the running statistics - :return: Updated mean after adding the value - :raises ValueError: If value is not numeric (int or float) - """ - if not isinstance(value, (int, float)): - raise ValueError( - f"Value must be an int or float, got {type(value)} instead.", - ) - - self.update(value) - - return self.mean - - def __iadd__(self, value: Any) -> RunningStats: - """ - Add value using += operator and return updated instance. - - :param value: Numerical value to add to the running statistics - :return: Self reference for method chaining - :raises ValueError: If value is not numeric (int or float) - """ - if not isinstance(value, (int, float)): - raise ValueError( - f"Value must be an int or float, got {type(value)} instead.", - ) - - self.update(value) - - return self - - def update(self, value: float, count: int = 1) -> None: - """ - Update running statistics with new value and count. - - :param value: Numerical value to add to the running statistics - :param count: Number of occurrences to count for this value (defaults to 1) - """ - self.count += count - self.total += value - self.last = value - - -class TimeRunningStats(RunningStats): - """ - Specialized running statistics for time-based measurements. - - Extends RunningStats with time-specific computed properties for millisecond - conversions. Designed for tracking latency, duration, and timing metrics in - performance monitoring applications. - - Example: - :: - time_stats = TimeRunningStats() - time_stats += 0.125 # Add 125ms in seconds - print(f"Mean: {time_stats.mean_ms}ms, Total: {time_stats.total_ms}ms") - """ - - @computed_field # type: ignore[misc] - @property - def total_ms(self) -> float: - """ - :return: The total time multiplied by 1000.0 to convert to milliseconds. - """ - return self.total * 1000.0 - - @computed_field # type: ignore[misc] - @property - def last_ms(self) -> float: - """ - :return: The last time multiplied by 1000.0 to convert to milliseconds. - """ - return self.last * 1000.0 - - @computed_field # type: ignore[misc] - @property - def mean_ms(self) -> float: - """ - :return: The mean time multiplied by 1000.0 to convert to milliseconds. - """ - return self.mean * 1000.0 - - @computed_field # type: ignore[misc] - @property - def rate_ms(self) -> float: - """ - :return: The rate of the running statistics multiplied by 1000.0 - to convert to milliseconds. 
- """ - return self.rate * 1000.0 + +def test_running_stats_update(): + running_stats = RunningStats() + running_stats.update(1) + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + assert running_stats.mean == 1 + time.sleep(1.0) + assert running_stats.rate == pytest.approx( + 1.0 / (time.time() - running_stats.start_time), abs=0.1 + ) + + running_stats.update(2) + assert running_stats.count == 2 + assert running_stats.total == 3 + assert running_stats.last == 2 + assert running_stats.mean == 1.5 + time.sleep(1) + assert running_stats.rate == pytest.approx( + 3 / (time.time() - running_stats.start_time), abs=0.1 + ) + + +def test_running_stats_add(): + running_stats = RunningStats() + mean = running_stats + 1 + assert mean == 1 + assert mean == running_stats.mean + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + + +def test_running_stats_iadd(): + running_stats = RunningStats() + running_stats += 1 + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + assert running_stats.mean == 1 + + +def test_time_running_stats_initialization(): + time_running_stats = TimeRunningStats() + assert time_running_stats.start_time == pytest.approx(time.time(), abs=0.01) + assert time_running_stats.count == 0 + assert time_running_stats.total == 0 + assert time_running_stats.last == 0 + assert time_running_stats.mean == 0 + assert time_running_stats.rate == 0 + assert time_running_stats.total_ms == 0 + assert time_running_stats.last_ms == 0 + assert time_running_stats.mean_ms == 0 + assert time_running_stats.rate_ms == 0 + + +def test_time_running_stats_marshalling(): + time_running_stats = TimeRunningStats() + serialized = time_running_stats.model_dump() + deserialized = TimeRunningStats.model_validate(serialized) + + for key, value in vars(time_running_stats).items(): + assert getattr(deserialized, key) == value + + +def test_time_running_stats_update(): + time_running_stats = TimeRunningStats() + time_running_stats.update(1) + assert time_running_stats.count == 1 + assert time_running_stats.total == 1 + assert time_running_stats.last == 1 + assert time_running_stats.mean == 1 + assert time_running_stats.total_ms == 1000 + assert time_running_stats.last_ms == 1000 + assert time_running_stats.mean_ms == 1000 + time.sleep(1.0) + assert time_running_stats.rate == pytest.approx( + 1.0 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + assert time_running_stats.rate_ms == pytest.approx( + 1000 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + + time_running_stats.update(2) + assert time_running_stats.count == 2 + assert time_running_stats.total == 3 + assert time_running_stats.last == 2 + assert time_running_stats.mean == 1.5 + assert time_running_stats.total_ms == 3000 + assert time_running_stats.last_ms == 2000 + assert time_running_stats.mean_ms == 1500 + time.sleep(1) + assert time_running_stats.rate == pytest.approx( + 3 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + assert time_running_stats.rate_ms == pytest.approx( + 3000 / (time.time() - time_running_stats.start_time), abs=0.1 + ) diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py index 5e69fdf6..2f363c46 100644 --- a/tests/unit/utils/test_text.py +++ b/tests/unit/utils/test_text.py @@ -14,7 +14,7 @@ clean_text, filter_text, format_value_display, - is_puncutation, + is_punctuation, load_text, split_text, split_text_list_by_length, @@ -392,8 
+392,8 @@ class TestIsPunctuation: ], ) def test_invocation(self, text, expected): - """Test is_puncutation with various characters.""" - result = is_puncutation(text) + """Test is_punctuation with various characters.""" + result = is_punctuation(text) assert result == expected @pytest.mark.sanity @@ -405,9 +405,9 @@ def test_invocation(self, text, expected): ], ) def test_invalid_invocation(self, text): - """Test is_puncutation with invalid inputs.""" + """Test is_punctuation with invalid inputs.""" with pytest.raises((TypeError, AttributeError)): - is_puncutation(text) + is_punctuation(text) class TestEndlessTextCreator: From 95cb1d70316223528df69ab604853cc2c3725ad2 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Tue, 26 Aug 2025 13:27:46 -0400 Subject: [PATCH 6/9] Update src/guidellm/utils/functions.py Co-authored-by: Samuel Monson Signed-off-by: Mark Kurtz --- src/guidellm/utils/functions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/guidellm/utils/functions.py b/src/guidellm/utils/functions.py index 6343cbf2..b28aa21e 100644 --- a/src/guidellm/utils/functions.py +++ b/src/guidellm/utils/functions.py @@ -124,10 +124,7 @@ def safe_format_timestamp( :param default: Value to return if timestamp is invalid or None :return: Formatted timestamp string or default value """ - if timestamp is None or timestamp < 0 or timestamp > 2**31: - return default - try: return datetime.fromtimestamp(timestamp).strftime(format_) - except (ValueError, OverflowError, OSError): + except (ValueError, TypeError, OverflowError, OSError): return default From c8b730bb3f702e456b60104f5cecc912dbb015e1 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Tue, 26 Aug 2025 13:28:09 -0400 Subject: [PATCH 7/9] Update src/guidellm/utils/mixins.py Co-authored-by: Samuel Monson Signed-off-by: Mark Kurtz --- src/guidellm/utils/mixins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guidellm/utils/mixins.py b/src/guidellm/utils/mixins.py index 1b61f491..bf0caf74 100644 --- a/src/guidellm/utils/mixins.py +++ b/src/guidellm/utils/mixins.py @@ -60,7 +60,7 @@ def extract_from_obj(cls, obj: Any) -> dict[str, Any]: { key: val if isinstance(val, (str, int, float, bool, list, dict)) - else str(val) + else repr(val) for key, val in obj.__dict__.items() if not key.startswith("_") } From 891402a2639044564c7e77caf5bbd11e9ed0e02f Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Tue, 26 Aug 2025 13:28:16 -0400 Subject: [PATCH 8/9] Update src/guidellm/utils/mixins.py Co-authored-by: Samuel Monson Signed-off-by: Mark Kurtz --- src/guidellm/utils/mixins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guidellm/utils/mixins.py b/src/guidellm/utils/mixins.py index bf0caf74..85812593 100644 --- a/src/guidellm/utils/mixins.py +++ b/src/guidellm/utils/mixins.py @@ -90,7 +90,7 @@ def create_info_dict(cls, obj: Any) -> dict[str, Any]: { key: val if isinstance(val, (str, int, float, bool, list, dict)) - else str(val) + else repr(val) for key, val in obj.__dict__.items() if not key.startswith("_") } From b70ee9816f158ac9d595131a47b0657b2f0b5067 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Tue, 26 Aug 2025 17:39:44 +0000 Subject: [PATCH 9/9] Review fixes --- src/guidellm/utils/mixins.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/guidellm/utils/mixins.py b/src/guidellm/utils/mixins.py index 85812593..b001ff2d 100644 --- a/src/guidellm/utils/mixins.py +++ b/src/guidellm/utils/mixins.py @@ -12,6 +12,10 @@ __all__ = ["InfoMixin"] 
+PYTHON_PRIMITIVES = (str, int, float, bool, list, tuple, dict)
+"""Primitive Python types preserved as-is when serializing object attributes"""
+
+
 class InfoMixin:
     """
     Mixin class providing standardized metadata extraction for introspection.
@@ -58,9 +62,7 @@ def extract_from_obj(cls, obj: Any) -> dict[str, Any]:
             "module": obj.__class__.__module__ if hasattr(obj, "__class__") else None,
             "attributes": (
                 {
-                    key: val
-                    if isinstance(val, (str, int, float, bool, list, dict))
-                    else repr(val)
+                    key: val if isinstance(val, PYTHON_PRIMITIVES) else repr(val)
                     for key, val in obj.__dict__.items()
                     if not key.startswith("_")
                 }